## Import Libraries

In [1]:
import pandas as pd            #to import dataset
from tqdm import tqdm          #to track progress

## Data Import

In [2]:
# Phone specification table, stored as CSV.
df_specs = pd.read_csv("dataset/dataset_specifications.csv")

# The train and test frames are stored as pickles; train is read first, then test.
df_train, df_test = (
    pd.read_pickle(pickle_path)
    for pickle_path in ("dataset/dataset_train.pkl", "dataset/dataset_test.pkl")
)

In [4]:
# Enable the `progress_apply` variants used below (they show a tqdm progress bar).
tqdm.pandas()

# Run `transform_lowercase` over every row (axis=1) of both frames and rebind the
# names to the results. NOTE: `transform_lowercase` is not defined in this cell;
# it must already exist in the kernel.
df_train, df_test = (
    frame.progress_apply(transform_lowercase, axis=1)
    for frame in (df_train, df_test)
)

100%|██████████████████████████████████████████████████████████████████████████| 74645/74645 [00:12<00:00, 6055.77it/s]
100%|████████████████████████████████████████████████████████████████████████| 112071/112071 [00:16<00:00, 6625.02it/s]


## Data Cleaning

In [5]:
# Keep only the specification rows that have a screen size recorded.
df_specs = df_specs.dropna(subset=['screen_size'])

In [6]:
def add_specs(row, specs=None):
    """Return a copy of ``row`` extended with the phone specifications for its brand and model.

    Parameters
    ----------
    row : pandas.Series
        Record providing the ``'brand'`` and ``'model'`` lookup keys.
    specs : pandas.DataFrame, optional
        Specification table containing ``phone_brand``, ``device_model`` and the
        five specification columns listed below. When omitted, the notebook-level
        ``df_specs`` table is used, so ``add_specs(row)`` keeps working unchanged.

    Returns
    -------
    pandas.Series
        A new Series holding every entry of ``row`` plus ``screen_size``,
        ``ram_gb``, ``release_month``, ``release_year`` and ``camera``. The values
        are taken from the first row of ``specs`` whose brand and model both match;
        all five are set to 0 when no row matches.
    """
    if specs is None:
        specs = df_specs

    spec_columns = ('screen_size', 'ram_gb', 'release_month', 'release_year', 'camera')

    # Work on a copy: pandas does not support functions handed to ``apply`` that
    # mutate the object they receive, so the caller's row is left untouched.
    enriched = row.copy()

    is_same_phone = (specs['phone_brand'] == row['brand']) & (specs['device_model'] == row['model'])
    phone = specs[is_same_phone]
    found = len(phone) > 0

    for column in spec_columns:
        # First matching row wins; 0 is the placeholder for unknown phones.
        enriched[column] = phone[column].values[0] if found else 0

    return enriched

In [7]:
# Attach the specification columns to every row (axis=1) of the train and test
# frames. This is a row-by-row pass: the recorded run below took roughly 5 and
# 8 minutes for the two frames.
df_train_specs, df_test_specs = (
    frame.progress_apply(add_specs, axis=1)
    for frame in (df_train, df_test)
)

100%|███████████████████████████████████████████████████████████████████████████| 74645/74645 [05:06<00:00, 243.79it/s]
100%|█████████████████████████████████████████████████████████████████████████| 112071/112071 [07:38<00:00, 244.68it/s]


In [9]:
# Persist the enriched frames as pickles next to the input datasets.
df_train_specs.to_pickle(path="dataset/dataset_train_with_specs.pkl")
df_test_specs.to_pickle(path="dataset/dataset_test_with_specs.pkl")