## Import Libraries

In [None]:
import pandas as pd            #to import the csv files
from tqdm import tqdm          #to track progress
tqdm.pandas()

## Data Import

In [None]:
# Load each CSV table from the local "dataset/" directory into its own
# module-level DataFrame. The functions defined further down read
# `app_events` and `events` as globals, so these names must exist before
# they are called.
app_events = pd.read_csv("dataset/app_events.csv")
app_labels = pd.read_csv("dataset/app_labels.csv")
events = pd.read_csv("dataset/events.csv")
# NOTE(review): gender_age_test is loaded but not referenced again in the
# visible part of this notebook -- confirm it is needed.
gender_age_test = pd.read_csv("dataset/gender_age_test.csv")
gender_age_train = pd.read_csv("dataset/gender_age_train.csv")
label_categories = pd.read_csv("dataset/label_categories.csv")
phone_brand_device_model = pd.read_csv("dataset/phone_brand_device_model.csv")

In [None]:
# Print the column index of each loaded table, one per line, in the same
# order as before (gender_age_test is intentionally not part of this list).
print(
    *(
        table.columns
        for table in (
            app_events,
            app_labels,
            events,
            gender_age_train,
            label_categories,
            phone_brand_device_model,
        )
    ),
    sep="\n",
)

In [None]:
# Initialize an empty dataframe that collects one record per device.
# The column list must match the five values appended per row by
# get_device_info: device id, brand, model, the set of apps seen on the
# device, and the list of activity events. With only four columns declared,
# the row assignment fails with "cannot set a row with mismatched columns".
record_df = pd.DataFrame(
    columns=['device_id', 'brand', 'model', 'installed_apps', 'events']
)

## Utility Functions

In [None]:
def get_app_activity(row):
    """Collect the apps recorded in `app_events` for a single event row.

    `row` must provide the keys 'event_id', 'timestamp', 'latitude' and
    'longitude'. The matching rows are looked up in the module-level
    `app_events` frame by 'event_id'.

    Returns a tuple `(installed_apps, activity)` where

    * `installed_apps` is the list of distinct 'app_id' values attached to
      the event, in first-seen order;
    * `activity` holds one `[app_id, timestamp, (latitude, longitude)]`
      entry for every matching row whose 'is_active' equals 1 (repeated
      app ids are kept).
    """
    wanted_event = row['event_id']
    when = row['timestamp']
    where = (row['latitude'], row['longitude'])

    # All app rows logged for this event.
    event_apps = app_events.loc[app_events['event_id'] == wanted_event]

    installed_apps = list(event_apps['app_id'].drop_duplicates())

    # Only the rows flagged as active contribute an activity entry.
    running = event_apps['is_active'] == 1
    activity = [
        [app_id, when, where]
        for app_id in event_apps.loc[running, 'app_id']
    ]

    return installed_apps, activity

In [None]:
def get_device_info(row):
    """Append one summary record for the device described by `row`.

    `row` must provide the keys 'device_id', 'phone_brand' and
    'device_model'. Every row of the module-level `events` frame with the
    same 'device_id' is passed to `get_app_activity`, and the results are
    merged into a set of app ids and a flat list of activity entries.

    Nothing is returned; the record is written as a new row at the end of
    the module-level `record_df` frame.

    NOTE(review): the appended row has five values, so `record_df` must
    declare five columns for the assignment to succeed.
    """
    device_id = row['device_id']
    brand = row['phone_brand']
    model = row['device_model']

    device_events = events.loc[events['device_id'] == device_id]

    apps_seen = set()
    device_activity = []

    # Distinct loop variable so the `row` parameter is not shadowed.
    for _, event_row in device_events.iterrows():
        event_apps, event_activity = get_app_activity(event_row)
        apps_seen.update(event_apps)
        device_activity += event_activity

    next_slot = len(record_df)
    record_df.loc[next_slot] = [device_id, brand, model, apps_seen, device_activity]

In [None]:
# Store data in the format device, brand, model, event details: call
# get_device_info once per row of phone_brand_device_model (axis=1), with a
# tqdm progress bar. get_device_info writes its result into record_df, so
# the value returned by progress_apply itself is not the record table.
phone_brand_device_model.progress_apply(get_device_info, axis = 1)

In [None]:
# Persist the assembled per-device records to disk. The frame is named
# `record_df`; `records_df` is not defined anywhere and raised a NameError.
record_df.to_pickle("dataset_records.pkl")