In [1]:
import pandas as pd
import numpy as np
import os
import json
from itertools import compress
import re


## <font color='red'>Initializing Data</font> 


### Useful functions to download data from Google Analytics

In [4]:
def get_data(folder_path):
    """ It gives all the data in a folder of a day """
    def open_json(path):
        with open(path) as f:
            data = json.load(f)
        return data    

    directory = os.listdir(folder_path)
    df_list = []
    #Each df takes the name of the given event
    for i in range(len(directory)):
        filename_json = directory[i]
        if filename_json.endswith(".json"):
            file_path = folder_path + '/' + filename_json 
            data      = open_json(file_path)
            #print(data)
            filename  = pd.DataFrame(data).event_name.unique()[0] + '_' + folder_path.split('/')[-1]
        
            globals()[filename] = data
            df_list.append(filename)
     
    print('In this folder there are ' + str(len(directory)) + ' df:\n' + str(df_list))   
    
    return df_list 


In [5]:

def unnest_column(df, nested_key='key'):
    """ Unnesting all the value in the column "event_params" """
    #Local function that unnest the single value
    def flat_nested_columnn_value(nested_key, nested_value):
        return pd.DataFrame(nested_value).set_index(nested_key).transpose().reset_index()
    
    #Iterating it over each row
    col_to_unnest = df.iloc[:,-1] 
    df_in = pd.DataFrame()
    for row in df.index:
        df_flat = flat_nested_columnn_value(nested_key, col_to_unnest[row])
        df_in = pd.concat([df_in, df_flat]) 
    
    #It returns another df with all new features that are still dictionary 
    df_in = df_in.drop(columns='index').set_index(df.index)
    return df_in


def flat_a_row(df, row_index):
    """ Flattening the dictonaries inside the unnested column (transformed in a df) """
    #Local function that flats a single dictonary
    def flat_dict_value(column_name, column_value):
        dict_to_df = pd.Series(column_value).to_frame().transpose()
        dict_to_df.columns = [column_name + '_' + str(name) for name in dict_to_df.columns]
        return dict_to_df
    #Iterating over each column
    df_row = pd.DataFrame()
    for col in df.columns:
        df_flat = flat_dict_value(col, df.loc[row_index,col])
        df_row = pd.concat([df_row, df_flat], axis=1)
    
    return df_row

def flat_unnested_column(df_unnested, df_mother):
    """ Iterating over each row """
    df_out = pd.DataFrame()
    for row in df_unnested.index:
        df_flat = flat_a_row(df_unnested, row)
        df_out = pd.concat([df_out, df_flat]) 

    df_out = df_out.set_index(df_mother.index)
    return df_out


def unnest(df):
    """ Concatenate the original data with the unnested ones """
    column_to_df = unnest_column(df)
    flatten = flat_unnested_column(column_to_df, df)
    return pd.concat([df, flatten], axis=1).drop(columns='event_params') 


def get_df(folder_path):
    """ Return all the unnested and flatten data in a day"""
    df_list = get_data(folder_path)
    for df in df_list:
        print('..' + df + '..')
        globals()[df] = unnest(pd.DataFrame(globals()[df]))#[Filters[df]]


With get_data function we download all the dataframes of a day contained in a folder of the form: "datos-first_day-last_day"

In [16]:
df_list = get_data('./datos-01abr-07abr/20210403')

In this folder there are 9 df:
['FILTER_ADDED_20210403', 'RESTAURANT_BLACKLISTED_20210403', 'HOME_FILTERS_20210403', 'RESTAURANT_FAVOURITE_20210403', 'RESTAURANT_ACTION_20210403', 'MY_FAVS_FILTER_ADDED_20210403', 'view_item_20210403', 'MY_FAVS_REMOVE_RESTAURANT_20210403']


Showing unnested dataframes: get_df generates global variables

In [19]:
get_df('./datos-01abr-07abr/20210403')

In this folder there are 9 df:
['FILTER_ADDED_20210403', 'RESTAURANT_BLACKLISTED_20210403', 'HOME_FILTERS_20210403', 'RESTAURANT_FAVOURITE_20210403', 'RESTAURANT_ACTION_20210403', 'MY_FAVS_FILTER_ADDED_20210403', 'view_item_20210403', 'MY_FAVS_REMOVE_RESTAURANT_20210403']
..FILTER_ADDED_20210403..
..RESTAURANT_BLACKLISTED_20210403..
..HOME_FILTERS_20210403..
..RESTAURANT_FAVOURITE_20210403..
..RESTAURANT_ACTION_20210403..
..MY_FAVS_FILTER_ADDED_20210403..
..view_item_20210403..
..MY_FAVS_REMOVE_RESTAURANT_20210403..


In [20]:
RESTAURANT_ACTION_20210403.head()

Unnamed: 0,event_timestamp,event_name,user_pseudo_id,ga_session_id_string_value,ga_session_id_int_value,ga_session_id_float_value,ga_session_id_double_value,firebase_screen_id_string_value,firebase_screen_id_int_value,firebase_screen_id_float_value,firebase_screen_id_double_value,engaged_session_event_string_value,engaged_session_event_int_value,engaged_session_event_float_value,engaged_session_event_double_value,action_string_value,action_int_value,action_float_value,action_double_value,firebase_screen_string_value,firebase_screen_int_value,firebase_screen_float_value,firebase_screen_double_value,timestamp_string_value,timestamp_int_value,timestamp_float_value,timestamp_double_value,firebase_screen_class_string_value,firebase_screen_class_int_value,firebase_screen_class_float_value,firebase_screen_class_double_value,firebase_event_origin_string_value,firebase_event_origin_int_value,firebase_event_origin_float_value,firebase_event_origin_double_value,name_string_value,name_int_value,name_float_value,name_double_value,id_string_value,id_int_value,id_float_value,id_double_value,ga_session_number_string_value,ga_session_number_int_value,ga_session_number_float_value,ga_session_number_double_value,engaged_session_event_0
0,1617482162766918,RESTAURANT_ACTION,08f523f6c5269ac261fde43a6b8612fb,,1617481828,,,,-6699667872550144056,,,,1.0,,,menu,,,,RestaurantDetails,,,,,,,1617482159758.0,RestaurantDetails,,,,app,,,,La Maruca Castellana,,,,497,,,,,1,,,
1,1617431480539984,RESTAURANT_ACTION,2da3f0dd8e33aca615faacca7d9edcb6,,1617431395,,,,873550206509334963,,,,1.0,,,menu,,,,RestaurantDetails,,,,,,,1617431480078.0,RestaurantDetails,,,,app,,,,Benares,,,,403,,,,,1,,,
2,1617405052388807,RESTAURANT_ACTION,7a195ff4383c72165c4f2805d5a2ba3a,,1617404966,,,,7671154372456552128,,,,1.0,,,curated_by,,,,RestaurantDetails,,,,,,,1617405051301.0,RestaurantDetails,,,,app,,,,Concepto X,,,,451,,,,,2,,,
3,1617405070367815,RESTAURANT_ACTION,7a195ff4383c72165c4f2805d5a2ba3a,,1617404966,,,,7671154372456552131,,,,1.0,,,instagram,,,,RestaurantDetails,,,,,,,1617405069354.0,RestaurantDetails,,,,app,,,,Concepto X,,,,451,,,,,2,,,
4,1617472615061355,RESTAURANT_ACTION,165332d1c3ac5fff31036c8615d9667c,,1617472164,,,,454524847707602982,,,,,,,instagram,,,,RestaurantDetails,,,,,,,1617472615050.0,RestaurantDetails,,,,app,,,,Benares,,,,403,,,,,1,,,


### Selecting the relevant columns

In [11]:
Common_cols                    = ['event_name', 'event_timestamp', 'user_pseudo_id', 'firebase_screen_string_value']

cols_FILTER_ADDED              = Common_cols + ['type_string_value'] 

cols_view_item                 = Common_cols + ['item_name_string_value'] 
                              
cols_CARD_SWIPE                = Common_cols + ['name_string_value', 'dir_string_value']

cols_RESTAURANT_ACTION         = Common_cols + ['name_string_value', 'action_string_value']

cols_RESTAURANT_BLACKLISTED    = Common_cols + ['name_string_value']

cols_HOME_FILTERS              = Common_cols + ['vibes_string_value']

cols_RESTAURANT_FAVOURITE      = Common_cols + ['name_string_value']

cols_MY_FAVS_REMOVE_RESTAURANT = Common_cols + ['name_string_value']

cols_MY_FAVS_FILTER_ADDED      = Common_cols + ['type_string_value']

Filters = {'FILTER_ADDED':cols_FILTER_ADDED, 'view_item':cols_view_item, 
           'CARD_SWIPE':cols_CARD_SWIPE, 'RESTAURANT_ACTION':cols_RESTAURANT_ACTION, 
           'RESTAURANT_BLACKLISTED':cols_RESTAURANT_BLACKLISTED, 
           'HOME_FILTERS':cols_HOME_FILTERS, 'RESTAURANT_FAVOURITE':cols_RESTAURANT_FAVOURITE,
           'MY_FAVS_REMOVE_RESTAURANT':cols_MY_FAVS_REMOVE_RESTAURANT,
           'MY_FAVS_FILTER_ADDED':cols_MY_FAVS_FILTER_ADDED}



In [12]:
def filter_columns(df_list):
    """ It filters the relevant columns for a list of day dataframes """    
    #Add user_id when there is
    Filters_list = Filters.copy()
    for df in df_list:
        data = globals()[df]
        if 'user_id' in data.columns:
            filter_key = df[:len(df) - 9]
            Filters_list[filter_key] = Filters_list[filter_key] + ['user_id'] 
            print(Filters_list[filter_key])

    for df in df_list:
        filter_key = df[:len(df) - 9]
        globals()[df] = (globals()[df])[Filters_list[filter_key]]
        #Rename view_item column:
        if filter_key == 'view_item':
            globals()[df].rename(columns={'item_name_string_value': 'name_string_value'}, inplace=True)
    return

            
            
            

Example of a data of a day

In [21]:
data_day = pd.concat([FILTER_ADDED_20210403, view_item_20210403, RESTAURANT_ACTION_20210403,
            RESTAURANT_BLACKLISTED_20210403, HOME_FILTERS_20210403, RESTAURANT_FAVOURITE_20210403,\
                      MY_FAVS_FILTER_ADDED_20210403,MY_FAVS_REMOVE_RESTAURANT_20210403]).reset_index(drop=True)


In [22]:
data_day.head()

Unnamed: 0,event_timestamp,event_name,user_pseudo_id,ga_session_id_string_value,ga_session_id_int_value,ga_session_id_float_value,ga_session_id_double_value,error_value_string_value,error_value_int_value,error_value_float_value,error_value_double_value,firebase_event_origin_string_value,firebase_event_origin_int_value,firebase_event_origin_float_value,firebase_event_origin_double_value,type_string_value,type_int_value,type_float_value,type_double_value,engaged_session_event_string_value,engaged_session_event_int_value,engaged_session_event_float_value,engaged_session_event_double_value,id_string_value,id_int_value,id_float_value,id_double_value,firebase_screen_class_string_value,firebase_screen_class_int_value,firebase_screen_class_float_value,firebase_screen_class_double_value,firebase_screen_id_string_value,firebase_screen_id_int_value,firebase_screen_id_float_value,firebase_screen_id_double_value,ga_session_number_string_value,ga_session_number_int_value,ga_session_number_float_value,ga_session_number_double_value,firebase_screen_string_value,firebase_screen_int_value,firebase_screen_float_value,firebase_screen_double_value,timestamp_string_value,timestamp_int_value,timestamp_float_value,timestamp_double_value,firebase_error_string_value,firebase_error_int_value,firebase_error_float_value,firebase_error_double_value,engaged_session_event_0,item_id_string_value,item_id_int_value,item_id_float_value,item_id_double_value,item_category_string_value,item_category_int_value,item_category_float_value,item_category_double_value,item_name_string_value,item_name_int_value,item_name_float_value,item_name_double_value,action_string_value,action_int_value,action_float_value,action_double_value,name_string_value,name_int_value,name_float_value,name_double_value,vibes_string_value,vibes_int_value,vibes_float_value,vibes_double_value,option_string_value,option_int_value,option_float_value,option_double_value,neighborhoods_0,session_engaged_0,vibes_0,neighborhoods_string_value,neighborhoods_int_value,neighborhoods_float_value,neighborhoods_double_value,session_engaged_string_value,session_engaged_int_value,session_engaged_float_value,session_engaged_double_value,firebase_screen_0,firebase_screen_id_0,firebase_screen_class_0
0,1617482034296969,FILTER_ADDED,08f523f6c5269ac261fde43a6b8612fb,,1617481828,,,FILTER_ADDED,,,,app,,,,Española,,,,,1.0,,,19,,,,SelectFilters,,,,,-19002095382642162,,,,1,,,SelectFilters,,,,,,,1617482031289.0,,21,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1617405037559801,FILTER_ADDED,7a195ff4383c72165c4f2805d5a2ba3a,,1617404966,,,FILTER_ADDED,,,,app,,,,Menos de 50€,,,,,1.0,,,21,,,,SelectFilters,,,,,7671154372456552126,,,,2,,,SelectFilters,,,,,,,1617405036546.0,,21,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1617472666560363,FILTER_ADDED,165332d1c3ac5fff31036c8615d9667c,,1617472164,,,FILTER_ADDED,,,,app,,,,Sol,,,,,,,,40,,,,SelectFilters,,,,,454524847707602985,,,,1,,,SelectFilters,,,,,,,1617472666554.0,,21,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1617472696925374,FILTER_ADDED,165332d1c3ac5fff31036c8615d9667c,,1617472164,,,FILTER_ADDED,,,,app,,,,Sol,,,,,,,,40,,,,SelectFilters,,,,,454524847707602989,,,,1,,,SelectFilters,,,,,,,1617472696918.0,,21,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1617465985004581,FILTER_ADDED,ece89f49d06509154cce92c727bd1cce,,1617465658,,,FILTER_ADDED,,,,app,,,,La Latina,,,,,1.0,,,25,,,,TinderCards,,,,,-1307328247607765441,,,,1,,,TinderCards,,,,,,,1617465984995.0,,21,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [23]:
filter_columns(['FILTER_ADDED_20210403', 'RESTAURANT_BLACKLISTED_20210403', 'HOME_FILTERS_20210403', 'RESTAURANT_FAVOURITE_20210403', 'RESTAURANT_ACTION_20210403', 'MY_FAVS_FILTER_ADDED_20210403', 'view_item_20210403', 'MY_FAVS_REMOVE_RESTAURANT_20210403'])


After filter_columns all dataframes have just the selected columns

In [24]:
RESTAURANT_ACTION_20210403.head()

Unnamed: 0,event_name,event_timestamp,user_pseudo_id,firebase_screen_string_value,name_string_value,action_string_value
0,RESTAURANT_ACTION,1617482162766918,08f523f6c5269ac261fde43a6b8612fb,RestaurantDetails,La Maruca Castellana,menu
1,RESTAURANT_ACTION,1617431480539984,2da3f0dd8e33aca615faacca7d9edcb6,RestaurantDetails,Benares,menu
2,RESTAURANT_ACTION,1617405052388807,7a195ff4383c72165c4f2805d5a2ba3a,RestaurantDetails,Concepto X,curated_by
3,RESTAURANT_ACTION,1617405070367815,7a195ff4383c72165c4f2805d5a2ba3a,RestaurantDetails,Concepto X,instagram
4,RESTAURANT_ACTION,1617472615061355,165332d1c3ac5fff31036c8615d9667c,RestaurantDetails,Benares,instagram


### Putting all together to retrieve all the data from a folder of several days

In [25]:

def get_day_list(folder_path):
    """ Return the list of string name of data in a day but not the dataframes"""
    def open_json(path):
        with open(path) as f:
            data = json.load(f)
        return data    

    directory = os.listdir(folder_path)
    df_list = []
    #Each df takes the name of the given event
    for i in range(len(directory)):
        filename_json = directory[i]
        if filename_json.endswith(".json"):
            file_path = folder_path + '/' + filename_json 
            data      = open_json(file_path)
            #print(data)
            filename  = pd.DataFrame(data).event_name.unique()[0] + '_' + folder_path.split('/')[-1]
            df_list.append(filename)
         
    return df_list 



def get_week_data(week_folder_path, save=True):
    """ Finally it returns the week data frame """
    all_day_folders  = os.listdir(week_folder_path)[1:]
    #masking just the date folders
    mask = [s.isdigit() for s in all_day_folders]
    all_day_folders = list(compress(all_day_folders, mask))
    df_week = []
    for fold in all_day_folders:
        #getting all the dataframe for a day
        get_df(week_folder_path + '/' + fold)
        #getting the list of dataframe names
        df_list = get_day_list(week_folder_path + '/' + fold)
        #filtering the unecessary columns
        filter_columns(df_list)
        #concatenating all the dataframes obtaining a single day data
        df_day = []
        for df in df_list:
            df_day.append(globals()[df])
        day = pd.concat(df_day)
        #concatenating each day df obtaining a week df
        df_week.append(day)
    week = pd.concat(df_week)  
    
    if save==True:
        day1, day2 = re.findall(r'\d{2}\w{3}', week_folder_path)
        path_csv = 'week_{}_{}.csv'.format(day1, day2)
        week.to_csv(path_csv )
        
    return week    
        

First week data

In [None]:
week = get_week_data('./datos-01mar-08mar') 
week

In [380]:
#week.to_csv('./week_01mar_08mar.csv')
week.head()


Unnamed: 0,event_name,event_timestamp,user_pseudo_id,firebase_screen_string_value,name_string_value,type_string_value,action_string_value,vibes_string_value,dir_string_value,user_id
0,view_item,1615122075978494,cf787fe9a8f5cd3d0a7119e296920e96,RestaurantDetails,Coquetto Bar,,,,,
1,view_item,1615122002536876,cf787fe9a8f5cd3d0a7119e296920e96,RestaurantDetails,Coquetto Bar,,,,,
2,view_item,1615148229440659,C42314CB72C146BFB3A8A447A1F13F1A,RestaurantDetails,El Señor Martín,,,,,
3,view_item,1615152930412024,C42314CB72C146BFB3A8A447A1F13F1A,RestaurantDetails,La Catapa,,,,,
4,view_item,1615152981578048,C42314CB72C146BFB3A8A447A1F13F1A,RestaurantDetails,La Monte,,,,,


In [382]:
week.shape

(21182, 10)

In [383]:
week.loc[:,'event_name'].unique()

array(['view_item', 'FILTER_ADDED', 'MY_FAVS_FILTER_ADDED',
       'RESTAURANT_ACTION', 'RESTAURANT_FAVOURITE', 'HOME_FILTERS',
       'MY_FAVS_REMOVE_RESTAURANT', 'CARD_SWIPE',
       'RESTAURANT_BLACKLISTED'], dtype=object)

Second week data

In [57]:
week2 = get_week_data('./datos-09mar-31mar') 
week2

In this folder there are 8 df:
['MY_FAVS_FILTER_ADDED_20210309', 'MY_FAVS_REMOVE_RESTAURANT_20210309', 'CARD_SWIPE_20210309', 'RESTAURANT_ACTION_20210309', 'RESTAURANT_FAVOURITE_20210309', 'HOME_FILTERS_20210309', 'view_item_20210309', 'FILTER_ADDED_20210309']
..MY_FAVS_FILTER_ADDED_20210309..
..MY_FAVS_REMOVE_RESTAURANT_20210309..
..CARD_SWIPE_20210309..
..RESTAURANT_ACTION_20210309..
..RESTAURANT_FAVOURITE_20210309..
..HOME_FILTERS_20210309..
..view_item_20210309..
..FILTER_ADDED_20210309..
In this folder there are 8 df:
['CARD_SWIPE_20210330', 'MY_FAVS_REMOVE_RESTAURANT_20210330', 'HOME_FILTERS_20210330', 'MY_FAVS_FILTER_ADDED_20210330', 'FILTER_ADDED_20210330', 'RESTAURANT_FAVOURITE_20210330', 'view_item_20210330', 'RESTAURANT_ACTION_20210330']
..CARD_SWIPE_20210330..
..MY_FAVS_REMOVE_RESTAURANT_20210330..
..HOME_FILTERS_20210330..
..MY_FAVS_FILTER_ADDED_20210330..
..FILTER_ADDED_20210330..
..RESTAURANT_FAVOURITE_20210330..
..view_item_20210330..
..RESTAURANT_ACTION_20210330..
In t

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In this folder there are 8 df:
['MY_FAVS_REMOVE_RESTAURANT_20210329', 'FILTER_ADDED_20210329', 'CARD_SWIPE_20210329', 'HOME_FILTERS_20210329', 'RESTAURANT_ACTION_20210329', 'RESTAURANT_FAVOURITE_20210329', 'view_item_20210329', 'MY_FAVS_FILTER_ADDED_20210329']
..MY_FAVS_REMOVE_RESTAURANT_20210329..
..FILTER_ADDED_20210329..
..CARD_SWIPE_20210329..
..HOME_FILTERS_20210329..
..RESTAURANT_ACTION_20210329..
..RESTAURANT_FAVOURITE_20210329..
..view_item_20210329..
..MY_FAVS_FILTER_ADDED_20210329..
In this folder there are 8 df:
['HOME_FILTERS_20210311', 'view_item_20210311', 'FILTER_ADDED_20210311', 'RESTAURANT_ACTION_20210311', 'CARD_SWIPE_20210311', 'RESTAURANT_FAVOURITE_20210311', 'MY_FAVS_FILTER_ADDED_20210311', 'MY_FAVS_REMOVE_RESTAURANT_20210311']
..HOME_FILTERS_20210311..
..view_item_20210311..
..FILTER_ADDED_20210311..
..RESTAURANT_ACTION_20210311..
..CARD_SWIPE_20210311..
..RESTAURANT_FAVOURITE_20210311..
..MY_FAVS_FILTER_ADDED_20210311..
..MY_FAVS_REMOVE_RESTAURANT_20210311..
In t

Unnamed: 0,event_name,event_timestamp,user_pseudo_id,firebase_screen_string_value,type_string_value,name_string_value,dir_string_value,action_string_value,vibes_string_value
0,MY_FAVS_FILTER_ADDED,1615311912205902,E971EE6486E14155A3310DF2027CF5CB,MyRestaurants,🍷Colectivo Decantado,,,,
1,MY_FAVS_FILTER_ADDED,1615311862312883,E971EE6486E14155A3310DF2027CF5CB,SelectFilters,Para ir con amigos,,,,
2,MY_FAVS_FILTER_ADDED,1615311922190919,E971EE6486E14155A3310DF2027CF5CB,SelectFilters,Menos de 50€,,,,
3,MY_FAVS_FILTER_ADDED,1615311929605928,E971EE6486E14155A3310DF2027CF5CB,MyRestaurants,Salamanca,,,,
4,MY_FAVS_FILTER_ADDED,1615311922780924,E971EE6486E14155A3310DF2027CF5CB,MyRestaurants,Para ir con amigos,,,,
...,...,...,...,...,...,...,...,...,...
533,RESTAURANT_FAVOURITE,1616967820501450,61F3113F8C4B44C493D59159039616DD,TinderCards,,La Espumosa,,,
534,RESTAURANT_FAVOURITE,1616967829108460,61F3113F8C4B44C493D59159039616DD,TinderCards,,El Perro y la Galleta,,,
535,RESTAURANT_FAVOURITE,1616967841319472,61F3113F8C4B44C493D59159039616DD,RestaurantDetails,,Bel Mondo,,,
536,RESTAURANT_FAVOURITE,1616967845277476,61F3113F8C4B44C493D59159039616DD,TinderCards,,Bel Mondo,,,


Third week data

In [42]:
week3 = get_week_data('./datos-16abr-31abr') 


In this folder there are 8 df:
['MY_FAVS_REMOVE_RESTAURANT_20210422', 'MY_FAVS_FILTER_ADDED_20210422', 'FILTER_ADDED_20210422', 'view_item_20210422', 'CARD_SWIPE_20210422', 'RESTAURANT_FAVOURITE_20210422', 'HOME_FILTERS_20210422', 'RESTAURANT_ACTION_20210422']
..MY_FAVS_REMOVE_RESTAURANT_20210422..
..MY_FAVS_FILTER_ADDED_20210422..
..FILTER_ADDED_20210422..
..view_item_20210422..
..CARD_SWIPE_20210422..
..RESTAURANT_FAVOURITE_20210422..
..HOME_FILTERS_20210422..
..RESTAURANT_ACTION_20210422..
In this folder there are 9 df:
['RESTAURANT_BLACKLISTED_20210423', 'view_item_20210423', 'MY_FAVS_REMOVE_RESTAURANT_20210423', 'HOME_FILTERS_20210423', 'RESTAURANT_ACTION_20210423', 'RESTAURANT_FAVOURITE_20210423', 'FILTER_ADDED_20210423', 'CARD_SWIPE_20210423', 'MY_FAVS_FILTER_ADDED_20210423']
..RESTAURANT_BLACKLISTED_20210423..
..view_item_20210423..
..MY_FAVS_REMOVE_RESTAURANT_20210423..
..HOME_FILTERS_20210423..
..RESTAURANT_ACTION_20210423..
..RESTAURANT_FAVOURITE_20210423..
..FILTER_ADDED_20

Concatenating all data obtaining the full dataframe from $1^{th}$ of march march to $31^{th}$ of april

In [51]:
wee3_1 = pd.read_csv('week_01abr_07abr.csv', index_col=0)
wee3_2 = pd.read_csv('week_08abr_15abr.csv', index_col=0)
wee3_3 = pd.read_csv('week_16abr_31abr.csv', index_col=0)
month2 = pd.concat([wee3_1,wee3_2,wee3_3]).reset_index(drop=True)
month1 = pd.read_csv('week_01mar_31mar.csv', index_col=0)
full_data = pd.concat([month1,month2]).reset_index(drop=True)
full_data.to_csv('week_01mar_30abr.csv')

In [26]:
pd.read_csv('week_01mar_30abr.csv', index_col=0).head()

Unnamed: 0,event_name,event_timestamp,user_pseudo_id,firebase_screen_string_value,name_string_value,type_string_value,action_string_value,vibes_string_value,dir_string_value
0,view_item,1615122075978494,cf787fe9a8f5cd3d0a7119e296920e96,RestaurantDetails,Coquetto Bar,,,,
1,view_item,1615122002536876,cf787fe9a8f5cd3d0a7119e296920e96,RestaurantDetails,Coquetto Bar,,,,
2,view_item,1615148229440659,C42314CB72C146BFB3A8A447A1F13F1A,RestaurantDetails,El Señor Martín,,,,
3,view_item,1615152930412024,C42314CB72C146BFB3A8A447A1F13F1A,RestaurantDetails,La Catapa,,,,
4,view_item,1615152981578048,C42314CB72C146BFB3A8A447A1F13F1A,RestaurantDetails,La Monte,,,,


### Useful functions to download data from the app

In [27]:
def open_app_datas(dir_path):
    """ To Download Init Data """
    def save_Datos(name, dir_path):
        file_path = dir_path + '/' + name + '.csv' 
        return pd.read_csv(file_path)

    list_of_df = []
    for filename_csv in os.listdir(dir_path):
        if filename_csv.endswith(".csv"):
            filename = os.path.splitext(filename_csv)[0]
            globals()[filename + '_' + dir_path] = save_Datos(filename, dir_path)
            list_of_df.append(filename + '_' + dir_path)
    
    return list_of_df 

open_app_datas gives all the dataframes contained in a folder of the form 'Datos_Init_dd_mm_yyyy', generating global variables of the dataframes

In [28]:
list_data_init = open_app_datas('Datos_Init_01_05_2021')
list_data_init

['homes__first_action_neighborhoods_Datos_Init_01_05_2021',
 'reservations_Datos_Init_01_05_2021',
 'strapi_permission_Datos_Init_01_05_2021',
 'homes__second_action_food_types_Datos_Init_01_05_2021',
 'homes__second_action_neighborhoods_Datos_Init_01_05_2021',
 'homes_Datos_Init_01_05_2021',
 'profiles__vibes_Datos_Init_01_05_2021',
 'push_tokens_Datos_Init_01_05_2021',
 'components_restaurants_promoteds_Datos_Init_01_05_2021',
 'vibe_Datos_Init_01_05_2021',
 'neighborhoods_Datos_Init_01_05_2021',
 'profiles_Datos_Init_01_05_2021',
 'homes__first_action_food_types_Datos_Init_01_05_2021',
 'restaurants_components_Datos_Init_01_05_2021',
 'homecards__vibes_Datos_Init_01_05_2021',
 'foodtypes_Datos_Init_01_05_2021',
 'users-permissions_permission_Datos_Init_01_05_2021',
 'homecards_Datos_Init_01_05_2021',
 'profiles__food_types_Datos_Init_01_05_2021',
 'strapi_webhooks_Datos_Init_01_05_2021',
 'profiles__favorites_Datos_Init_01_05_2021',
 'profiles__push_tokens_Datos_Init_01_05_2021',
 '

In [29]:
restaurants_Datos_Init_01_05_2021.head()

Unnamed: 0,id,name,chef,price,specialDish,phoneNumber,bookUrl,instagramUrl,chefInstagramUrl,mapUrl,website,curated_by,curatedByInstagramUrl,deliveryBy,deliveryUrl,menuUrl,isHidden,isPromoted,stars,legacy_id,published_at,created_by,updated_by,created_at,updated_at,neighborhood,city,reservation,slug
0,312,CORE,Clare Smyth,£125 (Tasting Menu),,+44 20 3937 5086,https://www.opentable.co.uk/widget/reservation...,https://www.instagram.com/corebyclaresmyth/,https://www.instagram.com/chefclaresmyth/,https://goo.gl/maps/N84LNnitrspS8N4M7,https://www.corebyclaresmyth.com/,,,,,,0,0,2.0,036N28qoaexuxy0eYN9C,2021-01-20 16:49:34,,1.0,2021-01-20 16:49:34,2021-04-26 15:11:59,,,,core
1,313,C. B. at Bibendum,Claude Bosi,£185 (Tasting Menu),,+44 20 7629 8866,https://www.opentable.com/restref/client/?rid=...,https://www.instagram.com/claudebosiatbibendum/,,https://goo.gl/maps/MSWVZ2RG8iQYTtpa9,https://claudebosi.com/,,,,,,0,0,2.0,0adXSSSDDoWvPiVmhp9g,2021-01-20 16:49:36,,1.0,2021-01-20 16:49:36,2021-04-26 15:14:47,,,,c-b-at-bibendum
2,314,Le Bistroman Atelier,Stephane del Rio,Entre 60€ y 70€ por persona,Filet de boeuf Wellington,+34 914 47 2713,https://module.lafourchette.com/es_ES/module/5...,https://www.instagram.com/bistroman_atelier/,,https://goo.gl/maps/em4dmjg6rBWDrY7s7,https://lebistroman.es/,Gastro Godzilla,https://www.instagram.com/gastrogodzilla/,Teléfono,https://lebistroman.es/servicio-domicilio/,https://lebistroman.es/restaurant/,0,0,0.0,1BRW2NZrs1srK2ZVHFit,2021-01-20 16:49:38,,1.0,2021-01-20 16:49:38,2021-04-26 14:56:31,40.0,4.0,,le-bistroman-atelier
3,315,Mediamanga,Iván Castro,50€ per person,Gambita frita ahumada,+34 938 32 56 94,http://www.mediamanga.es/booking/reservar_mesa...,https://www.instagram.com/mediamangabcn/,,https://goo.gl/maps/9QPLh7TGRfmYKeBx8,https://www.mediamanga.es/es/,Equipo Velada,,,,,0,0,0.0,1IAul9VsCfKiLzb0MaU3,2021-01-20 16:49:40,,1.0,2021-01-20 16:49:40,2021-04-26 14:52:28,31.0,5.0,,mediamanga
4,316,Zoko Madrid,Javi Álvarez,40€ pp,Empanada de atún en 3 texturas,+34 911 44 93 69,https://module.lafourchette.com/es_ES/module/5...,https://www.instagram.com/zokomadrid/,https://www.instagram.com/chefjavieralvz/,https://goo.gl/maps/pbqTsSnL5NMc9bjP8,https://restaurantezoko.com/project/zoko-madrid/,Equipo Velada,https://www.instagram.com/velada.app/,Web Zoko,https://us12.eveve.com/takeaway/?est=zoko&coun...,https://restaurantezoko.com/wp-content/uploads...,0,0,0.0,1LItQJBULz2AmqslBLSX,2021-01-20 16:49:42,,1.0,2021-01-20 16:49:42,2021-04-26 14:42:51,28.0,4.0,,zoko-madrid
