In [3]:
import pandas as pd
import numpy as np

import plotly.express as px

# <center>Installs</center>

In [4]:
# Load the raw install events; `event_date` is parsed to datetime64 while reading.
# pandas labels a header-less leading column "Unnamed: 0" (presumably a row index
# written by an earlier to_csv -- TODO confirm against the export step). Such a
# column is unique per row, so it would stop drop_duplicates() from ever matching
# two rows; it is dropped here when present. errors='ignore' keeps the cell working
# if the file does not contain that column.
df_installs = (
    pd.read_csv('../../data/installs.csv', parse_dates=['event_date'])
    .drop(columns=['Unnamed: 0'], errors='ignore')
)
df_installs.head()

Unnamed: 0,install_id,country_id,app_id,network_id,event_date,device_os_version
0,6b168a9f7a13119b87bfeb834d159f6879ff000205cb27...,1,174,26,2022-12-13,10
1,218ecd973c988fe76dfd247f92e9317485010844163a51...,1,174,60,2022-12-14,10
2,f67c1382fc3af8a24cb6f23bff381279d582eb67d4043e...,17,122,26,2022-12-30,11
3,afe4dc7c1663fba7ba3bc7cf6a308c38c1cb409d253f7f...,1,121,1111,2022-12-30,8.0.0
4,417acac6a6bdb88a83371f8ab482691d16896e8681afcb...,109,325,10,2022-12-05,11


In [5]:
# Store the identifier columns and the OS version as pandas categoricals.
df_installs = df_installs.astype({
    'country_id': 'category',
    'app_id': 'category',
    'network_id': 'category',
    'device_os_version': 'category',
})
# Drop duplicate rows (drop_duplicates compares entire rows, not columns).
df_installs = df_installs.drop_duplicates()

In [4]:
# Print a summary of df_installs: index range, per-column non-null counts and dtypes, memory usage.
df_installs.info()

<class 'pandas.core.frame.DataFrame'>
Index: 216887 entries, 0 to 216887
Data columns (total 6 columns):
 #   Column             Non-Null Count   Dtype         
---  ------             --------------   -----         
 0   install_id         216887 non-null  object        
 1   country_id         216887 non-null  category      
 2   app_id             216887 non-null  category      
 3   network_id         216887 non-null  category      
 4   event_date         216887 non-null  datetime64[ns]
 5   device_os_version  216887 non-null  category      
dtypes: category(4), datetime64[ns](1), object(1)
memory usage: 5.8+ MB


In [5]:
# Names of all categorical columns, in frame order.
category_columns = df_installs.select_dtypes(include='category').columns.tolist()
# For each of them, print its name, its distinct values and a separator rule.
for col in category_columns:
    print(
        f'Column: {col}',
        f'Unique values: {df_installs[col].unique().tolist()}',
        '-'*120,
        sep='\n',
    )

Column: country_id
Unique values: [1, 17, 109, 213]
------------------------------------------------------------------------------------------------------------------------
Column: app_id
Unique values: [174, 122, 121, 325, 277, 97, 374, 95, 71, 154, 94, 73, 380, 189, 104, 275, 256, 98, 370, 321, 302, 298, 237, 184, 404, 110, 407, 148, 405, 402, 390, 152, 408, 191, 219, 96, 183, 304, 179, 108, 238, 172, 120, 204, 235, 115, 180, 228, 150, 294, 113]
------------------------------------------------------------------------------------------------------------------------
Column: network_id
Unique values: [26, 60, 1111, 10]
------------------------------------------------------------------------------------------------------------------------
Column: device_os_version
Unique values: ['10', '11', '8.0.0', '12', '16.1.1', '13', '9', '16.0.3', '8.1.0', '15.3.1', '15.2.1', '15.7.1', '16.0.2', '16.1.2', '15.6.1', '7.0', '16.0', '16.1', '15.7', '16.3', '7.1.2', '15.5', '5.1.1', '7.1.1', '6.0', '14

# <center>Data visualization</center>

In [6]:
# Installs per country as a two-column frame.
# NOTE(review): px.pie below reads the counts from a column called 'count'; that is the
# name pandas >= 2.0 gives value_counts() results -- confirm the pandas version in use.
vis_df = df_installs.country_id.value_counts().to_frame().reset_index()

fig = px.pie(
    vis_df,
    names='country_id',
    values='count',
    color_discrete_sequence=['#131148', '#00ba58'],
)

# Logo: bottom-left corner pinned just above the top-left of the paper area.
# NOTE(review): `source` is a relative file path, while Plotly documents it as an
# image URL -- confirm the logo really appears in the exported SVG.
fig.add_layout_image(
    source="../../assets/just_dice.svg",
    xref="paper", yref="paper",
    x=0, y=1.03,
    sizex=0.2, sizey=0.2,
    xanchor="left", yanchor="bottom",
)

# Centered title near the top of the figure, plus the legend caption.
fig.update_layout(
    title=dict(
        text='Installs',
        x=0.5, y=0.95,
        xanchor='center', yanchor='top',
        font=dict(size=25),
    ),
    legend_title='Country id',
)

fig.show()
fig.write_image("../../Reports/installs/Pie_installs_country.svg")

# The helper frame is only needed for this chart.
del vis_df

In [7]:
# Installs per network as a two-column frame.
# NOTE(review): px.pie below reads the counts from a column called 'count'; that is the
# name pandas >= 2.0 gives value_counts() results -- confirm the pandas version in use.
vis_df = df_installs.network_id.value_counts().to_frame().reset_index()

fig = px.pie(
    vis_df,
    names='network_id',
    values='count',
    color_discrete_sequence=['#131148', '#00ba58'],
)

# Logo: bottom-left corner pinned just above the top-left of the paper area.
# NOTE(review): `source` is a relative file path, while Plotly documents it as an
# image URL -- confirm the logo really appears in the exported SVG.
fig.add_layout_image(
    source="../../assets/just_dice.svg",
    xref="paper", yref="paper",
    x=0, y=1.03,
    sizex=0.2, sizey=0.2,
    xanchor="left", yanchor="bottom",
)

# Centered title near the top of the figure, plus the legend caption.
fig.update_layout(
    title=dict(
        text='Installs',
        x=0.5, y=0.95,
        xanchor='center', yanchor='top',
        font=dict(size=25),
    ),
    legend_title='network id',
)

fig.show()
fig.write_image("../../Reports/installs/Pie_installs_network.svg")

# The helper frame is only needed for this chart.
del vis_df

In [13]:
# Installs per app; value_counts() orders the rows from most to fewest installs.
vis_df = df_installs.app_id.value_counts().to_frame().reset_index()

# Turn the numeric ids into readable axis labels such as "app 174".
vis_df['app_id'] = (
    vis_df['app_id']
    .astype('category')
    .apply(lambda app: f'app {app}')
)

fig = px.bar(
    vis_df,
    x='app_id',
    y='count',
    barmode='group',
    template='plotly_white',
    color_discrete_sequence=['#131148', '#00ba58'],
)

# Centered title near the top of the figure, plus axis captions.
fig.update_layout(
    title=dict(
        text='Installs',
        x=0.5, y=0.95,
        xanchor='center', yanchor='top',
        font=dict(size=25),
    ),
    xaxis_title='App',
    yaxis_title='count',
)

# Logo: bottom-left corner pinned just above the top-left of the paper area.
# NOTE(review): `source` is a relative file path, while Plotly documents it as an
# image URL -- confirm the logo really renders.
fig.add_layout_image(
    source="../../assets/just_dice.svg",
    xref="paper", yref="paper",
    x=0, y=1.03,
    sizex=0.2, sizey=0.2,
    xanchor="left", yanchor="bottom",
)

fig.show()

# The helper frame is only needed for this chart.
del vis_df

In [9]:
# Daily install counts, reshaped to one row per date and one column per network.
# Every (date, network) pair occurs once after the groupby, so pivot_table's default
# mean aggregation simply passes each count through.
vis_df = (
    df_installs
    .groupby(by=['event_date', 'network_id'])
    .size()
    .reset_index(name='counts')
    .pivot_table(index='event_date', columns='network_id')
    ['counts']
    .reset_index()
)

# NOTE(review): vis_df.columns also contains 'event_date', the column already used
# for x -- confirm Plotly Express leaves it out of the plotted series.
fig = px.line(
    vis_df,
    x="event_date",
    y=vis_df.columns,
    template="plotly_white",
    color_discrete_sequence=['#131148', '#00ba58', '#31D1C9', '#4BE000'],
)

# One tick per month, labelled with abbreviated month over year.
fig.update_xaxes(dtick="M1", tickformat="%b\n%Y", ticklabelmode="period")

# Logo: bottom-left corner pinned just above the top-left of the paper area.
# NOTE(review): `source` is a relative file path, while Plotly documents it as an
# image URL -- confirm the logo really appears in the exported SVG.
fig.add_layout_image(
    source="../../assets/just_dice.svg",
    xref="paper", yref="paper",
    x=0, y=1.03,
    sizex=0.2, sizey=0.2,
    xanchor="left", yanchor="bottom",
)

# Centered title near the top of the figure, plus axis captions.
fig.update_layout(
    title=dict(
        text='Installs',
        x=0.5, y=0.95,
        xanchor='center', yanchor='top',
        font=dict(size=25),
    ),
    xaxis_title='Time',
    yaxis_title='Count',
)

fig.show()
fig.write_image("../../Reports/installs/timeseries_installs_network.svg")

# The helper frame is only needed for this chart.
del vis_df

In [11]:
# Daily install counts, reshaped to one row per date and one column per country.
# Every (date, country) pair occurs once after the groupby, so pivot_table's default
# mean aggregation simply passes each count through.
vis_df = (
    df_installs
    .groupby(by=['event_date', 'country_id'])
    .size()
    .reset_index(name='counts')
    .pivot_table(index='event_date', columns='country_id')
    ['counts']
    .reset_index()
)

# NOTE(review): vis_df.columns also contains 'event_date', the column already used
# for x -- confirm Plotly Express leaves it out of the plotted series.
fig = px.line(
    vis_df,
    x="event_date",
    y=vis_df.columns,
    template="plotly_white",
    color_discrete_sequence=['#131148', '#00ba58', '#31D1C9', '#4BE000'],
)

# One tick per month, labelled with abbreviated month over year.
fig.update_xaxes(dtick="M1", tickformat="%b\n%Y", ticklabelmode="period")

# Logo: bottom-left corner pinned just above the top-left of the paper area.
# NOTE(review): `source` is a relative file path, while Plotly documents it as an
# image URL -- confirm the logo really renders.
fig.add_layout_image(
    source="../../assets/just_dice.svg",
    xref="paper", yref="paper",
    x=0, y=1.03,
    sizex=0.2, sizey=0.2,
    xanchor="left", yanchor="bottom",
)

# Centered title near the top of the figure, plus axis captions.
fig.update_layout(
    title=dict(
        text='Installs',
        x=0.5, y=0.95,
        xanchor='center', yanchor='top',
        font=dict(size=25),
    ),
    xaxis_title='Time',
    yaxis_title='Count',
)

fig.show()
# Export is currently disabled for this chart:
# fig.write_image("../../Reports/installs/timeseries_installs_country.svg")

# The helper frame is only needed for this chart.
del vis_df