In [None]:
import pandas as pd
import numpy as np
from datetime import date
import seaborn as sns 
import matplotlib.pyplot as plt 
from time import strptime
from datetime import datetime
from dateutil.relativedelta import relativedelta
import warnings; warnings.simplefilter('ignore')

### Ingesting the data
Assumes the data is read from `screen_time_data.csv`, located in the `data` folder at the top level of the project.

In [None]:
# Load the raw screen-time export. The path is relative, so it is resolved
# against the kernel's current working directory.
df = pd.read_csv('.././data/screen_time_data.csv')

In [None]:
# Quick sanity check of the loaded frame: columns, dtypes and non-null counts.
df.info()

In [None]:
# Lookup from an English month name (full or three-letter form) to its
# calendar number. "May" is listed once because both forms coincide.
month_map = {
    label: number
    for number, labels in enumerate(
        [
            ('January', 'Jan'),
            ('February', 'Feb'),
            ('March', 'Mar'),
            ('April', 'Apr'),
            ('May',),
            ('June', 'Jun'),
            ('July', 'Jul'),
            ('August', 'Aug'),
            ('September', 'Sep'),
            ('October', 'Oct'),
            ('November', 'Nov'),
            ('December', 'Dec'),
        ],
        start=1,
    )
    for label in labels
}


### Transforming the data in preparation for visualisation

In [None]:

# Create a date field from the separate year / month-name / day columns.
# A month name that is missing from month_map raises KeyError here.
df['date'] = pd.to_datetime(dict(year=df['year'], month=df['month'].apply(lambda name: month_map[name]), day=df['day']))
# Sort the dataframe and remove duplicates by keeping the last row for a date.
# The sort must be stable: the default quicksort does not preserve the relative
# order of rows sharing a date, which would make "keep last" pick an arbitrary
# duplicate instead of the one that appears last in the input.
df.sort_values(["date"], ascending=True, kind="mergesort", inplace=True)
df.drop_duplicates(subset='date', keep="last", inplace=True)
# Add the week day (full name) and its three-letter abbreviation
df['day_of_week'] = df['date'].dt.day_name()
df['short_day_of_week'] = df['day_of_week'].str[:3]

In [None]:
# Total screen time per row in minutes. Computed column-wise instead of with a
# row-by-row apply: same arithmetic, no per-row Python call.
df['total_time'] = (df['total_hour'] * 60 + df['total_min']).round(1)

In [None]:
def convert_mins_to_hrs_and_mins_string(total_minutes: float) -> str:
    """Format a duration given in minutes as ``"<hours>h <minutes>m"``.

    Args:
        total_minutes: Duration in minutes. Expected to be non-negative;
            any fractional part of a minute is discarded.

    Returns:
        The duration as a string such as ``"2h 30m"``.
    """
    # The remainder must be taken modulo 60 (minutes per hour), not modulo the
    # hour count; divmod yields both parts in one step.
    hours, minutes = divmod(int(total_minutes), 60)
    return f"{hours}h {minutes}m"

In [None]:
 %matplotlib inline
# The magic above renders matplotlib figures inline in the notebook output.
# Create a figure with a specific size
# plt.figure(figsize=(5, 10))
# Default size applied to every figure created after this cell.
plt.rcParams['figure.figsize']=(12, 8)
# Seaborn "darkgrid" theme for all following plots.
sns.set_style("darkgrid")

In [None]:
# Sum every row's minutes, then report the grand total as hours and minutes.
total_minutes_logged = df['total_time'].sum()
total_time_on_phone = convert_mins_to_hrs_and_mins_string(total_minutes_logged)
print(f"Total time spent on phone : {total_time_on_phone}")

In [None]:
# Summary statistics (count, mean, spread, quartiles) of the total_time column.
df['total_time'].describe()

In [None]:
# Week days in Monday-first order, plus their three-letter forms; the short
# list fixes the category order on the bar-chart x axis.
day_order = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]
short_day_order = [full_name[:3] for full_name in day_order]

In [None]:
# Mean total_time for every (year, month, week day) combination; the three
# grouping keys become the levels of the resulting MultiIndex.
monthly_day_average = (
    df
    .groupby(['year', 'month', 'short_day_of_week'])[['total_time']]
    .mean()
)

In [None]:
def get_this_month_and_year() -> (str, int):
    """Return the current month name and the current year.

    Both values come from a single clock reading, so they cannot disagree
    when the call happens across a month or year boundary.

    Returns:
        A ``(month_name, year)`` tuple: the full month name as produced by
        ``strftime('%B')`` and the four-digit year as an ``int``.
    """
    now = datetime.now()
    # NOTE(review): '%B' follows the process locale; the rest of the notebook
    # appears to expect English month names - confirm the kernel locale.
    return now.strftime('%B'), now.year

print(get_this_month_and_year())

In [None]:
# Month name and year for "now", plus the name of the previous calendar month.
# last_month holds only a month name: in January it is "December", which
# belongs to the year before `year`.
this_month, year = get_this_month_and_year()
one_month_ago = datetime.now() - relativedelta(months=1)
last_month = one_month_ago.strftime('%B')

In [None]:
# Year that the previous calendar month belongs to. It differs from `year`
# in January, when the previous month is December of the year before.
last_month_year = (datetime.now() - relativedelta(months=1)).year

# Per-week-day averages for the current month, the previous month and the
# whole current year. Each .loc lookup raises KeyError when the data holds no
# rows for the requested period.
this_month_data = monthly_day_average.loc(axis=0)[year, this_month].reset_index().round(0)
last_month_data = monthly_day_average.loc(axis=0)[last_month_year, last_month].reset_index().round(0)
# Use the current year rather than a hard-coded 2024 so the slice matches the
# "year to date" label.
year_to_date_data = monthly_day_average.loc(axis=0)[year].reset_index().round(0)

In [None]:
# Rounded mean of total_time for each period, in the order:
# year to date, this month, last month.
daily_averages = [
    period_frame['total_time'].mean().round(0)
    for period_frame in (year_to_date_data, this_month_data, last_month_data)
]

### Visualising the total time spent on phone

In [None]:
plt.rcParams['figure.figsize'] = (12, 8)
# 2x2 grid: summary table top-left, one bar chart in each remaining cell.
fig, axs = plt.subplots(ncols=2, nrows=2)
fig.subplots_adjust(hspace=0.3, wspace=.2)

palette = sns.color_palette("mako")
# One colour for the five week days and another for the two weekend days.
colours = [palette[3]] * 5 + [palette[5]] * 2

# (axes, data, title) for each bar chart, in drawing order.
panels = [
    (axs[1, 0], year_to_date_data, f"Year to date - daily average by day - {year}"),
    (axs[0, 1], this_month_data, f"Daily average by day - {this_month}"),
    (axs[1, 1], last_month_data, f"Daily average by day - {last_month}"),
]

for ax, panel_data, title in panels:
    sns.barplot(data=panel_data, x="short_day_of_week", y="total_time", ax=ax, order=short_day_order, palette=colours)
    ax.set_title(title)
    # Labels, limits and ticks are applied after plotting: some seaborn
    # versions replace the axis labels with the column names when drawing,
    # which would discard labels set beforehand.
    ax.set(xlabel='Day of week', ylabel='Time in minutes', ylim=(0, 360))
    # One tick per hour, from 0 to 6 hours.
    ax.set_yticks([0, 60, 120, 180, 240, 300, 360])

# The top-left cell carries only the summary table, so hide its axes.
axs[0, 0].set_axis_off()
table = axs[0, 0].table(
    cellText=[[f"{int(average)} min"] for average in daily_averages],
    rowLabels=["Year to date", f"This month - {this_month}", f"Last month - {last_month}"],
    colLabels=['Daily average'],
    cellLoc='center',
    bbox=[0.5, 0.6, 0.4, 0.4],
)

# Fix the font size instead of letting matplotlib shrink it to fit the cells.
table.auto_set_font_size(False)
table.set_fontsize(12)

### Prepare the data for application level visualisations

In [None]:
# Minutes spent in the application held in slot 0 (hours converted to minutes
# plus the remaining minutes), computed column-wise.
df['application_time_0'] = (df['application_hour_0'] * 60) + df['application_min_0']

In [None]:
# Total slot-0 minutes per (month, year, application name).
(
    df
    .groupby(['month', 'year', 'application_name_0'])[['application_time_0']]
    .sum()
)

In [None]:
def get_group_for_application_slot(data, app_index):
    """Total the minutes per month, year and application for one slot.

    Args:
        data: DataFrame holding ``month``, ``year`` and, for the requested
            slot, ``application_name_<i>``, ``application_hour_<i>`` and
            ``application_min_<i>`` columns. It is modified in place: an
            ``application_time_<i>`` column (minutes) is added or overwritten.
        app_index: Number of the application slot to summarise.

    Returns:
        A DataFrame with columns ``month``, ``year``, ``application_name`` and
        ``application_time`` (summed minutes, rounded to whole numbers).
    """
    name_col = f"application_name_{app_index}"
    time_col = f"application_time_{app_index}"

    # Hours converted to minutes plus the remaining minutes, column-wise.
    data[time_col] = (data[f"application_hour_{app_index}"] * 60) + data[f"application_min_{app_index}"]

    # Group the frame that was passed in, not the notebook-level `df`, so the
    # function works for any input frame.
    group = data[['month', 'year', name_col, time_col]].groupby(['month', 'year', name_col]).sum()
    group = group.reset_index().round(0)
    # Drop the slot suffix so results from different slots can be concatenated.
    return group.rename(columns={name_col: "application_name", time_col: "application_time"})

In [None]:
# One summary frame per application slot (slots 0, 1 and 2).
application_groups = [
    get_group_for_application_slot(data=df, app_index=slot)
    for slot in range(0, 3)
]
    

In [None]:
# Stack the per-slot summaries, then total the minutes per application:
# once per (month, year) and once per year.
app_df = pd.concat(application_groups)

all_apps_grouped_month = (
    app_df
    .groupby(["month", "year", "application_name"])
    .sum()
    .reset_index()
)

all_apps_grouped_year = (
    app_df
    .groupby(["year", "application_name"])[["application_time"]]
    .sum()
    .reset_index()
)

In [None]:
# Add a human-readable "<h>h <m>m" column, then show the 20 applications with
# the most minutes per year.
yearly_minutes = all_apps_grouped_year['application_time']
all_apps_grouped_year['app_hr_min'] = yearly_minutes.apply(convert_mins_to_hrs_and_mins_string)
all_apps_grouped_year.sort_values('application_time', ascending=False).head(20)

In [None]:
# Add a human-readable "<h>h <m>m" column.
all_apps_grouped_month['app_hr_min'] = all_apps_grouped_month['application_time'].apply(convert_mins_to_hrs_and_mins_string)
# Keep the frame sorted by descending minutes. sort_values(..., inplace=True)
# returns None, so chaining .head() onto it raises AttributeError; reassign
# the sorted frame and display the top rows in a separate step.
all_apps_grouped_month = all_apps_grouped_month.sort_values('application_time', ascending=False)
all_apps_grouped_month.head(20)

In [None]:
# Year that the previous calendar month belongs to. It differs from `year`
# in January, when the previous month is December of the year before.
last_month_year = (datetime.now() - relativedelta(months=1)).year
# Per-application totals for the previous calendar month.
all_apps_grouped_month.query(f"`year` == {last_month_year} & `month` == '{last_month}'")

In [None]:
# Year that the previous calendar month belongs to. It differs from `year`
# in January, when the previous month is December of the year before.
last_month_year = (datetime.now() - relativedelta(months=1)).year
# Per-application totals for the current and the previous calendar month.
this_month_application_data = all_apps_grouped_month.query(f"`year` == {year} & `month` == '{this_month}'")
last_month_application_data = all_apps_grouped_month.query(f"`year` == {last_month_year} & `month` == '{last_month}'")

### Visualising time spent per application

In [None]:
plt.rcParams['figure.figsize'] = (12, 8)
# Three stacked bar charts: year to date, last month, this month.
fig, axs = plt.subplots(ncols=1, nrows=3)
fig.subplots_adjust(hspace=0.5, wspace=.2)

palette = sns.color_palette("mako")

# The yearly totals hold one row per (year, application); keep only the
# current year so the chart matches its "Year to date" title.
year_to_date_application_data = all_apps_grouped_year[all_apps_grouped_year['year'] == year]

# (axes, data, title) for each chart, in drawing order.
panels = [
    (axs[0], year_to_date_application_data, f"Year to date {year}"),
    (axs[1], last_month_application_data, f"Last month - {last_month}"),
    (axs[2], this_month_application_data, f"This month - {this_month}"),
]

for ax, panel_data, title in panels:
    sns.barplot(data=panel_data, x="application_name", y="application_time", ax=ax, palette=palette)
    ax.set_title(title)
    # Labels are applied after plotting: some seaborn versions replace the
    # axis labels with the column names when drawing. All three charts get the
    # same labels.
    ax.set(xlabel='Application', ylabel='Time in minutes')

# Fixed y range for the year-to-date chart only.
axs[0].set(ylim=(0, 2800))