# ScreenTime Analysis

In [58]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this is a blanket filter, so any warnings raised by the plotting or
# modelling cells further down are hidden as well — consider narrowing it.
warnings.filterwarnings("ignore")

In [59]:
# Read the screen-time records; the path is resolved relative to the working directory.
df = pd.read_csv(filepath_or_buffer="Screentime-App-Details.csv")

In [60]:
# Preview the first ten rows to check the column layout.
df.head(n=10)

Unnamed: 0,Date,Usage,Notifications,Times opened,App
0,08/26/2022,38,70,49,Instagram
1,08/27/2022,39,43,48,Instagram
2,08/28/2022,64,231,55,Instagram
3,08/29/2022,14,35,23,Instagram
4,08/30/2022,3,19,5,Instagram
5,08/31/2022,19,25,20,Instagram
6,09/01/2022,44,23,57,Instagram
7,09/02/2022,16,28,22,Instagram
8,09/03/2022,27,15,25,Instagram
9,09/04/2022,72,29,30,Instagram


In [61]:
# (rows, columns) of the loaded frame.
df.shape

(54, 5)

In [62]:
# Column dtypes and non-null counts; prints a summary rather than returning a value.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54 entries, 0 to 53
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Date           54 non-null     object
 1   Usage          54 non-null     int64 
 2   Notifications  54 non-null     int64 
 3   Times opened   54 non-null     int64 
 4   App            54 non-null     object
dtypes: int64(3), object(2)
memory usage: 2.2+ KB


In [63]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

Unnamed: 0,Usage,Notifications,Times opened
count,54.0,54.0,54.0
mean,65.037037,117.703704,61.481481
std,58.317272,97.01753,43.836635
min,1.0,8.0,2.0
25%,17.5,25.75,23.5
50%,58.5,99.0,62.5
75%,90.5,188.25,90.0
max,244.0,405.0,192.0


In [64]:
# Number of distinct values in each column.
df.nunique()

Date             27
Usage            43
Notifications    48
Times opened     45
App               2
dtype: int64

In [65]:
# Count of missing values per column.
df.isnull().sum()

Date             0
Usage            0
Notifications    0
Times opened     0
App              0
dtype: int64

In [66]:
# Usage per date, one bar segment per app.
figure = px.bar(
    df,
    x="Date",
    y="Usage",
    color="App",
    title="Usage",
)
figure.show()

In [67]:
# Notifications per date, one bar segment per app.
figure = px.bar(df, x="Date", y="Notifications", color="App", title="Notifications")
figure.show()

In [68]:
# Number of times each app was opened per date, one bar segment per app.
figure = px.bar(df, x="Date", y="Times opened", color="App", title="Times opened")
figure.show()

In [69]:
# Usage against notification count; marker size also tracks the notification count,
# and an ordinary-least-squares trendline is overlaid on the points.
figure = px.scatter(
    df,
    x="Notifications",
    y="Usage",
    size="Notifications",
    trendline="ols",
    title="Relationship Between Number of Notifications and Usage",
)
figure.show()

In [70]:
from sklearn.preprocessing import LabelEncoder

# Single encoder instance, used by the encoding loop further down.
le = LabelEncoder()

In [71]:
# Names of the object-dtype columns, i.e. the ones that still need numeric encoding.
co_cols = df.select_dtypes(include="object").columns
co_cols

Index(['Date', 'App'], dtype='object')

In [72]:
# Overwrite each object column in df with its integer label codes.
# NOTE: the same `le` is refit on every pass, so once the loop ends it only holds
# the classes of the last column in co_cols; earlier mappings are not kept.
for col in co_cols:
    df[col] = le.fit_transform(df[col])

In [73]:
from sklearn.model_selection import train_test_split


In [86]:
# Features are every column except the encoded app label; the label is the target.
X = df.drop(columns=["App"])
y = df["App"]

In [87]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [89]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with default settings, fit on the training split.
lr = LogisticRegression().fit(xtrain, ytrain)

# Predicted labels for both splits; they are scored in a later cell.
y_lr_train_pred, y_lr_test_pred = lr.predict(xtrain), lr.predict(xtest)

In [90]:
from sklearn.metrics import accuracy_score

In [91]:
# Print the logistic-regression accuracy for the training split, then the held-out split.
for split_label, y_true, y_pred in (
    ('Train Data', ytrain, y_lr_train_pred),
    ('Test Data', ytest, y_lr_test_pred),
):
    print(split_label)
    print(accuracy_score(y_true, y_pred))

Train Data
0.9069767441860465
Test Data
1.0


In [94]:
from sklearn.ensemble import RandomForestRegressor

In [95]:
# mean_squared_error and r2_score are called below but no visible cell imports them,
# so this cell failed with NameError on a fresh kernel; import them here.
from sklearn.metrics import mean_squared_error, r2_score

# Shallow random-forest regressor fit on the same training split as the logistic model.
# NOTE(review): the target is the label-encoded App column, so this model outputs a
# continuous score rather than a class label — a RandomForestClassifier may be what
# was intended; confirm before comparing against the logistic-regression accuracy.
rf = RandomForestRegressor(max_depth=2, random_state=100)
rf.fit(xtrain, ytrain)

y_rf_train_pred = rf.predict(xtrain)
y_rf_test_pred = rf.predict(xtest)

# Error and goodness-of-fit on the training split.
rf_train_mse = mean_squared_error(ytrain, y_rf_train_pred)
rf_train_r2 = r2_score(ytrain, y_rf_train_pred)

# Same metrics on the held-out split.
rf_test_mse = mean_squared_error(ytest, y_rf_test_pred)
rf_test_r2 = r2_score(ytest, y_rf_test_pred)

# One-row summary table. Building it from a record keeps the metric columns numeric
# instead of the object dtype produced by transposing a mixed-type list.
rf_result = pd.DataFrame(
    [
        {
            'Method': 'Random Forest',
            'Training MSE': rf_train_mse,
            'Training R2': rf_train_r2,
            'Test MSE': rf_test_mse,
            'Test R2': rf_test_r2,
        }
    ]
)
rf_result

Unnamed: 0,Method,Training MSE,Training R2,Test MSE,Test R2
0,Random Forest,0.054528,0.781771,0.073491,0.703587


In [96]:
# RandomForestRegressor.score returns the coefficient of determination (R^2), not a
# classification accuracy, and it is evaluated on the training split here — the value
# is the same number as the "Training R2" entry of rf_result. The printed label is
# corrected to say so; the variable name and value are unchanged.
accrf = rf.score(xtrain, ytrain)
print('Random Forest training R2 is', accrf)

Random Forest accuracy is 0.7817712056526107
