In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import seaborn as sns

In [None]:
# Load the daily sleep log; the first CSV column becomes the row index.
x1 = pd.read_csv(
    "/content/sleepDay_merged.csv",
    index_col=0,
)

# ***Data Cleaning***

In [None]:
# Show the freshly loaded sleep frame as the cell output
# (the rendered table is truncated to the first and last rows).
x1

Unnamed: 0_level_0,SleepDay,TotalSleepRecords,TotalMinutesAsleep,TotalTimeInBed
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1503960366,4/12/2016 12:00:00 AM,1,327,346
1503960366,4/13/2016 12:00:00 AM,2,384,407
1503960366,4/15/2016 12:00:00 AM,1,412,442
1503960366,4/16/2016 12:00:00 AM,2,340,367
1503960366,4/17/2016 12:00:00 AM,1,700,712
...,...,...,...,...
8792009665,4/30/2016 12:00:00 AM,1,343,360
8792009665,5/1/2016 12:00:00 AM,1,503,527
8792009665,5/2/2016 12:00:00 AM,1,415,423
8792009665,5/3/2016 12:00:00 AM,1,516,545


In [None]:
# Parse the SleepDay values into pandas datetimes, then write them back
# over the original column.
sleep_day_parsed = pd.to_datetime(x1['SleepDay'])
x1['SleepDay'] = sleep_day_parsed

In [None]:
# SleepHour is derived before SleepDay loses its time-of-day. The rows displayed
# earlier are all stamped 12:00:00 AM, so it is presumably 0 throughout — TODO
# confirm. It is still created here because a later cell drops it by name.
x1['SleepHour'] = x1['SleepDay'].dt.hour
# normalize() zeroes the time-of-day but keeps the column as datetime64, whereas
# .dt.date would turn it into Python date objects (object dtype). Staying
# datetime64 keeps the .dt accessor usable, so this cell can be re-run safely.
x1['SleepDay'] = x1['SleepDay'].dt.normalize()


In [None]:
# Re-display the frame to inspect SleepDay after the conversion above.
x1

Unnamed: 0_level_0,SleepDay,TotalSleepRecords,TotalMinutesAsleep,TotalTimeInBed
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1503960366,2016-04-12,1,327,346
1503960366,2016-04-13,2,384,407
1503960366,2016-04-15,1,412,442
1503960366,2016-04-16,2,340,367
1503960366,2016-04-17,1,700,712
...,...,...,...,...
8792009665,2016-04-30,1,343,360
8792009665,2016-05-01,1,503,527
8792009665,2016-05-02,1,415,423
8792009665,2016-05-03,1,516,545


In [None]:
# Rebind to a new frame instead of mutating in place, and tolerate the column
# already being absent so that re-running this cell does not raise KeyError.
x1 = x1.drop(columns=['SleepHour'], errors='ignore')

In [None]:
# Print the index range, per-column dtypes and non-null counts for the sleep frame.
x1.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 413 entries, 1503960366 to 8792009665
Data columns (total 4 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   SleepDay            413 non-null    datetime64[ns]
 1   TotalSleepRecords   413 non-null    int64         
 2   TotalMinutesAsleep  413 non-null    int64         
 3   TotalTimeInBed      413 non-null    int64         
dtypes: datetime64[ns](1), int64(3)
memory usage: 16.1 KB


# ***Distribution of Total Minutes Asleep***

In [None]:
# Histogram of TotalMinutesAsleep (nbins=20) with explicit axis titles;
# the update_* calls return the figure, so they are chained.
fig = (
    px.histogram(
        x1,
        x='TotalMinutesAsleep',
        nbins=20,
        title='Distribution of Total Minutes Asleep',
    )
    .update_xaxes(title='Total Minutes Asleep')
    .update_yaxes(title='Frequency')
)
fig.show()

# ***Correlation Heatmap***

In [None]:
# Correlate only the numeric columns: SleepDay is a date column, which corr()
# either cannot cast to float or silently drops, depending on the pandas version.
correlation_matrix = x1.select_dtypes(include='number').corr()

fig = ff.create_annotated_heatmap(
    z=correlation_matrix.values,
    x=list(correlation_matrix.columns),
    y=list(correlation_matrix.index),
    # Two-decimal labels keep the per-cell annotations readable.
    annotation_text=correlation_matrix.round(2).values.tolist(),
    colorscale='Viridis',
)
fig.update_layout(title='Correlation Heatmap')
fig.show()





In [None]:
# Column means over every row of x1 (plotly.express is already imported as px
# in the notebook's first cell, so it is not re-imported here).
avg_minutes_asleep = x1['TotalMinutesAsleep'].mean()
avg_time_in_bed = x1['TotalTimeInBed'].mean()

fig = px.bar(
    x=['Average Total Minutes Asleep', 'Average Total Time in Bed'],
    y=[avg_minutes_asleep, avg_time_in_bed],
    labels={'x': 'Metrics', 'y': 'Average'},
    title='Sleep Metrics Averages',
)

# Start the y-axis at 0 and leave 10% headroom above the taller bar.
fig.update_yaxes(range=[0, max(avg_minutes_asleep, avg_time_in_bed) * 1.1])

fig.show()


# ***Average sleep records per person per day***

In [None]:
# Pooled mean of TotalSleepRecords across every row of x1
# (a single number, not a per-Id breakdown).
sleep_record_counts = x1['TotalSleepRecords']
avg_sleep_records = sleep_record_counts.mean()
print(avg_sleep_records)

1.11864406779661


# ***Heart Rate Second-by-Second Analysis***

In [None]:
# Load the heart-rate log with the first CSV column as the row index,
# then show the frame as the cell output.
x3 = pd.read_csv(
    "/content/heartrate_seconds_merged.csv",
    index_col=0,
)
x3

Unnamed: 0_level_0,Time,Value
Id,Unnamed: 1_level_1,Unnamed: 2_level_1
2022484408,4/12/2016 7:21:00 AM,97
2022484408,4/12/2016 7:21:05 AM,102
2022484408,4/12/2016 7:21:10 AM,105
2022484408,4/12/2016 7:21:20 AM,103
2022484408,4/12/2016 7:21:25 AM,101
...,...,...
8877689391,5/12/2016 2:43:53 PM,57
8877689391,5/12/2016 2:43:58 PM,56
8877689391,5/12/2016 2:44:03 PM,55
8877689391,5/12/2016 2:44:18 PM,55


In [None]:
# Tabulate each distinct missing/not-missing pattern across the columns
# and how many rows show it.
missing_mask = x3.isna()
missing_mask.value_counts()

Time   Value
False  False    2483658
dtype: int64

In [None]:
# An explicit format avoids per-value format guessing on this ~2.48M-row column
# and pins the month/day order. It matches the strings displayed when the file
# was loaded, e.g. "4/12/2016 7:21:00 AM".
x3["Time"] = pd.to_datetime(x3["Time"], format="%m/%d/%Y %I:%M:%S %p")


In [None]:
# Print the index range and per-column dtypes to confirm Time is now datetime64.
x3.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2483658 entries, 2022484408 to 8877689391
Data columns (total 2 columns):
 #   Column  Dtype         
---  ------  -----         
 0   Time    datetime64[ns]
 1   Value   int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 56.8 MB


In [None]:
# .dt.minute is the minute-of-hour component (0-59), so this pools readings from
# every hour, day and Id that share the same minute value. It is not a
# minute-by-minute time series, and the title and axis labels say so explicitly.
# The result keeps its original 'Time' and 'Value' column names.
avg_hr_per_minute = (
    x3.groupby(x3['Time'].dt.minute)['Value']
    .mean()
    .reset_index()
)

fig = px.line(
    avg_hr_per_minute,
    x='Time',
    y='Value',
    title='Average Heart Rate by Minute of the Hour',
    labels={'Time': 'Minute of the hour (0-59)', 'Value': 'Average heart rate'},
)
fig.show()

In [None]:
# .dt.second is the second-of-minute component (0-59), so this pools readings
# from every minute, hour, day and Id that share the same second value. It is
# not a second-by-second time series, and the title and axis labels say so.
# The result keeps its original 'Time' and 'Value' column names.
avg_hr_per_second = (
    x3.groupby(x3['Time'].dt.second)['Value']
    .mean()
    .reset_index()
)

fig = px.line(
    avg_hr_per_second,
    x='Time',
    y='Value',
    title='Average Heart Rate by Second of the Minute',
    labels={'Time': 'Second of the minute (0-59)', 'Value': 'Average heart rate'},
)
fig.show()