In [170]:
import pandas as pd
import numpy as np
import altair as alt
from altair import datum
import datetime

In [171]:
#@title Data parsing functions
def splitData(data):
  """Split every sleep record that crosses midnight into two same-day records.

  The frame is modified in place and is also returned. For each row whose
  ``From`` and ``To`` fall on different calendar dates:

  * the row itself is overwritten with the part that ends at 23:59:59 of the
    ``From`` date;
  * a new row with the part that starts at 00:00:00 of the ``To`` date is
    appended under the label ``len(data)``.

  ``Cycles`` and ``DeepSleep`` are shared between the two parts in proportion
  to their durations. A ``Cycles`` value of -1 is treated as 1 before sharing.

  Rows are written positionally, so the frame must have exactly six columns in
  the order From, To, Hours, Cycles, DeepSleep, Day, and appending relies on a
  default 0..n-1 index. Only the first and the last calendar day of a record
  are kept; any full days in between are not represented.

  Args:
    data: DataFrame of sleep records whose ``From``/``To`` values are
      pandas Timestamps.

  Returns:
    The same DataFrame object after splitting.
  """
  one_day = pd.Timedelta(days=1)
  one_second = pd.Timedelta(seconds=1)
  for index, row in data.iterrows():
    if row.From.date() == row.To.date():
      continue
    startOfDay = row.To.normalize()
    midnightAfterFrom = row.From.normalize() + one_day
    # The stored end must stay on the From date, so it is 23:59:59 ...
    endOfDay = midnightAfterFrom - one_second
    # ... but the duration is measured up to the real midnight. This keeps the
    # two parts adding up to the full time span and guarantees hours1 > 0, so
    # the proportional split below can never divide by zero.
    hours1 = (midnightAfterFrom - row.From).total_seconds()/3600
    hours2 = (row.To - startOfDay).total_seconds()/3600
    cycles = 1 if row.Cycles == -1 else row.Cycles
    total = hours1 + hours2
    part1 = hours1/total
    part2 = hours2/total
    cycles1 = part1*cycles
    cycles2 = part2*cycles
    DeepSleep1 = part1*float(row.DeepSleep)
    DeepSleep2 = part2*float(row.DeepSleep)
    data.loc[index]= [row.From, endOfDay, hours1, cycles1, DeepSleep1, row.From.date()]
    data.loc[len(data)]= [startOfDay, row.To, hours2, cycles2, DeepSleep2, row.To.date()]
  return data

def addClass(x):
  """Label a sleep duration, given in hours, with one of three classes.

  Returns "Too much" for values above 9.49, 'Enough' for values above 6.49,
  and 'Not enough' for everything else.
  """
  if x > 9.49:
    return "Too much"
  return 'Enough' if x > 6.49 else 'Not enough'

def parseHours(x):
  """Turn the clock part of a "<date> <HH:MM...>" value into decimal hours.

  The value is converted with str(), the text after the first space is split
  on ":", and the result is the hour field plus the minute field divided by
  60 and rounded to two decimals. Anything after the minutes is ignored.
  """
  clock = str(x).split(" ")[1]
  fields = clock.split(":")
  whole_hours = float(fields[0])
  hour_fraction = round(float(fields[1])/60, 2)
  return whole_hours + hour_fraction

In [172]:
#@title Data reading and processing
# Download the sleep log workbook from GitHub and load it with pandas.
data_url = 'https://github.com/olexandryermilov/ucu-public/blob/main/sleep_clean.xlsx?raw=true'
data = pd.read_excel(data_url)
# Keep every second row only (positions 0, 2, 4, ...) and renumber from 0.
# NOTE(review): presumably the skipped rows are not sleep records - confirm against the workbook.
data = data.iloc[::2].reset_index()
# reset_index() stored the old labels in an 'index' column; drop it together with Tz, Id and Sched.
data = data.drop(['index', 'Tz', 'Id', 'Sched'],axis = 1)
# Day is the calendar date on which the record starts, kept as a string.
data['Day'] = data.From.map(lambda x: str(x.date()))
data['Hours'] = data.Hours.map(lambda x: float(x))
# Negative Cycles and DeepSleep values are replaced with 0.
data['Cycles'] = data.Cycles.map(lambda x: 0 if x<0 else x)
data['DeepSleep'] = data.DeepSleep.map(lambda x: float(x))
data['DeepSleep'] = data.DeepSleep.map(lambda x: 0 if x<0 else x)
# splitData modifies the frame it receives, so it is given a copy and `data` keeps the unsplit records.
newData = splitData(data.copy())
# Keep only the rows that start and end on the same calendar date.
newData = newData[newData.From.map(lambda x: x.date()) == newData.To.map(lambda x: x.date())]
newData['Classification'] = newData.Hours.map(addClass)
# From/To/Day become strings from here on; parseHours reads the clock time out of that text.
newData['From'] = newData.From.map(str)
newData['To'] = newData.To.map(str)
newData['Day'] = newData.Day.map(str)      
newData['HoursFrom'] = newData.From.map(parseHours)
newData['HoursTo'] = newData.To.map(parseHours)
# One row per Day with the columns summed, then classified again on the daily total of Hours.
grouped = newData.groupby('Day', as_index = False).sum()    
grouped['Classification'] = grouped.Hours.map(addClass)  
# NOTE(review): this treats the summed DeepSleep as a fraction of Hours - confirm the units of DeepSleep.
grouped['DeepSleepHours'] = grouped['Hours']*grouped['DeepSleep']
# data3: the unsplit records, extended with weekday numbers, decimal clock hours and
# Weekday/Weekend labels for both the start (From) and the end (To) of each record.
data3 = data.copy()
data3['weekdayTo'] = data3.To.map(lambda x: x.weekday())
data3['weekdayFrom'] = data3.From.map(lambda x: x.weekday())
data3['From'] = data3.From.map(str)
data3['To'] = data3.To.map(str)
data3['Day'] = data3.Day.map(str)
data3['HoursTo'] = data3.To.map(parseHours)
# Weekday numbers below 5 are labelled 'Weekday', the rest 'Weekend'.
data3['ClassOfDayTo'] = data3.weekdayTo.map(lambda x: 'Weekday' if x<5 else 'Weekend')
data3['HoursFrom'] = data3.From.map(parseHours)
data3['ClassOfDayFrom'] = data3.weekdayFrom.map(lambda x: 'Weekday' if x<5 else 'Weekend')
# Hour-of-day coverage: array[h] counts the same-day records that cover hour h,
# and df1 collects one (Day, hour) row for every covered hour.
array = [0]*24
hours = range(0,24)
df1 = pd.DataFrame(data = [], columns = ["Day", "hour"])
for index, row in newData.iterrows():
  # Whole hours from the start hour through the end hour (inclusive, hence the +1) count as covered.
  fr = int(row['HoursFrom'])
  to = int(row['HoursTo'])+1
  for x in range(fr, to):
      array[x]+=1
      # Appends one row at a time under the next integer label.
      df1.loc[len(df1)] = [row['Day'], x]   
# df pairs each per-hour count with its hour number (columns "count" and "hours").
df = pd.DataFrame(data = np.array([array, hours]).T, columns = ["count", "hours"])

In [173]:
#@title Additional variables
# Each classification label is written next to the colour used for it; the
# pairs are unzipped into the two parallel lists used by the colour scales.
domain, range_ = map(list, zip(
    ("Too much", 'blue'),
    ('Enough', 'green'),
    ('Not enough', 'red'),
))

In [174]:
# Interval selection bound to the chart scales (Altair's pan/zoom binding).
bind = alt.selection_interval(bind='scales')
# One small point per day: total hours slept, coloured by classification.
points = (
    alt.Chart(grouped)
    .mark_point(size=1, opacity=0.8)
    .encode(
        alt.X('Day:T', title="", scale=alt.Scale(domain=newData.Day.unique())),
        alt.Y('Hours:Q', title="Hours"),
        alt.Color(
            'Classification:N',
            scale=alt.Scale(domain=domain, range=range_),
            legend=alt.Legend(title=""),
        ),
        tooltip=[alt.Tooltip('Hours:Q'), alt.Tooltip('Day:T')],
    )
    .properties(
        width=1000,
        height=600,
        title=dict(
            text=["University is the main reason I don't get enough sleep"],
            subtitle=["How long did I sleep during last two years."],
        ),
    )
    .add_selection(bind)
)

# Red trend line: mean of Hours over a window that spans from 17 rows before
# the current row to 2 rows after it. No sort is given to the window transform.
line = (
    alt.Chart(grouped)
    .mark_line(size=2, color='red')
    .transform_window(frame=[-17, 2], rolling_mean='mean(Hours)')
    .encode(
        alt.X('Day:T', title=""),
        alt.Y('rolling_mean:Q', title="Hours"),
    )
)

# Events to label on the chart; texts[i] is drawn at dates[i].
texts = ["Exams and pre-exams", "2 weeks to coursework", "Driving classes", "Exams", "Begin to WFH", "Exams", "Diploma", "UCU entrance", "COVID"]
dates = ["2018-11-24", "2019-05-25","2019-10-17", "2019-12-23", "2020-03-11","2020-05-04", "2020-05-29", "2020-08-14", "2020-10-05"]
# Vertical position of each label: the Hours value of that day's row in `grouped`.
# A date that has no row in `grouped` raises IndexError here.
hours = [grouped[grouped['Day']==x]['Hours'].values[0] for x in dates]

# Build the frame column by column so that Hours stays numeric. Stacking the
# three lists through np.array would coerce every value, hours included, to a string.
annotations_df = pd.DataFrame({"text": texts, "Day": dates, 'Hours': hours})

# Text layer: each label is placed at its (Day, Hours) position and nudged
# 3 px right and 12 px down by dx/dy.
annotation = (
    alt.Chart(annotations_df)
    .mark_text(dx=3, dy=12, fontSize=12, align='left', baseline='middle')
    .encode(
        alt.X('Day:T', title=""),
        alt.Y('Hours:Q'),
        alt.Text("text:O"),
    )
)

In [175]:
# Layer trend line, daily points and labels; this expression is the cell output.
(
    (line + points + annotation)
    .configure_title(fontSize=34)
    .configure_axis(gridOpacity=0.3)
)

In [176]:
# One vertical rule per same-day record, drawn from the hour it starts
# (HoursFrom) to the hour it ends (HoursTo) and coloured by classification.
base = (
    alt.Chart(newData)
    .mark_rule(size=0.7, opacity=1)
    .encode(
        alt.X('Day:T', title=""),
        alt.Y('HoursFrom:Q', title=""),
        alt.Y2('HoursTo:Q', title=""),
        alt.Color(
            'Classification:N',
            scale=alt.Scale(domain=domain, range=range_),
            legend=alt.Legend(title=""),
        ),
        tooltip=[alt.Tooltip("Day:T"), alt.Tooltip("HoursFrom:Q"), alt.Tooltip("HoursTo:Q")],
    )
)

# Horizontal brush; the selected range becomes the x-domain of the main chart.
interval = alt.selection_interval(encodings=['x'])

# Main chart: same rules as `base`, with the x scale driven by the brush.
chart = (
    base
    .encode(alt.X('Day:T', scale=alt.Scale(domain=interval.ref())))
    .properties(
        title=dict(
            text="WFH allowed me to nap during the day. But I didn't need it before UCU",
            subtitle="What are my sleep patterns? When do I go to sleep and when do I wake up?",
        ),
        width=1200,
        height=500,
    )
)
# Thin overview strip that carries the brush.
view = (
    base
    .add_selection(interval)
    .properties(height=50, width=1200)
)
# Main chart stacked above the overview strip; this expression is the cell output.
(
    (chart & view)
    .configure_title(fontSize=34)
    .configure_axis(gridOpacity=0.3)
)

In [177]:
# Multi-selection on the colour encoding, attached to both scatter plots and
# also used to filter them.
selection = alt.selection_multi(encodings=["color"])
# Left panel: sleep duration against the hour of waking up.
to = (
    alt.Chart(data3)
    .mark_point()
    .add_selection(selection)
    .transform_filter(selection)
    .encode(
        alt.X("HoursTo:Q", title="Waking up at"),
        alt.Y('Hours:Q', title="Duration of sleep"),
        alt.Color("ClassOfDayTo:N", title=""),
        tooltip=[alt.Tooltip("HoursTo:Q"), alt.Tooltip("Hours:Q")],
    )
    .properties(width=700, height=700)
)
# Right panel: sleep duration against the hour of going to sleep.
fromm = (
    alt.Chart(data3)
    .mark_point()
    .add_selection(selection)
    .transform_filter(selection)
    .encode(
        alt.X("HoursFrom:Q", title="Going to sleep at"),
        alt.Y('Hours:Q', title=""),
        alt.Color("ClassOfDayFrom:N", title=""),
        tooltip=[alt.Tooltip("HoursFrom:Q"), alt.Tooltip("Hours:Q")],
    )
    .properties(width=700, height=700)
)
# Both panels side by side under a shared title; this expression is the cell output.
(
    (to | fromm)
    .properties(
        title=dict(
            text="When I am going to sleep affects my sleep duration more then when I wake up.",
            subtitle="How duration of sleep depends on what day is it, when did I go to sleep and when did I wake up?",
        )
    )
    .configure_title(fontSize=34)
    .configure_axis(gridOpacity=0.3)
)

In [178]:
#@title Hidden chart
# This chart is more interesting from a technical point of view, but it doesn't show any interesting dependencies, so I decided not to include it in my final work.
#alt.data_transformers.disable_max_rows()
#brush = alt.selection_interval(encodings= ['x'])
#points_with_selector = alt.Chart(grouped).mark_point(opacity = 0.8, size = 1).encode(
#    x = alt.X('Day:T', title = ""),
#    y = alt.Y('DeepSleepHours:Q', title = "Hours of deep sleep"),
#    tooltip = [alt.Tooltip('DeepSleepHours:Q'), alt.Tooltip('Day:T')],
#    color=alt.condition(brush, alt.value('blue'), alt.value('lightgray')),
#).add_selection(
#    brush
#).properties(width = 800, height = 600, title = "")

#hours_patterns = alt.Chart(df1).mark_point().transform_filter(brush).transform_aggregate(
#    count_hours='count(Day):Q',
#    groupby=["hour"]
#).encode(
#    x = alt.X("hour:Q", scale = alt.Scale(0,23)),
#    y = alt.Y("count_hours:Q"),
#    tooltip = [alt.Tooltip('Day:T'), alt.Tooltip('count:Q')]
#).properties(width = 800, height = 600, title = "")
#points_with_selector | hours_patterns