# Time of Events - day vs night

Build a series time of events by week or month. 

In [1]:
# libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime

In [2]:
# load data and check

df = pd.read_csv("../Data/data.csv")
df["DateTime"] = pd.to_datetime(df["DateTime"])

df.set_index("DateTime", inplace=True)

df.head(10)

Unnamed: 0_level_0,Feed,Poo,Wee,Vomit
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-07-30 13:00:00,True,True,True,False
2022-07-30 17:00:00,True,False,True,False
2022-07-30 19:45:00,True,False,True,False
2022-07-30 22:20:00,True,True,False,False
2022-07-31 01:25:00,True,True,True,False
2022-07-31 05:30:00,True,False,True,False
2022-07-31 09:45:00,True,False,True,False
2022-07-31 14:00:00,True,False,True,False
2022-07-31 18:30:00,True,False,True,False
2022-07-31 22:05:00,True,False,True,False


In [3]:
# function to categorise events into time periods

def categoriser_time(index: pd.core.indexes) -> list:
    categorised_list = []
    for event in index:
        time = event.time()
        if time < datetime.time(6, 0):
            categorised_list.append("Midnight to 6 a.m.")
            #print(f"{event} categorised to: Midnight to 6 a.m.")
        elif time <= datetime.time(18, 0):
            categorised_list.append("6 a.m. to 6 p.m.")
            #print(f"{event} categorised to: 6 a.m. to 6 p.m.")
        elif time <= datetime.time(23, 59):
            categorised_list.append("6 p.m. to Midnight")
            #print(f"{event} categorised to: 6 p.m. to Midnight.")
        else:
            print(f"NOT CATEGORISED: {event}")
    return categorised_list

# add categorisation to df
df["categorised_events"] = categoriser_time(df.index)

# check df
df



Unnamed: 0_level_0,Feed,Poo,Wee,Vomit,categorised_events
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-07-30 13:00:00,True,True,True,False,6 a.m. to 6 p.m.
2022-07-30 17:00:00,True,False,True,False,6 a.m. to 6 p.m.
2022-07-30 19:45:00,True,False,True,False,6 p.m. to Midnight
2022-07-30 22:20:00,True,True,False,False,6 p.m. to Midnight
2022-07-31 01:25:00,True,True,True,False,Midnight to 6 a.m.
...,...,...,...,...,...
2023-08-14 18:15:00,True,False,True,False,6 p.m. to Midnight
2023-08-15 05:35:00,True,True,True,False,Midnight to 6 a.m.
2023-08-15 18:05:00,True,False,True,False,6 p.m. to Midnight
2023-08-16 05:05:00,True,True,True,False,Midnight to 6 a.m.


In [4]:
# create new df using the categorised data as dummies
df_time = pd.get_dummies(df["categorised_events"])

display(df_time)

# resample to day
df_month = df_time.resample("M").sum()

display(df_month)

Unnamed: 0_level_0,6 a.m. to 6 p.m.,6 p.m. to Midnight,Midnight to 6 a.m.
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-07-30 13:00:00,True,False,False
2022-07-30 17:00:00,True,False,False
2022-07-30 19:45:00,False,True,False
2022-07-30 22:20:00,False,True,False
2022-07-31 01:25:00,False,False,True
...,...,...,...
2023-08-14 18:15:00,False,True,False
2023-08-15 05:35:00,False,False,True
2023-08-15 18:05:00,False,True,False
2023-08-16 05:05:00,False,False,True


Unnamed: 0_level_0,6 a.m. to 6 p.m.,6 p.m. to Midnight,Midnight to 6 a.m.
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-07-31,4,4,2
2022-08-31,106,49,44
2022-09-30,122,44,54
2022-10-31,144,31,47
2022-11-30,148,20,30
2022-12-31,175,12,32
2023-01-31,178,33,11
2023-02-28,153,34,15
2023-03-31,178,39,19
2023-04-30,167,9,28


In [5]:
# create df of average events per day for specified time periods

df_final = pd.DataFrame(data={
    "firstFiveMonths": df_month.iloc[1:6,].sum()/153, # 153 = number of days in period
    "sixMonthsOn": df_month.iloc[6:-1].sum()/181 # 181 = number of days in period
})

# Transpose df 
df_final = df_final.T

# reset index
df_final = df_final.reset_index()

# final check
df_final

Unnamed: 0,index,6 a.m. to 6 p.m.,6 p.m. to Midnight,Midnight to 6 a.m.
0,firstFiveMonths,4.542484,1.019608,1.352941
1,sixMonthsOn,5.265193,0.994475,0.906077


In [6]:
# Save to json
df_final.to_json(path_or_buf="../Data/Website_Data/day_night.json", orient="records")

## Stats for commentary

In [7]:
# recreate df_final without tranpose

df_final = pd.DataFrame(data={
    "firstFiveMonths": df_month.iloc[1:6,].sum()/153, # 153 = number of days in period
    "sixMonthsOn": df_month.iloc[6:-1].sum()/181 # 181 = number of days in period
})

# total nappy changes by time period

print(f"total average nappy changes in first five months: {df_final["firstFiveMonths"].sum()}")

print(f"total average nappy changes six months on: {df_final["sixMonthsOn"].sum()}")

total average nappy changes in first five months: 6.915032679738562
total average nappy changes six months on: 7.165745856353591


In [8]:
# calculate percent of total by time period

df_final["fiveMpercent"] = df_final["firstFiveMonths"] / df_final["firstFiveMonths"].sum()
df_final["sixMpercent"] = df_final["sixMonthsOn"] / df_final["sixMonthsOn"].sum()

df_final

Unnamed: 0,firstFiveMonths,sixMonthsOn,fiveMpercent,sixMpercent
6 a.m. to 6 p.m.,4.542484,5.265193,0.6569,0.734773
6 p.m. to Midnight,1.019608,0.994475,0.147448,0.138782
Midnight to 6 a.m.,1.352941,0.906077,0.195652,0.126446


In [9]:
# number of times there was an event during midnight to 6 a.m. for all time

df_day = df_time.resample("D").sum()

df_day["Midnight to 6 a.m."].value_counts()

Midnight to 6 a.m.
1    223
2     81
0     78
4      1
Name: count, dtype: int64