In [16]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime

import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [26]:
# Load the cleaned course schedule from the CSV next to this notebook.
df = pd.read_csv(filepath_or_buffer='clean_coursebook.csv')

In [27]:
# Show the column labels of the loaded schedule.
df.columns

Index(['Term Status', 'Class Section', 'Class Title', 'Instructor', 'Day',
       'Start_Time', 'End_Time', 'Location'],
      dtype='object')

In [28]:
# Display the frame; pandas elides the middle rows in the rendered output.
df

Unnamed: 0,Term Status,Class Section,Class Title,Instructor,Day,Start_Time,End_Time,Location
0,22SFull,ACCT 2301.001,Introductory Financial Accounting,Jieying Zhang,Monday,08:00:00,08:50:00,JSOM 1.117
1,22SFull,ACCT 2301.001,Introductory Financial Accounting,Jieying Zhang,Wednesday,08:00:00,08:50:00,JSOM 1.117
2,22SFull,ACCT 2301.001,Introductory Financial Accounting,Jieying Zhang,Friday,08:00:00,08:50:00,JSOM 1.117
3,22SFull,ACCT 2301.002,Introductory Financial Accounting,Jieying Zhang,Monday,09:00:00,09:50:00,JSOM 1.117
4,22SFull,ACCT 2301.002,Introductory Financial Accounting,Jieying Zhang,Wednesday,09:00:00,09:50:00,JSOM 1.117
...,...,...,...,...,...,...,...,...
3832,22SOpen,VPAS 6343.001,Essential Plays,Fred Curchack,Monday,16:00:00,18:45:00,JO 4.502
3833,22SFull,VPAS 6373.001,Studies in Film Television and Digital Media -...,Shilyh Warren,Tuesday,16:00:00,18:45:00,ML2 1.216
3834,22SOpen,VPAS 6377.001,Critical Theory and the Visual Arts - Screenin...,Emily-Rose Baker,Thursday,16:00:00,18:45:00,GR 4.208
3835,22SOpen,VPAS 6393.001,Creativity Time-Based Arts Workshop - Video Pe...,Laura Kim,Wednesday,16:00:00,18:45:00,ATC 4.910


In [6]:
# Convert the schedule's time columns from 'HH:MM:SS' text to datetime.time
# objects so they can be compared with the datetime.time values used when
# sampling the day.
#
# The whole column is parsed in one vectorised call instead of one
# pd.to_datetime call per row. Going through str first also makes this cell
# safe to re-run: datetime.time values render back to 'HH:MM:SS' and parse
# again, whereas pd.to_datetime rejects datetime.time objects directly.
for time_col in ('Start_Time', 'End_Time'):
    df[time_col] = pd.to_datetime(df[time_col].astype(str), format='%H:%M:%S').dt.time

In [7]:
def get_number_of_classes_given_time_and_day(t, d, df):
    """Count the rows of ``df`` that are in session at time ``t`` on day ``d``.

    A row is counted when ``Start_Time <= t <= End_Time`` (both ends
    inclusive) and its ``Day`` value equals ``d``.

    Parameters
    ----------
    t : value comparable with the entries of ``Start_Time`` / ``End_Time``
    d : value tested for equality against the ``Day`` column
    df : pandas.DataFrame with ``Start_Time``, ``End_Time`` and ``Day`` columns

    Returns
    -------
    int
        Number of matching rows.
    """
    has_started = df['Start_Time'] <= t
    not_yet_over = df['End_Time'] >= t
    on_day = df['Day'] == d
    return len(df.loc[has_started & not_yet_over & on_day])

In [8]:
# Count the distinct values found in the Location column.
number_of_rooms = len({room for room in df['Location']})
print(f'Number of rooms in this data : {number_of_rooms}')

Number of rooms in this data : 219


In [9]:
# Sampling grid for the day: one datetime.time every 15 minutes,
# from 00:00 up to and including 23:45.
time_range = [datetime.time(hour=minutes // 60, minute=minutes % 60)
              for minutes in range(0, 24*60, 15)]

In [10]:
# Monday's count of classes in session at each point of time_range.
# NOTE(review): despite the name, this counts matching schedule rows, not rooms.
used_rooms = list(map(
    lambda slot: get_number_of_classes_given_time_and_day(slot, 'Monday', df),
    time_range,
))

In [19]:
# One subplot row per weekday, top to bottom, sharing the time-of-day x-axis.
weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
fig = make_subplots(rows=len(weekdays), cols=1, shared_xaxes=True)

# Each trace samples the number of classes in session at every point of
# time_range. add_trace(row=, col=) replaces the deprecated append_trace.
for row, day in enumerate(weekdays, start=1):
    fig.add_trace(
        go.Scatter(
            x=time_range,
            y=[get_number_of_classes_given_time_and_day(t, day, df) for t in time_range],
            name=day,
        ),
        row=row, col=1,
    )

fig.update_layout(
    title_text="Classes in session by time of day",
    autosize=False,
    width=1000,
    height=900,
    margin=dict(l=50, r=50, b=100, t=100, pad=4),
)

# Label only the middle row so the y-axis title appears once for the stack,
# and only the bottom row for the shared x-axis.
fig.update_yaxes(title_text="Number of classes going on", row=3, col=1)
fig.update_xaxes(title_text="Time of day", row=len(weekdays), col=1)

fig.show()

- 10 am to 3:45 pm is when the most classes are going on in general
- Friday doesn't have as many classes going on during 10 am to 3:45 pm as the other days do
- After 3:45 pm, the number of classes going on starts to taper off

In [12]:
def get_number_of_classes_that_end_at_time_t(t, d, df):
    """Count the rows of ``df`` whose ``End_Time`` equals ``t`` on day ``d``.

    Parameters
    ----------
    t : value tested for equality against the ``End_Time`` column
    d : value tested for equality against the ``Day`` column
    df : pandas.DataFrame with ``End_Time`` and ``Day`` columns

    Returns
    -------
    int
        Number of matching rows.
    """
    ends_now = df['End_Time'] == t
    on_day = df['Day'] == d
    return len(df.loc[ends_now & on_day])

In [14]:
# One subplot row per weekday, top to bottom, sharing the time-of-day x-axis.
weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
fig = make_subplots(rows=len(weekdays), cols=1, shared_xaxes=True)

# Each trace counts the classes whose end time equals each point of
# time_range. add_trace(row=, col=) replaces the deprecated append_trace.
for row, day in enumerate(weekdays, start=1):
    fig.add_trace(
        go.Scatter(
            x=time_range,
            y=[get_number_of_classes_that_end_at_time_t(t, day, df) for t in time_range],
            name=day,
        ),
        row=row, col=1,
    )

fig.update_layout(
    title_text="Classes ending by time of day",
    autosize=False,
    width=1000,
    height=900,
    margin=dict(l=50, r=50, b=100, t=100, pad=4),
)

# Label only the middle row so the y-axis title appears once for the stack,
# and only the bottom row for the shared x-axis.
fig.update_yaxes(title_text="Number of classes ending", row=3, col=1)
fig.update_xaxes(title_text="Time of day", row=len(weekdays), col=1)

fig.show()

- 11:15 am
    - on Monday, 73 classes end
    - on Tuesday, 121 classes end
    - on Wednesday, 76 classes end
    - on Thursday, 123 classes end
    - on Friday, 4 classes end -> not that useful
    
----

- 12:45 pm
    - on Monday, 118 classes end
    - on Tuesday, 134 classes end
    - on Wednesday, 124 classes end
    - on Thursday, 132 classes end
    - on Friday, 53 classes end

In [20]:
def get_number_of_classes_that_start_at_time_t(t, d, df):
    """Count the rows of ``df`` whose ``Start_Time`` equals ``t`` on day ``d``.

    Parameters
    ----------
    t : value tested for equality against the ``Start_Time`` column
    d : value tested for equality against the ``Day`` column
    df : pandas.DataFrame with ``Start_Time`` and ``Day`` columns

    Returns
    -------
    int
        Number of matching rows.
    """
    starts_now = df['Start_Time'] == t
    on_day = df['Day'] == d
    return len(df.loc[starts_now & on_day])

In [21]:
# One subplot row per weekday, top to bottom, sharing the time-of-day x-axis.
weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
fig = make_subplots(rows=len(weekdays), cols=1, shared_xaxes=True)

# Each trace counts the classes whose start time equals each point of
# time_range. add_trace(row=, col=) replaces the deprecated append_trace.
for row, day in enumerate(weekdays, start=1):
    fig.add_trace(
        go.Scatter(
            x=time_range,
            y=[get_number_of_classes_that_start_at_time_t(t, day, df) for t in time_range],
            name=day,
        ),
        row=row, col=1,
    )

fig.update_layout(
    title_text="Classes starting by time of day",
    autosize=False,
    width=1000,
    height=900,
    margin=dict(l=50, r=50, b=100, t=100, pad=4),
)

# Label only the middle row so the y-axis title appears once for the stack,
# and only the bottom row for the shared x-axis.
fig.update_yaxes(title_text="Number of classes starting", row=3, col=1)
fig.update_xaxes(title_text="Time of day", row=len(weekdays), col=1)

fig.show()

- 10:00 am
    - on Monday, 140 classes start
    - on Tuesday, 153 classes start
    - on Wednesday, 146 classes start
    - on Thursday, 156 classes start
    - on Friday, 91 classes start
    
----

- 11:30 am
    - on Monday, 84 classes start
    - on Tuesday, 109 classes start
    - on Wednesday, 87 classes start
    - on Thursday, 107 classes start
    - on Friday, 5 classes start -> not that useful