In [133]:
import pandas as pd
import altair as alt
import numpy as np
import calendar
import random
from datetime import datetime as dt

In [152]:
df01 = pd.read_csv("SEL_Headcounts_FY19.csv") # SEL_Count_FY22 table
df02 = pd.read_csv("SEL_Headcounts_FY22.csv") # SEL_Count_FY22 table

In [135]:
fall19 = pd.melt(df01, # making a 'tidy' dataframe
                       ["DATE"],
                       var_name="Time",
                       value_name="Count")
fall19 = fall19.sort_values(by=["DATE"])

fall21 = pd.melt(df02, # making a 'tidy' dataframe
                       ["DATE"],
                       var_name="Time",
                       value_name="Count")
fall21 = fall21.sort_values(by=["DATE"])

In [136]:
# International_time() function converts an SEL_Headcount dataframe from a 12-hour clock to 24-hour clock

def international_time(dataframe): # accepts only one arguement (i.e., dataframe)
    dataframe['Count'].unique()
    twelve_hour = list(dataframe['Time'].unique()) #list of the Time values 
    twelve_hour.sort()
    international = ['10:00','22:00','11:00','23:00','00:00','12:00','01:00','13:00','02:00','14:00','03:00','15:00','04:00'
                 ,'16:00', '05:00','17:00','06:00','18:00','07:00','19:00','08:00','20:00','09:00','21:00']
    dataframe['Time'] = dataframe['Time'].replace(twelve_hour,international) 
    return dataframe

In [137]:
semesters = [fall19, fall21] # creating a list containing our SEL Headcount df
for index, item in enumerate(semesters): # will loop equal to the lenght of the list (i.e., twice)
    international_time(item) # inserting a dataframe depending on the index (e.g., semester[0] = fall19)
    # equivalent to semesters[index] == item

In [138]:
print (fall19.shape, '\n\n', fall21.shape) # returns row number and column number, repectfully

(4920, 3) 

 (3672, 3)


In [139]:
# removing non_numeric entries from dataframes
# will result in smaller datasets

fall21 = fall21[pd.to_numeric(fall21['Count'], errors='coerce').notnull()]
fall19 = fall19[pd.to_numeric(fall19['Count'], errors='coerce').notnull()]

In [140]:
print (fall19.shape, '\n\n', fall21.shape) # returns row number and column number, repectfully

(3576, 3) 

 (2130, 3)


In [141]:
# Converting Time header to datetime datatype

fall19['DATE'] = pd.to_datetime(fall19['DATE'], errors='coerce')
fall21['DATE'] = pd.to_datetime(fall21['DATE'], errors='coerce')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fall19['DATE'] = pd.to_datetime(fall19['DATE'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fall21['DATE'] = pd.to_datetime(fall21['DATE'], errors='coerce')


In [142]:
# adding a semester column to the dataframes
fall19['Semester'] = 'Fall2019'
fall21['Semester'] = 'Fall2021'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fall19['Semester'] = 'Fall2019'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fall21['Semester'] = 'Fall2021'


In [143]:
fall21.info()
# .info() method returns information on the 'thing' 
# e.g., row and column number, headers, datatypes, class

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2130 entries, 1530 to 3671
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   DATE      2130 non-null   datetime64[ns]
 1   Time      2130 non-null   object        
 2   Count     2130 non-null   object        
 3   Semester  2130 non-null   object        
dtypes: datetime64[ns](1), object(3)
memory usage: 83.2+ KB


In [144]:
fall19.info()
# .info() method returns information on the 'thing' 
# e.g., row and column number, headers, datatypes, class

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3576 entries, 0 to 4919
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   DATE      3576 non-null   datetime64[ns]
 1   Time      3576 non-null   object        
 2   Count     3576 non-null   object        
 3   Semester  3576 non-null   object        
dtypes: datetime64[ns](1), object(3)
memory usage: 139.7+ KB


In [145]:
# concatiating/combining our two dataframes
df = pd.concat([fall19, fall21])

In [154]:
df # view of joined table

Unnamed: 0,DATE,Time,Count,Semester
0,2019-08-21,07:00,2,Fall2019
3895,2019-08-21,02:00,1,Fall2019
3690,2019-08-21,01:00,2,Fall2019
3485,2019-08-21,00:00,4,Fall2019
3280,2019-08-21,23:00,2,Fall2019
...,...,...,...,...
2753,2021-11-30,00:00,57,Fall2021
2906,2021-11-30,01:00,47,Fall2021
3059,2021-11-30,02:00,26,Fall2021
3212,2021-11-30,03:00,12,Fall2021


Altair limtis the dataset row number to only 5000 more info here: [MaxRowError](https://altair-viz.github.io/user_guide/faq.html)

So, i used a subset of the total data in our dataframe (i.e., table)
df.sample(5000)

In [146]:
hist = alt.Chart(df.sample(5000)).mark_bar().encode(
    alt.X("Time:O"),
    alt.Y("mean(Count):Q", title='Average Count'),
    alt.Color('Semester:N'),
    column='Semester:N',
    tooltip=['mean(Count)','Semester','Time']
)
hist

### Comparison of two Bar Charts, Fall19 and Fall21 

you can use your mouse and view additional data by hovering over the graphs.


X-axis is the time of day in a 24-hour clock 

Y-axis is the average count for that time throughout the semester


In [147]:
alt.Chart(df.sample(5000), title='Fall2019 vs Fall2021').mark_line(point = True).encode(
    x = alt.X("Time:O"),
    y=alt.Y("mean(Count):Q",title='Average Count'),
    color='Semester:N',
    tooltip=['mean(Count)','Semester', 'Time']
)

### Line plot with Fall19 and Fall21 data

you can use your mouse and view additional data by hovering over the graphs.


In [148]:
box = alt.Chart(df.sample(5000)).mark_boxplot(extent='min-max').encode(
    alt.X('Time:O'),
    alt.Y('Count:Q'),
    alt.Color('Semester'),
    alt.Column('Semester')
)

box

### Comparison of two Box plot, Fall19 and Fall21

you can use your mouse and view additional data by hovering over the graphs.

[Box Charts](https://en.wikipedia.org/wiki/Box_plot) are a more informative (e.g.,, providing summary numbers like max, min, average, outliers) version than a traditional bar graph but has a learning curve.

In [149]:
Total = alt.Chart(df.sample(5000), title='Total Counts from Semesters').mark_bar().encode(
    alt.X("Semester:O"),
    alt.Y("sum(Count):Q"),
    alt.Color('Semester:N'),
    tooltip=['mean(Count)', 'sum(Count)']
)
Total

### Bar Chart proving the total count for Fall19 and Fall21

you can use your mouse and view additional data by hovering over the graphs.


In [150]:
fall19HeatMap = alt.Chart(fall19, title='SEL Fall 2019 Average Count').mark_rect().encode(
    alt.X('day(DATE):O',title="Weekday"),
    alt.Y('Time:O', title='hour of day'),
    alt.Color('mean(Count):Q'),
    tooltip=['day(DATE)', 'mean(Count)','Time']
).properties(height=500, width=400).configure_title(fontSize=20)

fall19HeatMap

### Fall2019 GridHeat Map

you can use your mouse and view additional data by hovering over the graphs.

A [heatmap](https://chartio.com/learn/charts/heatmap-complete-guide/) depicts values for a main variable of interest across two axis variables as a grid of colored squares. The axis variables are divided into ranges like a bar chart or histogram, and each cell’s color indicates the value of the main variable in the corresponding cell range.

In [151]:
fall21HeatMap = alt.Chart(fall19, title='SEL Fall 2021 Average Count').mark_rect().encode(
    alt.X('day(DATE):O',title="Weekday"),
    alt.Y('Time:O', title='hour of day'),
    alt.Color('mean(Count):Q'),
    tooltip=['day(DATE)', 'mean(Count)','Time']
).properties(height=500, width=400).configure_title(fontSize=20)

fall21HeatMap

### Fall2019 GridHeat Map

you can use your mouse and view additional data by hovering over the graphs.

A [heatmap](https://chartio.com/learn/charts/heatmap-complete-guide/) depicts values for a main variable of interest across two axis variables as a grid of colored squares. The axis variables are divided into ranges like a bar chart or histogram, and each cell’s color indicates the value of the main variable in the corresponding cell range.