# **2. Exploratory Analysis**

# Setup

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from scipy import stats

# Import Data

**Import Cleaned 2021 SHS Statistics Data**

In [2]:
df = pd.read_excel("2_data/Cleaned 2021 SHS Statistics Data.xlsx", index_col=False);
df[0:1]

Unnamed: 0,Date,Time Range,Task Category,Escalated to SD?,Student Contact,Description
0,2021-01-04,07:00 - 07:30,Academic Advising,No,Outlook/Max Hu,Academic Advising


**Import data file: "Days in 2021.xlsx"**

> This data file contains all of the dates in 2021 and indicates whether the date was a work day or holiday (i.e., intersession, public holiday, work holiday, etc.)

In [3]:
daysInTheYear = pd.read_excel("2_data/Days in 2021.xlsx", index_col=False);
daysInTheYear["Date"] = pd.to_datetime(daysInTheYear["Date"]);
daysInTheYear.head()

Unnamed: 0,Date,Description,Notes
0,2021-01-01,Public Holiday,New Year Day
1,2021-01-02,Weekend,
2,2021-01-03,Weekend,
3,2021-01-04,Work Day,
4,2021-01-05,Work Day,


# Parameters and Functions

**Instantiate variables to serve as parameters for analysis**

In [4]:
# Raw semester date ranges:
winter = ['2021-01-01','2021-04-30'];
summer = ['2021-05-01','2021-08-26'];
fall = ['2021-08-27','2021-12-31'];
semester = [winter, summer, fall];
semesterNames = ["Winter","Summer","Fall"];

# Name of months:
monthName = ["January","February","March","April","May","June","July","August","September","October","November","December"];

# Days of the week:
dayOfWeek = ["Monday","Tuesday","Wednesday","Thursday","Friday"];

# Hours of operation in half-hour time ranges:
timeRange = ['07:00 - 07:30','07:30 - 08:00','08:00 - 08:30','08:30 - 09:00',
               '09:00 - 09:30','09:30 - 10:00','10:00 - 10:30','10:30 - 11:00',
               '11:00 - 11:30','11:30 - 12:00','12:00 - 12:30','12:30 - 13:00',
               '13:00 - 13:30','13:30 - 14:00','14:00 - 14:30','14:30 - 15:00',
               '15:00 - 15:30','15:30 - 16:00','16:00 - 16:30','16:30 - 17:00',
               '17:00 - 17:30','17:30 - 18:00','18:00 - 18:30','18:30 - 19:00',
               '19:00 - 19:30','19:30 - 20:00','20:00 - 20:30','20:30 - 21:00'];

# Time of Day (i.e., Morning = 07:00 - 12:00; Afternoon = 13:00 - 17:00; Evening = 17:00 - 21:00):
timeOfDay = [timeRange[0:9],timeRange[9:19], timeRange[19:]];
timeOfDayName = ["Morning","Afternoon","Evening"];

# Task categories:
taskCategory = ["Academic Advising",
                "Blackboard Issues",
                "Bookstore",
                "Career Centre",
                "Citrix",
                "CNA/Login Issues",
                "Douglas College International",
                "Enrollment Service",
                "Kaltura",
                "Library and Learning Centre",
                "Lockdown Browser",
                "Microsoft 365 Issues",
                "MyAccount Issues",
                "Others",
                "Printing",
                "WiFi Connectivity",
                "Zoom Help"];

**Function to calculate Descriptive Statistics**
> Measures of Central Tendency & Dispersion

In [5]:
def performDescriptiveStatistics(totalCasesPerTimePeriod, row):
  # Central Tendency --------------------------------------------------------------------------------------------
  meanTimePeriod = round(np.average(totalCasesPerTimePeriod), 2);
  row.append(meanTimePeriod);
    
  medianTimePeriod = np.median(totalCasesPerTimePeriod); 
  row.append(medianTimePeriod);
    
  modeTimePeriod = float(str(stats.mode(totalCasesPerTimePeriod)[0])[1]);
  row.append(modeTimePeriod);
  # row.append(stats.mode(totalCasesPerDayInTheMonth));
    
  minTimePeriod = min(totalCasesPerTimePeriod);
  row.append(minTimePeriod);
    
  maxTimePeriod = max(totalCasesPerTimePeriod);
  row.append(maxTimePeriod);

  # Measures of Variability --------------------------------------------------------------------------------------------
  rangeTimePeriod = np.ptp(totalCasesPerTimePeriod);
  row.append(rangeTimePeriod);
    
  firstQuartileTimePeriod = np.percentile(totalCasesPerTimePeriod, 25, interpolation="lower");
  row.append(firstQuartileTimePeriod);
    
  secondQuartileTimePeriod = np.percentile(totalCasesPerTimePeriod, 50, interpolation="lower");
  row.append(secondQuartileTimePeriod);
    
  thirdQuartileTimePeriod = np.percentile(totalCasesPerTimePeriod, 75, interpolation="lower");
  row.append(thirdQuartileTimePeriod);
    
  fourthQuartileTimePeriod = np.percentile(totalCasesPerTimePeriod, 100, interpolation="lower");
  row.append(fourthQuartileTimePeriod);
    
  iqrTimePeriod = stats.iqr(totalCasesPerTimePeriod, rng=(25, 75), interpolation="lower");
  row.append(iqrTimePeriod);

  absDevTimePeriod = round(float(pd.DataFrame(totalCasesPerTimePeriod).mad()), 2);
  row.append(absDevTimePeriod);

  varTimePeriod = round(np.var(totalCasesPerTimePeriod), 2);
  row.append(varTimePeriod);

  stdTimePeriod = round(np.std(totalCasesPerTimePeriod), 2);
  row.append(stdTimePeriod);

  skewTimePeriod = round(stats.skew(totalCasesPerTimePeriod), 2);
  row.append(skewTimePeriod);

  kurtosisTimePeriod = round(stats.kurtosis(totalCasesPerTimePeriod), 2);
  row.append(kurtosisTimePeriod);
  # --------------------------------------------------------------------------------------------

## Analysis - Date

### **Total Cases per Day**

In [6]:
def getTotalCasesPerDay(dataframe):
  workDays = [];  
  for i in daysInTheYear.index:
    if daysInTheYear.loc[i, "Description"] == "Work Day":
      workDays.append(daysInTheYear.loc[i, "Date"]);
  workDays = list(dict.fromkeys(workDays));

  for i in dataframe.index:
    workDays.append(dataframe.loc[i, "Date"]);
  workDays = list(dict.fromkeys(workDays));

  data = [];
  for day in workDays:
    row = [];
    count = 0;
    for i in dataframe.index:
      if day == dataframe.loc[i, "Date"]:
        count += 1;
    row.append(day);
    row.append(count);
    data.append(row);
  dfAgg = pd.DataFrame(data, columns=["Date","Total Cases"]);
  dfAgg = dfAgg.sort_values("Date", ascending=True);
  return dfAgg;

df_totalCasesPerDay = getTotalCasesPerDay(df);
df_totalCasesPerDay

Unnamed: 0,Date,Total Cases
0,2021-01-04,37
1,2021-01-05,27
2,2021-01-06,13
3,2021-01-07,12
4,2021-01-08,18
...,...,...
213,2021-12-13,6
214,2021-12-14,1
215,2021-12-15,4
216,2021-12-16,3


### **Descriptive Statistics of the Year**

In [7]:
def getDescriptiveStatisticsOfTheYear(dataframe_Day):
  labels = ["Year","Total Cases","Work Days","Work Days with no cases",
            "Mean","Median","Mode","Minimum","Maximum",
            "Range","1st Quartile","2nd Quartile","3rd Quartile","4th Quartile","Interquartile Range","Mean Absolute Deviation","Variance","Standard Deviation","Skewness","Kurtosis"];
  
  data = [];
  row = [];
  row.append(dataframe_Day["Date"][0].isocalendar()[0]);
  # row.append(str(theYear));
  totalCases = np.sum(dataframe_Day["Total Cases"]);
  row.append(totalCases);
  workDays = len(dataframe_Day["Total Cases"]);
  row.append(workDays);
  workDaysNoCases = len(dataframe_Day[dataframe_Day["Total Cases"] == 0]);
  row.append(workDaysNoCases);
  performDescriptiveStatistics(dataframe_Day["Total Cases"], row);
  
  data.append(row);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

df_descriptiveStatisticsOfTheYear = getDescriptiveStatisticsOfTheYear(df_totalCasesPerDay);
df_descriptiveStatisticsOfTheYear

Unnamed: 0,Year,Total Cases,Work Days,Work Days with no cases,Mean,Median,Mode,Minimum,Maximum,Range,1st Quartile,2nd Quartile,3rd Quartile,4th Quartile,Interquartile Range,Mean Absolute Deviation,Variance,Standard Deviation,Skewness,Kurtosis
0,2021,1162,239,32,4.86,4.0,0.0,0,37,37,2,4,7,37,5,3.43,25.68,5.07,2.64,10.33


### **Descriptive Statistics per Semester**

In [8]:
def getDescriptiveStatisticsPerSemester(dataframe_Day, semester, semesterNames):
  labels = ["Semester","Total Cases","Work Days","Work Days with no cases",
            "Mean","Median","Mode","Minimum","Maximum",
            "Range","1st Quartile","2nd Quartile","3rd Quartile","4th Quartile",
            "Interquartile Range","Mean Absolute Deviation","Variance",
            "Standard Deviation","Skewness","Kurtosis"];
  
  data = [];
  for i in range(len(semester)):
    mask = (dataframe_Day["Date"] >= semester[i][0]) & (dataframe_Day["Date"] <= semester[i][1]);
    dataframeSemester = dataframe_Day[mask];
    row = [];
    row.append(semesterNames[i]);
    totalCases = np.sum(dataframeSemester["Total Cases"]);
    row.append(totalCases);
    workDays = len(dataframeSemester);
    row.append(workDays);
    workDaysNoCases = 0
    for i in dataframeSemester.index:
      if dataframeSemester.loc[i, "Total Cases"] == 0:
        workDaysNoCases += 1;
    row.append(workDaysNoCases);
    performDescriptiveStatistics(dataframeSemester["Total Cases"], row);
    data.append(row);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

df_descriptiveStatisticsPerSemester = getDescriptiveStatisticsPerSemester(df_totalCasesPerDay, semester, semesterNames)
df_descriptiveStatisticsPerSemester

Unnamed: 0,Semester,Total Cases,Work Days,Work Days with no cases,Mean,Median,Mode,Minimum,Maximum,Range,1st Quartile,2nd Quartile,3rd Quartile,4th Quartile,Interquartile Range,Mean Absolute Deviation,Variance,Standard Deviation,Skewness,Kurtosis
0,Winter,454,82,6,5.54,4.0,3.0,0,37,37,2,4,7,37,5,3.7,32.69,5.72,2.92,11.51
1,Summer,267,80,20,3.34,3.0,0.0,0,12,12,0,3,5,12,5,2.56,9.5,3.08,0.79,-0.14
2,Fall,441,77,6,5.73,4.0,4.0,0,28,28,2,4,7,28,5,3.88,31.39,5.6,2.03,4.59


### **Descriptive Statistics per Month**

In [9]:
# Reference:
# - https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.month_name.html
# - https://www.investopedia.com/terms/d/descriptive_statistics.asp#:~:text=Descriptive%20statistics%20summarizes%20or%20describes,of%20variability%20(or%20spread).&text=Measures%20of%20variability%20or%20spread,of%20data%20within%20the%20set.
# - https://towardsdatascience.com/statistics-central-tendency-5e514a2f98fd
# - https://www.hackerearth.com/blog/developers/descriptive-statistics-python-numpy/
# - https://stats.stackexchange.com/questions/156778/percentile-vs-quantile-vs-quartile
# - https://www.geeksforgeeks.org/absolute-deviation-and-absolute-mean-deviation-using-numpy-python/

def getDescriptiveStatisticsPerMonth(dataframe_Day, monthName):
  labels = ["Month","Total Cases","Work Days","Work Days with no cases",
            "Mean","Median","Mode","Minimum","Maximum",
            "Range","1st Quartile","2nd Quartile","3rd Quartile","4th Quartile",
            "Interquartile Range","Mean Absolute Deviation","Variance",
            "Standard Deviation","Skewness","Kurtosis"];
  
  data = [];
  for month in monthName:
    row = [];
    totalCasesPerDayInTheMonth = [];
    for i in dataframe_Day.index:
      if dataframe_Day.loc[i, "Date"].month_name() == month:
        totalCasesPerDayInTheMonth.append(dataframe_Day.loc[i, "Total Cases"]);
    
    row.append(month);
    sumMonth = np.sum(totalCasesPerDayInTheMonth);
    row.append(sumMonth);
    workDays = len(totalCasesPerDayInTheMonth);
    row.append(workDays);
    workDaysNoCases = totalCasesPerDayInTheMonth.count(0);
    row.append(workDaysNoCases);
    performDescriptiveStatistics(totalCasesPerDayInTheMonth, row);
    
    data.append(row);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

df_descriptiveStatisticsPerMonth = getDescriptiveStatisticsPerMonth(df_totalCasesPerDay, monthName);
df_descriptiveStatisticsPerMonth

Unnamed: 0,Month,Total Cases,Work Days,Work Days with no cases,Mean,Median,Mode,Minimum,Maximum,Range,1st Quartile,2nd Quartile,3rd Quartile,4th Quartile,Interquartile Range,Mean Absolute Deviation,Variance,Standard Deviation,Skewness,Kurtosis
0,January,201,20,1,10.05,7.5,1.0,0,37,37,3,7,13,37,10,6.87,82.75,9.1,1.46,1.76
1,February,82,19,1,4.32,4.0,3.0,0,11,11,2,4,5,11,3,2.16,7.16,2.68,0.71,0.07
2,March,100,23,1,4.35,4.0,2.0,0,10,10,2,4,6,10,4,2.39,7.71,2.78,0.48,-0.72
3,April,71,20,3,3.55,3.0,4.0,0,10,10,1,3,4,10,3,2.21,8.25,2.87,0.89,0.0
4,May,118,20,0,5.9,6.0,6.0,1,12,11,4,6,8,12,4,1.92,6.39,2.53,0.19,0.09
5,June,17,22,14,0.77,0.0,0.0,0,4,4,0,0,1,4,1,0.98,1.45,1.2,1.38,0.62
6,July,89,21,3,4.24,4.0,0.0,0,12,12,2,4,7,12,5,2.58,10.18,3.19,0.65,-0.25
7,August,62,20,3,3.1,2.5,2.0,0,9,9,1,2,4,9,3,2.04,6.89,2.62,0.89,-0.27
8,September,195,20,1,9.75,8.0,6.0,0,28,28,4,8,12,28,8,6.1,60.39,7.77,1.0,0.14
9,October,66,20,4,3.3,3.5,0.0,0,8,8,1,3,5,8,4,2.1,6.11,2.47,0.21,-1.03


### **Descriptive Statistics per Day of Week**

In [10]:
def getDescriptiveStatisticsPerDayOfWeek(dataframe_Day, dayOfWeek):
  labels = ["Weekday","Total Cases","Work Days","Work Days with no cases",
            "Mean","Median","Mode","Minimum","Maximum",
            "Range","1st Quartile","2nd Quartile","3rd Quartile","4th Quartile","Interquartile Range","Mean Absolute Deviation","Variance","Standard Deviation","Skewness","Kurtosis"];

  data = [];
  for day in dayOfWeek:
    row = [];
    totalCasesPerDayOfWeek = [];
    for i in dataframe_Day.index:
      if pd.Timestamp(dataframe_Day.loc[i, "Date"]).day_name() == day:
        totalCasesPerDayOfWeek.append(dataframe_Day.loc[i, "Total Cases"]);
    
    row.append(day);
    totalCases = np.sum(totalCasesPerDayOfWeek);
    row.append(totalCases);
    workDays = len(totalCasesPerDayOfWeek);
    row.append(workDays);
    workDaysNoCases = totalCasesPerDayOfWeek.count(0);
    row.append(workDaysNoCases);
    performDescriptiveStatistics(totalCasesPerDayOfWeek, row);
    data.append(row);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

df_descriptiveStatisticsPerDayOfWeek = getDescriptiveStatisticsPerDayOfWeek(df_totalCasesPerDay, dayOfWeek)
df_descriptiveStatisticsPerDayOfWeek

Unnamed: 0,Weekday,Total Cases,Work Days,Work Days with no cases,Mean,Median,Mode,Minimum,Maximum,Range,1st Quartile,2nd Quartile,3rd Quartile,4th Quartile,Interquartile Range,Mean Absolute Deviation,Variance,Standard Deviation,Skewness,Kurtosis
0,Monday,300,43,3,6.98,6.0,4.0,0,37,37,4,6,8,37,4,3.7,36.72,6.06,2.91,11.67
1,Tuesday,278,50,7,5.56,4.0,0.0,0,28,28,1,4,8,28,7,4.03,33.21,5.76,2.15,5.8
2,Wednesday,199,50,7,3.98,3.0,1.0,0,26,26,1,3,5,26,4,3.06,21.18,4.6,2.63,8.79
3,Thursday,201,46,6,4.37,3.0,3.0,0,22,22,2,3,6,22,4,2.81,15.49,3.94,2.08,6.6
4,Friday,184,50,9,3.68,3.0,2.0,0,21,21,1,3,5,21,4,2.59,15.82,3.98,2.54,7.89


#### ***Descriptive Statistics per Day of Week in a particular semester***

In [11]:
def getDescriptiveStatisticsPerDayOfWeekInSemester(dataframe_Day, dayOfWeek, dateRange):
  mask = (dataframe_Day["Date"] >= dateRange[0]) & (dataframe_Day["Date"] <= dateRange[1]);
  dataframeSemester = dataframe_Day[mask];

  labels = ["Weekday","Total Cases","Work Days","Work Days with no cases",
            "Mean","Median","Mode","Minimum","Maximum",
            "Range","1st Quartile","2nd Quartile","3rd Quartile","4th Quartile","Interquartile Range","Mean Absolute Deviation","Variance","Standard Deviation","Skewness","Kurtosis"];
  dayOfWeek = ["Monday","Tuesday","Wednesday","Thursday","Friday"];
  
  data = [];
  for day in dayOfWeek:
    row = [];
    totalCasesPerDayOfWeek = [];
    for i in dataframeSemester.index:
      if pd.Timestamp(dataframeSemester.loc[i, "Date"]).day_name() == day:
        totalCasesPerDayOfWeek.append(dataframeSemester.loc[i, "Total Cases"]);
    
    row.append(day);
    totalCases = np.sum(totalCasesPerDayOfWeek);
    row.append(totalCases);
    workDays = len(totalCasesPerDayOfWeek);
    row.append(workDays);
    workDaysNoCases = totalCasesPerDayOfWeek.count(0);
    row.append(workDaysNoCases);
    performDescriptiveStatistics(totalCasesPerDayOfWeek, row);
    data.append(row);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

Descriptive Statistics per Day of Week in Winter 2021

In [12]:
df_descriptiveStatisticsPerDayOfWeekInWinter = getDescriptiveStatisticsPerDayOfWeekInSemester(df_totalCasesPerDay, dayOfWeek, winter);
df_descriptiveStatisticsPerDayOfWeekInWinter

Unnamed: 0,Weekday,Total Cases,Work Days,Work Days with no cases,Mean,Median,Mode,Minimum,Maximum,Range,1st Quartile,2nd Quartile,3rd Quartile,4th Quartile,Interquartile Range,Mean Absolute Deviation,Variance,Standard Deviation,Skewness,Kurtosis
0,Monday,138,15,0,9.2,7.0,4.0,2,37,35,4,7,8,37,4,5.25,70.43,8.39,2.42,5.23
1,Tuesday,119,17,2,7.0,6.0,1.0,0,27,27,3,6,10,27,7,4.59,39.88,6.32,1.65,3.26
2,Wednesday,56,17,1,3.29,2.0,1.0,0,13,13,1,2,5,13,4,2.15,8.8,2.97,1.93,3.99
3,Thursday,63,16,1,3.94,3.0,3.0,0,12,12,2,3,6,12,4,2.16,7.68,2.77,1.42,1.94
4,Friday,78,17,2,4.59,4.0,3.0,0,18,18,2,4,5,18,3,2.81,17.3,4.16,1.88,3.76


Descriptive Statistics per Day of Week in Summer 2021

In [13]:
df_descriptiveStatisticsPerDayOfWeekInSummer = getDescriptiveStatisticsPerDayOfWeekInSemester(df_totalCasesPerDay, dayOfWeek, summer);
df_descriptiveStatisticsPerDayOfWeekInSummer

Unnamed: 0,Weekday,Total Cases,Work Days,Work Days with no cases,Mean,Median,Mode,Minimum,Maximum,Range,1st Quartile,2nd Quartile,3rd Quartile,4th Quartile,Interquartile Range,Mean Absolute Deviation,Variance,Standard Deviation,Skewness,Kurtosis
0,Monday,65,14,2,4.64,4.0,3.0,0,12,12,3,3,6,12,3,2.79,11.09,3.33,0.48,-0.39
1,Tuesday,59,17,4,3.47,2.0,0.0,0,9,9,1,2,6,9,5,2.79,9.66,3.11,0.44,-1.31
2,Wednesday,49,17,5,2.88,1.0,0.0,0,12,12,0,1,4,12,4,2.69,11.4,3.38,1.29,0.79
3,Thursday,51,16,5,3.19,3.0,0.0,0,9,9,0,3,4,9,4,2.46,8.9,2.98,0.59,-0.82
4,Friday,43,16,4,2.69,2.0,2.0,0,7,7,0,2,4,7,4,1.77,4.34,2.08,0.29,-0.85


Descriptive Statistics per Day of Week in Fall 2021

In [14]:
df_descriptiveStatisticsPerDayOfWeekInFall = getDescriptiveStatisticsPerDayOfWeekInSemester(df_totalCasesPerDay, dayOfWeek, fall);
df_descriptiveStatisticsPerDayOfWeekInFall

Unnamed: 0,Weekday,Total Cases,Work Days,Work Days with no cases,Mean,Median,Mode,Minimum,Maximum,Range,1st Quartile,2nd Quartile,3rd Quartile,4th Quartile,Interquartile Range,Mean Absolute Deviation,Variance,Standard Deviation,Skewness,Kurtosis
0,Monday,97,14,1,6.93,7.0,4.0,0,15,15,4,7,8,15,4,3.08,15.49,3.94,0.26,-0.5
1,Tuesday,100,16,1,6.25,4.5,6.0,0,28,28,2,4,6,28,4,4.38,43.81,6.62,2.17,4.53
2,Wednesday,94,16,1,5.88,4.0,2.0,0,26,26,2,4,6,26,4,4.22,39.36,6.27,2.12,4.01
3,Thursday,87,14,0,6.21,5.0,1.0,1,22,21,3,5,7,22,4,3.56,26.74,5.17,1.88,3.45
4,Friday,63,17,3,3.71,2.0,0.0,0,21,21,1,2,4,21,3,3.03,23.38,4.84,2.62,6.74


### **Descriptive Statistics per Week**

In [15]:
def getDescriptiveStatisticsPerWeek(dataframe_Day):
  labels = ["Week","Total Cases","Work Days","Work Days with no cases",
            "Mean","Median","Mode","Minimum","Maximum",
            "Range","1st Quartile","2nd Quartile","3rd Quartile","4th Quartile","Interquartile Range","Mean Absolute Deviation","Variance","Standard Deviation","Skewness","Kurtosis"];
  
  workWeekNumbers = [];
  for i in dataframe_Day.index:
    workWeekNumbers.append(dataframe_Day.loc[i, "Date"].isocalendar()[1]);
  workWeekNumbers = list(dict.fromkeys(workWeekNumbers));

  data = [];
  for week in workWeekNumbers:
    row = [];
    totalCasesPerWeek = [];
    for i in dataframe_Day.index:
      if week == dataframe_Day.loc[i, "Date"].isocalendar()[1]:
        totalCasesPerWeek.append(dataframe_Day.loc[i, "Total Cases"]);
    
    row.append(week);
    totalCases = np.sum(totalCasesPerWeek);
    row.append(totalCases);
    workDays = len(totalCasesPerWeek);
    row.append(workDays);
    workDaysNoCases = totalCasesPerWeek.count(0);
    row.append(workDaysNoCases);
    performDescriptiveStatistics(totalCasesPerWeek, row);
    data.append(row);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

df_descriptiveStatisticsPerWeek = getDescriptiveStatisticsPerWeek(df_totalCasesPerDay);
df_descriptiveStatisticsPerWeek

Unnamed: 0,Week,Total Cases,Work Days,Work Days with no cases,Mean,Median,Mode,Minimum,Maximum,Range,1st Quartile,2nd Quartile,3rd Quartile,4th Quartile,Interquartile Range,Mean Absolute Deviation,Variance,Standard Deviation,Skewness,Kurtosis
0,1,107,5,0,21.4,18.0,1.0,12,37,25,13,18,27,37,14,8.48,89.04,9.44,0.6,-1.16
1,2,40,5,0,8.0,6.0,3.0,3,19,16,5,6,7,19,2,4.4,32.0,5.66,1.29,0.0
2,3,20,5,1,4.0,3.0,8.0,0,8,8,1,3,8,8,7,3.2,11.6,3.41,0.18,-1.74
3,4,34,5,0,6.8,6.0,1.0,1,13,12,4,6,10,13,6,3.76,18.16,4.26,0.14,-1.32
4,5,25,5,0,5.0,4.0,2.0,2,11,9,3,4,5,11,2,2.4,10.0,3.16,1.14,-0.21
5,6,21,5,0,4.2,4.0,3.0,3,6,3,3,4,5,6,2,1.04,1.36,1.17,0.36,-1.37
6,7,17,4,0,4.25,3.5,2.0,2,8,6,2,2,5,8,3,2.25,6.19,2.49,0.49,-1.37
7,8,19,5,1,3.8,3.0,0.0,0,8,8,1,3,7,8,6,2.96,10.16,3.19,0.18,-1.67
8,9,26,5,0,5.2,6.0,2.0,2,10,8,2,6,6,10,4,2.56,8.96,2.99,0.34,-1.15
9,10,19,5,0,3.8,3.0,3.0,2,8,6,3,3,3,8,0,1.68,4.56,2.14,1.37,0.11


### Load analyzed/aggregated data in regards to Date

In [16]:
dateData = pd.ExcelWriter("3_analyzedData/Date data.xlsx");

df_totalCasesPerDay.to_excel(dateData, sheet_name="Day", index=False);
df_descriptiveStatisticsOfTheYear.to_excel(dateData, sheet_name="Year", index=False);
df_descriptiveStatisticsPerSemester.to_excel(dateData, sheet_name="Semester", index=False);
df_descriptiveStatisticsPerMonth.to_excel(dateData, sheet_name="Month", index=False);
df_descriptiveStatisticsPerDayOfWeek.to_excel(dateData, sheet_name="Weekday", index=False);
df_descriptiveStatisticsPerDayOfWeekInWinter.to_excel(dateData, sheet_name="Weekday - Winter", index=False);
df_descriptiveStatisticsPerDayOfWeekInSummer.to_excel(dateData, sheet_name="Weekday - Summer", index=False);
df_descriptiveStatisticsPerDayOfWeekInFall.to_excel(dateData, sheet_name="Weekday - Fall", index=False);
df_descriptiveStatisticsPerWeek.to_excel(dateData, sheet_name="Week", index=False);

dateData.save();

## Analysis - Time Range

**Total Cases per Time Range**

In [17]:
def getTotalCasesPerTimeRange(dataframe, timeRange):
  data = [];
  for range in timeRange:
    row = [];
    count = 0;
    for i in dataframe.index:
      if dataframe.loc[i, "Time Range"] == range:
        count += 1;
    row.append(range);
    row.append(count);
    data.append(row);
  dfAgg = pd.DataFrame(data, columns=["Time Range","Total Cases"]);
  return dfAgg;

df_totalCasesPerTimeRange = getTotalCasesPerTimeRange(df, timeRange);
df_totalCasesPerTimeRange

Unnamed: 0,Time Range,Total Cases
0,07:00 - 07:30,180
1,07:30 - 08:00,40
2,08:00 - 08:30,37
3,08:30 - 09:00,40
4,09:00 - 09:30,37
5,09:30 - 10:00,38
6,10:00 - 10:30,50
7,10:30 - 11:00,43
8,11:00 - 11:30,36
9,11:30 - 12:00,39


***Total Cases per Time Range by Time of Day***

In [18]:
# Reference: https://www.britannica.com/dictionary/eb/qa/parts-of-the-day-early-morning-late-morning-etc

def getTotalCasesPerTimeRangeByTimeofDay(dataframe, timeOfDay, timeOfDayName):
    data = [];
    for i in range(len(timeOfDayName)):
        row = [timeOfDayName[i]];
        count = 0;
        for tRange in timeOfDay[i]:
            for j in dataframe.index:
                if dataframe.loc[j, "Time Range"] == tRange:
                    count += 1;
        row.append(count);
        data.append(row);
    labels = ["Time of Day","Total Cases"];
    dfAgg = pd.DataFrame(data, columns=labels);
    return dfAgg;

df_totalCasesPerTimeRangeByTimeOfDay = getTotalCasesPerTimeRangeByTimeofDay(df, timeOfDay, timeOfDayName);
df_totalCasesPerTimeRangeByTimeOfDay

Unnamed: 0,Time of Day,Total Cases
0,Morning,501
1,Afternoon,400
2,Evening,261


**Total Cases per Time Range by Semester**

In [19]:
def getTotalCasesPerTimeRangeBySemester(dataframe, timeRange, semester, semesterNames):
  data = [];
  for i in range(len(semesterNames)):
    mask = (dataframe["Date"] >= semester[i][0]) & (dataframe["Date"] <= semester[i][1]);
    dataframeSemester = dataframe[mask];
    row = [semesterNames[i]];
    for tRange in timeRange:
      count = 0;
      for j in dataframeSemester.index:
        if tRange == dataframeSemester.loc[j, "Time Range"]:
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Semester"];
  for tRange in timeRange:
    labels.append(tRange);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

df_totalCasesPerTimeRangeBySemester = getTotalCasesPerTimeRangeBySemester(df, timeRange, semester, semesterNames);
df_totalCasesPerTimeRangeBySemester

Unnamed: 0,Semester,07:00 - 07:30,07:30 - 08:00,08:00 - 08:30,08:30 - 09:00,09:00 - 09:30,09:30 - 10:00,10:00 - 10:30,10:30 - 11:00,11:00 - 11:30,...,16:00 - 16:30,16:30 - 17:00,17:00 - 17:30,17:30 - 18:00,18:00 - 18:30,18:30 - 19:00,19:00 - 19:30,19:30 - 20:00,20:00 - 20:30,20:30 - 21:00
0,Winter,67,16,21,23,12,19,23,16,16,...,17,21,14,13,11,8,9,4,5,10
1,Summer,47,12,11,12,14,11,10,19,7,...,3,5,3,4,4,5,6,5,8,3
2,Fall,66,12,5,5,11,8,17,8,13,...,21,16,16,21,13,17,13,14,8,5


***Total Cases per Time Range by Semester & Time of Day***

In [20]:
def getTotalCasesPerTimeRangeBySemesterAndTimeOfDay(dataframe, semester, semesterNames, timeOfDay, timeOfDayName):
    data = [];
    for i in range(len(semesterNames)):
        mask = (dataframe["Date"] >= semester[i][0]) & (dataframe["Date"] <= semester[i][1]);
        dataframeSemester = dataframe[mask];
        row = [semesterNames[i]];
        for j in range(len(timeOfDay)):
            count = 0;
            for tRange in timeOfDay[j]:
                for k in dataframeSemester.index:
                    if dataframeSemester.loc[k, "Time Range"] == tRange:
                        count += 1;
            row.append(count);
        data.append(row);
    labels = ["Semester"];
    for tDay in timeOfDayName:
        labels.append(tDay);
    dfAgg = pd.DataFrame(data, columns=labels);
    return dfAgg;

df_totalCasesPerTimeRangeBySemesterAndTimeOfDay = getTotalCasesPerTimeRangeBySemesterAndTimeOfDay(df, semester, semesterNames, timeOfDay, timeOfDayName);
df_totalCasesPerTimeRangeBySemesterAndTimeOfDay

Unnamed: 0,Semester,Morning,Afternoon,Evening
0,Winter,213,146,95
1,Summer,143,81,43
2,Fall,145,173,123


**Total Cases per Time Range by Month**

In [21]:
def getTotalCasesPerTimeRangeByMonth(dataframe, timeRange, monthName):
  data = [];
  for month in monthName:
    row = [month];
    for tRange in timeRange:
      count = 0;
      mask = dataframe["Time Range"] == tRange;
      dataframeTimeRange = dataframe[mask];
      for i in dataframeTimeRange.index:
        if dataframeTimeRange.loc[i, "Date"].month_name() == month:
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Month"];
  for tRange in timeRange:
    labels.append(tRange);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

df_totalCasesPerTimeRangeByMonth = getTotalCasesPerTimeRangeByMonth(df, timeRange, monthName);
df_totalCasesPerTimeRangeByMonth

Unnamed: 0,Month,07:00 - 07:30,07:30 - 08:00,08:00 - 08:30,08:30 - 09:00,09:00 - 09:30,09:30 - 10:00,10:00 - 10:30,10:30 - 11:00,11:00 - 11:30,...,16:00 - 16:30,16:30 - 17:00,17:00 - 17:30,17:30 - 18:00,18:00 - 18:30,18:30 - 19:00,19:00 - 19:30,19:30 - 20:00,20:00 - 20:30,20:30 - 21:00
0,January,22,9,11,18,3,7,11,11,8,...,4,8,6,6,6,3,2,3,2,5
1,February,13,1,3,2,2,4,2,0,2,...,10,8,0,2,3,2,3,1,0,1
2,March,11,4,1,2,3,6,6,3,5,...,1,5,4,3,0,3,4,0,1,4
3,April,21,2,6,1,4,2,4,2,1,...,2,0,4,2,2,0,0,0,2,0
4,May,12,6,5,3,8,6,3,13,2,...,3,1,1,2,1,3,3,1,4,2
5,June,7,2,0,1,2,1,0,2,0,...,0,0,0,0,0,0,0,0,0,0
6,July,18,2,6,4,3,1,6,2,4,...,0,4,2,2,2,2,1,3,4,1
7,August,17,3,0,5,1,3,1,3,2,...,0,0,1,2,2,0,2,1,0,0
8,September,25,7,4,3,4,6,7,5,6,...,7,6,8,10,7,5,2,7,3,3
9,October,4,1,1,0,2,1,6,1,0,...,5,3,1,0,1,3,3,2,2,1


***Total Cases per Time Range by Month & Time of Day***

In [22]:
def getTotalCasesPerTimeRangeByMonthAndTimeOfDay(dataframe, timeOfDay, timeOfDayName, monthName):
    data = [];
    for month in monthName:
        row = [month];
        for i in range(len(timeOfDay)):
            count = 0;
            for tRange in timeOfDay[i]:
                mask = dataframe["Time Range"] == tRange;
                dataframeTimeRange = dataframe[mask];
                for j in dataframeTimeRange.index:
                    if dataframeTimeRange.loc[j, "Date"].month_name() == month:
                        count += 1;
            row.append(count);
        data.append(row);
    labels = ["Month"];
    for tDay in timeOfDayName:
        labels.append(tDay);
    dfAgg = pd.DataFrame(data, columns=labels);
    return dfAgg;

df_totalCasesPerTimeRangeByMonthAndTimeOfDay = getTotalCasesPerTimeRangeByMonthAndTimeOfDay(df, timeOfDay, timeOfDayName, monthName);
df_totalCasesPerTimeRangeByMonthAndTimeOfDay

Unnamed: 0,Month,Morning,Afternoon,Evening
0,January,100,60,41
1,February,29,33,20
2,March,41,35,24
3,April,43,18,10
4,May,58,42,18
5,June,15,2,0
6,July,46,22,21
7,August,35,19,8
8,September,67,77,51
9,October,16,34,16


**Total Cases per Time Range by Day of Week**

In [23]:
def getTotalCasesPerTimeRangeByDayOfWeek(dataframe, timeRange, dayOfWeek):
  data = [];
  for day in dayOfWeek:
    row = [day];
    for tRange in timeRange:
      count = 0;
      mask = dataframe["Time Range"] == tRange;
      dataframeTimeRange = dataframe[mask];
      for i in dataframeTimeRange.index:
        if day == dataframeTimeRange.loc[i, "Date"].day_name():
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Weekday"];
  for tRange in timeRange:
    labels.append(tRange);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

df_totalCasesPerTimeRangeByDayOfWeek = getTotalCasesPerTimeRangeByDayOfWeek(df, timeRange, dayOfWeek);
df_totalCasesPerTimeRangeByDayOfWeek

Unnamed: 0,Weekday,07:00 - 07:30,07:30 - 08:00,08:00 - 08:30,08:30 - 09:00,09:00 - 09:30,09:30 - 10:00,10:00 - 10:30,10:30 - 11:00,11:00 - 11:30,...,16:00 - 16:30,16:30 - 17:00,17:00 - 17:30,17:30 - 18:00,18:00 - 18:30,18:30 - 19:00,19:00 - 19:30,19:30 - 20:00,20:00 - 20:30,20:30 - 21:00
0,Monday,72,11,16,22,12,8,14,10,8,...,3,7,6,4,3,9,11,6,4,2
1,Tuesday,39,15,6,8,7,10,10,15,8,...,11,10,7,8,7,5,7,5,4,3
2,Wednesday,22,3,7,1,8,5,9,5,9,...,11,9,9,10,6,7,5,3,4,4
3,Thursday,18,7,4,2,5,6,8,7,4,...,8,9,7,10,7,6,2,6,6,2
4,Friday,29,4,4,7,5,9,9,6,7,...,8,7,4,6,5,3,3,3,3,7


***Total Cases per Time Range by Day of Week & Time of Day***

In [24]:
def getTotalCasesPerTimeRangeByDayOfWeekAndTimeOfDay(dataframe, timeOfDay, timeOfDayName, dayOfWeek):
    data = [];
    for day in dayOfWeek:
        row = [day];
        for i in range(len(timeOfDay)):
            count = 0;
            for tRange in timeOfDay[i]:
                mask = dataframe["Time Range"] == tRange;
                dataframeTimeRange = dataframe[mask];
                for j in dataframeTimeRange.index:
                    if day == dataframeTimeRange.loc[j, "Date"].day_name():
                        count += 1;
            row.append(count);
        data.append(row);
    labels = ["Weekday"];
    for tDay in timeOfDayName:
        labels.append(tDay);
    dfAgg = pd.DataFrame(data, columns=labels);
    return dfAgg;

df_totalCasesPerTimeRangeByDayOfWeekAndTimeOfDay = getTotalCasesPerTimeRangeByDayOfWeekAndTimeOfDay(df, timeOfDay, timeOfDayName, dayOfWeek);
df_totalCasesPerTimeRangeByDayOfWeekAndTimeOfDay

Unnamed: 0,Weekday,Morning,Afternoon,Evening
0,Monday,173,75,52
1,Tuesday,118,104,56
2,Wednesday,69,73,57
3,Thursday,61,85,55
4,Friday,80,63,41


***Total Cases per Time Range by Day of Week in a particular semester***

In [25]:
def getTotalCasesPerTimeRangeByDayOfWeekInSemester(dataframe, timeRange, dayOfWeek, dateRange):
  maskDate = (dataframe["Date"] >= dateRange[0]) & (dataframe["Date"] <= dateRange[1]);
  dataframeSemester = dataframe[maskDate];
  
  data = [];
  for day in dayOfWeek:
    row = [day];
    for tRange in timeRange:
      count = 0;
      mask = dataframeSemester["Time Range"] == tRange;
      dataframeTimeRange = dataframeSemester[mask];
      for i in dataframeTimeRange.index:
        if day == dataframeTimeRange.loc[i, "Date"].day_name():
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Weekday"];
  for tRange in timeRange:
    labels.append(tRange);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

Total Cases per Time Range by Day of Week in Winter 2021

In [26]:
df_totalCasesPerTimeRangeByDayOfWeekInWinter = getTotalCasesPerTimeRangeByDayOfWeekInSemester(df, timeRange, dayOfWeek, winter);
df_totalCasesPerTimeRangeByDayOfWeekInWinter

Unnamed: 0,Weekday,07:00 - 07:30,07:30 - 08:00,08:00 - 08:30,08:30 - 09:00,09:00 - 09:30,09:30 - 10:00,10:00 - 10:30,10:30 - 11:00,11:00 - 11:30,...,16:00 - 16:30,16:30 - 17:00,17:00 - 17:30,17:30 - 18:00,18:00 - 18:30,18:30 - 19:00,19:00 - 19:30,19:30 - 20:00,20:00 - 20:30,20:30 - 21:00
0,Monday,28,5,13,17,4,5,6,8,2,...,1,3,3,2,3,2,3,1,1,1
1,Tuesday,20,6,0,1,4,8,3,3,6,...,4,6,3,4,0,2,2,1,1,2
2,Wednesday,10,1,1,0,1,2,5,1,3,...,1,3,2,1,2,0,2,0,0,1
3,Thursday,3,3,4,1,1,1,3,1,1,...,5,6,3,5,3,2,0,1,2,1
4,Friday,6,1,3,4,2,3,6,3,4,...,6,3,3,1,3,2,2,1,1,5


Total Cases per Time Range by Day of Week in Summer 2021

In [27]:
df_totalCasesPerTimeRangeByDayOfWeekInSummer = getTotalCasesPerTimeRangeByDayOfWeekInSemester(df, timeRange, dayOfWeek, summer);
df_totalCasesPerTimeRangeByDayOfWeekInSummer

Unnamed: 0,Weekday,07:00 - 07:30,07:30 - 08:00,08:00 - 08:30,08:30 - 09:00,09:00 - 09:30,09:30 - 10:00,10:00 - 10:30,10:30 - 11:00,11:00 - 11:30,...,16:00 - 16:30,16:30 - 17:00,17:00 - 17:30,17:30 - 18:00,18:00 - 18:30,18:30 - 19:00,19:00 - 19:30,19:30 - 20:00,20:00 - 20:30,20:30 - 21:00
0,Monday,16,4,3,3,4,2,3,1,5,...,1,2,1,1,0,2,3,3,1,1
1,Tuesday,7,0,3,4,2,2,2,8,0,...,0,2,1,0,2,2,2,1,1,0
2,Wednesday,4,2,5,1,5,2,1,3,0,...,2,1,0,0,1,1,1,1,2,1
3,Thursday,10,3,0,1,3,3,4,6,2,...,0,0,0,1,1,0,0,0,2,0
4,Friday,10,3,0,3,0,2,0,1,0,...,0,0,1,2,0,0,0,0,2,1


Total Cases per Time Range by Day of Week in Fall 2021

In [28]:
df_totalCasesPerTimeRangeByDayOfWeekInFall = getTotalCasesPerTimeRangeByDayOfWeekInSemester(df, timeRange, dayOfWeek, fall);
df_totalCasesPerTimeRangeByDayOfWeekInFall

Unnamed: 0,Weekday,07:00 - 07:30,07:30 - 08:00,08:00 - 08:30,08:30 - 09:00,09:00 - 09:30,09:30 - 10:00,10:00 - 10:30,10:30 - 11:00,11:00 - 11:30,...,16:00 - 16:30,16:30 - 17:00,17:00 - 17:30,17:30 - 18:00,18:00 - 18:30,18:30 - 19:00,19:00 - 19:30,19:30 - 20:00,20:00 - 20:30,20:30 - 21:00
0,Monday,28,2,0,2,4,1,5,1,1,...,1,2,2,1,0,5,5,2,2,0
1,Tuesday,12,9,3,3,1,0,5,4,2,...,7,2,3,4,5,1,3,3,2,1
2,Wednesday,8,0,1,0,2,1,3,1,6,...,8,5,7,9,3,6,2,2,2,2
3,Thursday,5,1,0,0,1,2,1,0,1,...,3,3,4,4,3,4,2,5,2,1
4,Friday,13,0,1,0,3,4,3,2,3,...,2,4,0,3,2,1,1,2,0,1


***Total Cases per Time Range by Day of Week & Time of Day in a particular semester***

In [29]:
def getTotalCasesPerTimeRangeByDayOfWeekAndTimeOfDayInSemester(dataframe, timeOfDay, timeOfDayName, dayOfWeek, dateRange):
    maskDate = (dataframe["Date"] >= dateRange[0]) & (dataframe["Date"] <= dateRange[1]);
    dataframeSemester = dataframe[maskDate];

    data = [];
    for day in dayOfWeek:
        row = [day];
        for i in range(len(timeOfDay)):
            count = 0;
            for tRange in timeOfDay[i]:
                mask = dataframeSemester["Time Range"] == tRange;
                dataframeTimeRange = dataframeSemester[mask];
                for j in dataframeTimeRange.index:
                    if day == dataframeTimeRange.loc[j, "Date"].day_name():
                        count += 1;
            row.append(count);
        data.append(row);
    
    labels = ["Weekday"];
    for tDay in timeOfDayName:
        labels.append(tDay);
    dfAgg = pd.DataFrame(data, columns=labels);
    return dfAgg;

Total Cases per Time Range by Day of Week & Time of Day in Winter 2021

In [30]:
df_totalCasesPerTimeRangeByDayOfWeekAndTimeOfDayInWinter = getTotalCasesPerTimeRangeByDayOfWeekAndTimeOfDayInSemester(df, timeOfDay, timeOfDayName, dayOfWeek, winter);
df_totalCasesPerTimeRangeByDayOfWeekAndTimeOfDayInWinter

Unnamed: 0,Weekday,Morning,Afternoon,Evening
0,Monday,88,31,19
1,Tuesday,51,47,21
2,Wednesday,24,21,11
3,Thursday,18,22,23
4,Friday,32,25,21


Total Cases per Time Range by Day of Week & Time of Day in Summer 2021

In [31]:
df_totalCasesPerTimeRangeByDayOfWeekAndTimeOfDayInSummer = getTotalCasesPerTimeRangeByDayOfWeekAndTimeOfDayInSemester(df, timeOfDay, timeOfDayName, dayOfWeek, summer);
df_totalCasesPerTimeRangeByDayOfWeekAndTimeOfDayInSummer

Unnamed: 0,Weekday,Morning,Afternoon,Evening
0,Monday,41,10,14
1,Tuesday,28,20,11
2,Wednesday,23,18,8
3,Thursday,32,15,4
4,Friday,19,18,6


Total Cases per Time Range by Day of Week & Time of Day in Fall 2021

In [32]:
df_totalCasesPerTimeRangeByDayOfWeekAndTimeOfDayInFall = getTotalCasesPerTimeRangeByDayOfWeekAndTimeOfDayInSemester(df, timeOfDay, timeOfDayName, dayOfWeek, fall);
df_totalCasesPerTimeRangeByDayOfWeekAndTimeOfDayInFall

Unnamed: 0,Weekday,Morning,Afternoon,Evening
0,Monday,44,34,19
1,Tuesday,39,37,24
2,Wednesday,22,34,38
3,Thursday,11,48,28
4,Friday,29,20,14


**Total Cases per Time Range by Week**

In [33]:
def getTotalCasesPerTimeRangeByWeek(dataframe, timeRange):
  workWeekNumbers = [];
  for i in dataframe.index:
    workWeekNumbers.append(dataframe.loc[i, "Date"].isocalendar()[1]);
  workWeekNumbers = list(dict.fromkeys(workWeekNumbers));

  data = [];
  for week in workWeekNumbers:
    row = [week];
    for tRange in timeRange:
      count = 0;
      mask = dataframe["Time Range"] == tRange;
      dataframeTimeRange = dataframe[mask];
      for i in dataframeTimeRange.index:
        if week == dataframeTimeRange.loc[i, "Date"].isocalendar()[1]:
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Week"];
  for tRange in timeRange:
    labels.append(tRange);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

df_totalCasesPerTimeRangeByWeek = getTotalCasesPerTimeRangeByWeek(df, timeRange);
df_totalCasesPerTimeRangeByWeek

Unnamed: 0,Week,07:00 - 07:30,07:30 - 08:00,08:00 - 08:30,08:30 - 09:00,09:00 - 09:30,09:30 - 10:00,10:00 - 10:30,10:30 - 11:00,11:00 - 11:30,...,16:00 - 16:30,16:30 - 17:00,17:00 - 17:30,17:30 - 18:00,18:00 - 18:30,18:30 - 19:00,19:00 - 19:30,19:30 - 20:00,20:00 - 20:30,20:30 - 21:00
0,1,11,6,6,16,2,6,5,7,5,...,0,5,5,3,1,1,0,1,1,2
1,2,5,0,2,1,0,0,3,1,3,...,3,2,1,0,3,0,0,1,0,1
2,3,3,2,0,0,1,1,0,2,0,...,0,0,0,1,1,1,1,0,0,0
3,4,3,1,3,1,0,0,3,1,0,...,1,1,0,2,1,1,1,1,1,2
4,5,5,0,1,0,0,3,0,0,1,...,2,1,0,0,0,0,2,0,0,1
5,6,3,0,1,0,0,1,1,0,1,...,3,3,0,1,0,0,1,0,0,0
6,7,3,1,0,1,2,0,1,0,0,...,2,0,0,0,1,2,0,1,0,0
7,8,2,0,1,1,0,0,0,0,0,...,3,4,0,1,2,0,0,0,0,0
8,9,2,2,0,0,0,2,2,2,1,...,1,1,0,3,0,0,0,0,1,1
9,10,4,1,0,0,1,1,0,0,1,...,0,1,1,0,0,1,2,0,0,1


***Total Cases per Time Range by Week & Time of Day***

In [34]:
def getTotalCasesPerTimeRangeByWeekAndTimeOfDay(dataframe, timeOfDay, timeOfDayName):
    workWeekNumbers = [];
    for i in dataframe.index:
        workWeekNumbers.append(dataframe.loc[i, "Date"].isocalendar()[1]);
    workWeekNumbers = list(dict.fromkeys(workWeekNumbers));

    data = [];
    for week in workWeekNumbers:
        row = [week];
        for i in range(len(timeOfDay)):
            count = 0;
            for tRange in timeOfDay[i]:
                mask = dataframe["Time Range"] == tRange;
                dataframeTimeRange = dataframe[mask];
                for j in dataframeTimeRange.index:
                    if dataframeTimeRange.loc[j, "Date"].isocalendar()[1] == week:
                        count += 1;
            row.append(count);
        data.append(row);
    labels = ["Week"];
    for tDay in timeOfDayName:
        labels.append(tDay);
    dfAgg = pd.DataFrame(data, columns=labels);
    return dfAgg;

df_totalCasesPerTimeRangeByWeekAndTimeOfDay = getTotalCasesPerTimeRangeByWeekAndTimeOfDay(df, timeOfDay, timeOfDayName);
df_totalCasesPerTimeRangeByWeekAndTimeOfDay

Unnamed: 0,Week,Morning,Afternoon,Evening
0,1,64,24,19
1,2,15,17,8
2,3,9,7,4
3,4,12,12,10
4,5,10,11,4
5,6,7,9,5
6,7,8,5,4
7,8,4,8,7
8,9,11,9,6
9,10,8,5,6


### Load analyzed/aggregated data in regards to Time Range

In [35]:
timeRangeData = pd.ExcelWriter("3_analyzedData/Time Range data.xlsx");

df_totalCasesPerTimeRange.to_excel(timeRangeData, sheet_name="Time Range", index=False);
df_totalCasesPerTimeRangeByTimeOfDay.to_excel(timeRangeData, sheet_name="Time of Day", index=False);

df_totalCasesPerTimeRangeBySemester.to_excel(timeRangeData, sheet_name="Semester", index=False);
df_totalCasesPerTimeRangeBySemesterAndTimeOfDay.to_excel(timeRangeData, sheet_name="Semester - Time of Day", index=False);

df_totalCasesPerTimeRangeByMonth.to_excel(timeRangeData, sheet_name="Month", index=False);
df_totalCasesPerTimeRangeByMonthAndTimeOfDay.to_excel(timeRangeData, sheet_name="Month - Time of Day", index=False);

df_totalCasesPerTimeRangeByDayOfWeek.to_excel(timeRangeData, sheet_name="Weekday", index=False);
df_totalCasesPerTimeRangeByDayOfWeekAndTimeOfDay.to_excel(timeRangeData, sheet_name="Weekday - Time of Day", index=False);

df_totalCasesPerTimeRangeByDayOfWeekInWinter.to_excel(timeRangeData, sheet_name="Weekday - Winter", index=False);
df_totalCasesPerTimeRangeByDayOfWeekInSummer.to_excel(timeRangeData, sheet_name="Weekday - Summer", index=False);
df_totalCasesPerTimeRangeByDayOfWeekInFall.to_excel(timeRangeData, sheet_name="Weekday - Fall", index=False);
df_totalCasesPerTimeRangeByDayOfWeekAndTimeOfDayInWinter.to_excel(timeRangeData, sheet_name="Weekday - Time of Day (Winter)", index=False);
df_totalCasesPerTimeRangeByDayOfWeekAndTimeOfDayInSummer.to_excel(timeRangeData, sheet_name="Weekday - Time of Day (Summer)", index=False);
df_totalCasesPerTimeRangeByDayOfWeekAndTimeOfDayInFall.to_excel(timeRangeData, sheet_name="Weekday - Time of Day (Fall)", index=False);

df_totalCasesPerTimeRangeByWeek.to_excel(timeRangeData, sheet_name="Week", index=False);
df_totalCasesPerTimeRangeByWeekAndTimeOfDay.to_excel(timeRangeData, sheet_name="Week - Time of Day", index=False);

timeRangeData.save();

## Analysis - Task Category

**Total Cases per Task Category**

In [36]:
def getTotalCasesPerTaskCategory(dataframe, taskCategory):
  data = [];
  for category in taskCategory:
    row = [];
    count = 0;
    for i in dataframe.index:
      if dataframe.loc[i, "Task Category"] == category:
        count += 1;
    row.append(category);
    row.append(count);
    data.append(row);
  dfAgg = pd.DataFrame(data, columns=["Task Category","Total Cases"]);
  dfAgg = dfAgg.sort_values("Total Cases", ascending=False);
  return dfAgg;

df_totalCasesPerTaskCategory = getTotalCasesPerTaskCategory(df, taskCategory);
df_totalCasesPerTaskCategory

Unnamed: 0,Task Category,Total Cases
1,Blackboard Issues,304
12,MyAccount Issues,206
5,CNA/Login Issues,190
11,Microsoft 365 Issues,153
13,Others,81
0,Academic Advising,46
7,Enrollment Service,30
10,Lockdown Browser,27
8,Kaltura,26
4,Citrix,23


**Total Cases per Task Category by Semester**

In [37]:
def getTotalCasesPerTaskCategoryBySemester(dataframe, taskCategory, semester, semesterNames):
  data = [];
  for i in range(len(semesterNames)):
    mask = (dataframe["Date"] >= semester[i][0]) & (dataframe["Date"] <= semester[i][1]);
    dataframeSemester = dataframe[mask];
    row = [semesterNames[i]];
    for category in taskCategory:
      count = 0;
      for j in dataframeSemester.index:
        if category == dataframeSemester.loc[j, "Task Category"]:
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Semester"];
  for category in taskCategory:
    labels.append(category);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

df_totalCasesPerTaskCategoryBySemester = getTotalCasesPerTaskCategoryBySemester(df, taskCategory, semester, semesterNames);
df_totalCasesPerTaskCategoryBySemester

Unnamed: 0,Semester,Academic Advising,Blackboard Issues,Bookstore,Career Centre,Citrix,CNA/Login Issues,Douglas College International,Enrollment Service,Kaltura,Library and Learning Centre,Lockdown Browser,Microsoft 365 Issues,MyAccount Issues,Others,Printing,WiFi Connectivity,Zoom Help
0,Winter,14,136,8,0,15,97,6,3,9,8,20,70,35,22,7,0,4
1,Summer,16,53,0,0,8,38,7,18,17,0,7,28,48,20,2,0,5
2,Fall,16,115,1,0,0,55,6,9,0,0,0,55,123,39,1,14,7


**Total Cases per Task Category by Month**

In [38]:
def getTotalCasesPerTaskCategoryByMonth(dataframe, taskCategory, monthName):
  data = [];
  for month in monthName:
    row = [month];
    for category in taskCategory:
      count = 0;
      mask = dataframe["Task Category"] == category;
      dataframeCategory = dataframe[mask];
      for i in dataframeCategory.index:
        if dataframeCategory.loc[i, "Date"].month_name() == month:
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Month"];
  for category in taskCategory:
    labels.append(category);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

df_totalCasesPerTaskCategoryByMonth = getTotalCasesPerTaskCategoryByMonth(df, taskCategory, monthName);
df_totalCasesPerTaskCategoryByMonth

Unnamed: 0,Month,Academic Advising,Blackboard Issues,Bookstore,Career Centre,Citrix,CNA/Login Issues,Douglas College International,Enrollment Service,Kaltura,Library and Learning Centre,Lockdown Browser,Microsoft 365 Issues,MyAccount Issues,Others,Printing,WiFi Connectivity,Zoom Help
0,January,3,82,8,0,9,36,1,1,3,2,7,32,4,9,2,0,2
1,February,1,23,0,0,0,21,2,0,3,2,3,15,3,7,2,0,0
2,March,5,18,0,0,1,27,1,0,2,1,3,16,20,4,1,0,1
3,April,5,13,0,0,5,13,2,2,1,3,7,7,8,2,2,0,1
4,May,13,32,0,0,6,12,2,7,7,0,6,12,4,14,1,0,2
5,June,0,2,0,0,0,1,0,0,3,0,1,1,7,2,0,0,0
6,July,3,13,0,0,2,18,5,7,6,0,0,10,17,4,1,0,3
7,August,2,11,0,0,0,11,0,4,1,0,0,5,28,0,0,0,0
8,September,5,70,1,0,0,20,0,4,0,0,0,29,42,12,0,12,0
9,October,0,15,0,0,0,7,2,0,0,0,0,19,17,5,0,1,0


**Total Cases per Task Category by Day of Week**

In [39]:
def getTotalCasesPerTaskCategoryByDayOfWeek(dataframe, taskCategory, dayOfWeek):
  data = [];
  for day in dayOfWeek:
    row = [day];
    for category in taskCategory:
      count = 0;
      mask = dataframe["Task Category"] == category;
      dataframeCategory = dataframe[mask];
      for i in dataframeCategory.index:
        if day == dataframeCategory.loc[i, "Date"].day_name():
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Weekday"];
  for category in taskCategory:
    labels.append(category);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

df_totalCasesPerTaskCategoryByDayOfWeek = getTotalCasesPerTaskCategoryByDayOfWeek(df, taskCategory, dayOfWeek);
df_totalCasesPerTaskCategoryByDayOfWeek

Unnamed: 0,Weekday,Academic Advising,Blackboard Issues,Bookstore,Career Centre,Citrix,CNA/Login Issues,Douglas College International,Enrollment Service,Kaltura,Library and Learning Centre,Lockdown Browser,Microsoft 365 Issues,MyAccount Issues,Others,Printing,WiFi Connectivity,Zoom Help
0,Monday,14,92,5,0,4,44,4,7,8,1,5,28,54,22,3,2,7
1,Tuesday,11,78,2,0,7,42,8,6,3,1,9,41,48,14,3,4,1
2,Wednesday,3,62,0,0,2,35,3,7,3,0,1,28,32,11,1,5,6
3,Thursday,10,44,1,0,5,33,1,7,9,1,6,31,35,14,1,2,1
4,Friday,8,28,1,0,5,36,3,3,3,5,6,25,37,20,2,1,1


***Total Cases per Task Category by Day of Week in a particular semester***

In [40]:
def getTotalCasesPerTaskCategoryByDayOfWeekInSemester(dataframe, taskCategory, dayOfWeek, dateRange):
  maskDate = (dataframe["Date"] >= dateRange[0]) & (dataframe["Date"] <= dateRange[1]);
  dataframeSemester = dataframe[maskDate];
  
  data = [];
  for day in dayOfWeek:
    row = [day];
    for category in taskCategory:
      count = 0;
      mask = dataframeSemester["Task Category"] == category;
      dataframeCategory = dataframeSemester[mask];
      for i in dataframeCategory.index:
        if day == dataframeCategory.loc[i, "Date"].day_name():
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Weekday"];
  for category in taskCategory:
    labels.append(category);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

Total Cases per Task Category by Day of Week in Winter 2021

In [41]:
df_totalCasesPerTaskCategoryByDayOfWeekInWinter = getTotalCasesPerTaskCategoryByDayOfWeekInSemester(df, taskCategory, dayOfWeek, winter);
df_totalCasesPerTaskCategoryByDayOfWeekInWinter

Unnamed: 0,Weekday,Academic Advising,Blackboard Issues,Bookstore,Career Centre,Citrix,CNA/Login Issues,Douglas College International,Enrollment Service,Kaltura,Library and Learning Centre,Lockdown Browser,Microsoft 365 Issues,MyAccount Issues,Others,Printing,WiFi Connectivity,Zoom Help
0,Monday,5,47,5,0,4,26,3,2,1,1,5,13,13,9,2,0,2
1,Tuesday,3,39,1,0,4,24,2,1,1,1,7,20,7,6,2,0,1
2,Wednesday,1,19,0,0,0,16,1,0,1,0,1,11,3,1,1,0,1
3,Thursday,3,16,1,0,3,10,0,0,3,1,2,14,7,2,1,0,0
4,Friday,2,15,1,0,4,21,0,0,3,5,5,12,5,4,1,0,0


Total Cases per Task Category by Day of Week in Summer 2021

In [42]:
df_totalCasesPerTaskCategoryByDayOfWeekInSummer = getTotalCasesPerTaskCategoryByDayOfWeekInSemester(df, taskCategory, dayOfWeek, summer);
df_totalCasesPerTaskCategoryByDayOfWeekInSummer

Unnamed: 0,Weekday,Academic Advising,Blackboard Issues,Bookstore,Career Centre,Citrix,CNA/Login Issues,Douglas College International,Enrollment Service,Kaltura,Library and Learning Centre,Lockdown Browser,Microsoft 365 Issues,MyAccount Issues,Others,Printing,WiFi Connectivity,Zoom Help
0,Monday,3,13,0,0,0,8,0,2,7,0,0,9,15,6,0,0,2
1,Tuesday,4,15,0,0,3,6,3,4,2,0,2,2,11,6,1,0,0
2,Wednesday,1,9,0,0,2,9,2,6,2,0,0,9,5,3,0,0,1
3,Thursday,6,10,0,0,2,6,1,3,6,0,4,3,8,1,0,0,1
4,Friday,2,6,0,0,1,9,1,3,0,0,1,5,9,4,1,0,1


Total Cases per Task Category by Day of Week in Fall 2021

In [43]:
df_totalCasesPerTaskCategoryByDayOfWeekInFall = getTotalCasesPerTaskCategoryByDayOfWeekInSemester(df, taskCategory, dayOfWeek, fall);
df_totalCasesPerTaskCategoryByDayOfWeekInFall

Unnamed: 0,Weekday,Academic Advising,Blackboard Issues,Bookstore,Career Centre,Citrix,CNA/Login Issues,Douglas College International,Enrollment Service,Kaltura,Library and Learning Centre,Lockdown Browser,Microsoft 365 Issues,MyAccount Issues,Others,Printing,WiFi Connectivity,Zoom Help
0,Monday,6,32,0,0,0,10,1,3,0,0,0,6,26,7,1,2,3
1,Tuesday,4,24,1,0,0,12,3,1,0,0,0,19,30,2,0,4,0
2,Wednesday,1,34,0,0,0,10,0,1,0,0,0,8,24,7,0,5,4
3,Thursday,1,18,0,0,0,17,0,4,0,0,0,14,20,11,0,2,0
4,Friday,4,7,0,0,0,6,2,0,0,0,0,8,23,12,0,1,0


**Total Cases per Task Category by Week**

In [44]:
def getTotalCasesPerTaskCategoryByWeek(dataframe, taskCategory):
  workWeekNumbers = [];
  for i in dataframe.index:
    workWeekNumbers.append(dataframe.loc[i, "Date"].isocalendar()[1]);
  workWeekNumbers = list(dict.fromkeys(workWeekNumbers));

  data = [];
  for week in workWeekNumbers:
    row = [week];
    for category in taskCategory:
      count = 0;
      mask = dataframe["Task Category"] == category;
      dataframeCategory = dataframe[mask];
      for i in dataframeCategory.index:
        if dataframeCategory.loc[i, "Date"].isocalendar()[1] == week:
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Week"];
  for category in taskCategory:
    labels.append(category);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

df_totalCasesPerTaskCategoryByWeek = getTotalCasesPerTaskCategoryByWeek(df, taskCategory);
df_totalCasesPerTaskCategoryByWeek

Unnamed: 0,Week,Academic Advising,Blackboard Issues,Bookstore,Career Centre,Citrix,CNA/Login Issues,Douglas College International,Enrollment Service,Kaltura,Library and Learning Centre,Lockdown Browser,Microsoft 365 Issues,MyAccount Issues,Others,Printing,WiFi Connectivity,Zoom Help
0,1,1,53,6,0,3,27,0,1,2,1,1,6,3,2,0,0,1
1,2,1,17,2,0,2,4,1,0,0,0,1,7,0,5,0,0,0
2,3,1,5,0,0,4,3,0,0,0,0,0,6,0,1,0,0,0
3,4,0,7,0,0,0,2,0,0,1,1,5,13,1,1,2,0,1
4,5,0,4,0,0,0,5,1,0,0,1,2,9,1,1,1,0,0
5,6,0,10,0,0,0,2,1,0,2,0,0,1,1,4,0,0,0
6,7,1,4,0,0,0,10,0,0,0,0,0,1,0,1,0,0,0
7,8,0,5,0,0,0,4,0,0,1,1,1,4,1,1,1,0,0
8,9,4,6,0,0,0,5,0,0,0,0,1,4,6,0,0,0,0
9,10,1,3,0,0,1,7,0,0,0,1,0,2,4,0,0,0,0


**Total Cases per Task Category by Time Range**

In [45]:
def getTotalCasesPerTaskCategoryByTimeRange(dataframe, taskCategory, timeRange):
  data = [];
  for category in taskCategory:
    row = [category];
    for tRange in timeRange:
      count = 0;
      for i in dataframe.index:
        if (dataframe.loc[i, "Task Category"] == category) & (dataframe.loc[i, "Time Range"] == tRange):
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Task Category"];
  for tRange in timeRange:
    labels.append(tRange);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

df_totalCasesPerTaskCategoryByTimeRange = getTotalCasesPerTaskCategoryByTimeRange(df, taskCategory, timeRange);
df_totalCasesPerTaskCategoryByTimeRange

Unnamed: 0,Task Category,07:00 - 07:30,07:30 - 08:00,08:00 - 08:30,08:30 - 09:00,09:00 - 09:30,09:30 - 10:00,10:00 - 10:30,10:30 - 11:00,11:00 - 11:30,...,16:00 - 16:30,16:30 - 17:00,17:00 - 17:30,17:30 - 18:00,18:00 - 18:30,18:30 - 19:00,19:00 - 19:30,19:30 - 20:00,20:00 - 20:30,20:30 - 21:00
0,Academic Advising,16,2,0,0,3,0,2,3,0,...,1,0,0,2,0,0,2,0,1,1
1,Blackboard Issues,37,12,10,15,9,12,13,12,12,...,13,12,10,16,11,10,9,5,4,2
2,Bookstore,3,0,2,1,0,0,0,2,0,...,0,0,0,0,0,0,0,1,0,0
3,Career Centre,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Citrix,3,1,2,1,2,0,0,0,0,...,0,1,1,0,2,1,0,0,1,1
5,CNA/Login Issues,33,7,3,5,3,7,8,7,4,...,5,9,6,6,4,5,4,3,3,3
6,Douglas College International,4,1,0,0,0,0,2,0,0,...,0,0,1,0,1,0,0,2,1,0
7,Enrollment Service,5,1,1,2,0,2,1,2,0,...,0,0,1,0,1,1,0,1,2,1
8,Kaltura,3,1,0,0,0,3,3,2,0,...,2,2,0,1,0,1,1,1,1,2
9,Library and Learning Centre,0,1,0,0,0,0,1,0,1,...,1,0,1,0,1,0,0,0,0,0


***Total Cases per Task Category by Time of Day***

In [46]:
def getTotalCasesPerTaskCategoryByTimeOfDay(dataframe, taskCategory, timeOfDay, timeOfDayName):
    data = [];
    for category in taskCategory:
        row = [category];
        for i in range(len(timeOfDay)):
            count = 0;
            for tRange in timeOfDay[i]:
                mask = dataframe["Time Range"] == tRange;
                dataframeTimeRange = dataframe[mask];
                for j in dataframeTimeRange.index:
                    if dataframeTimeRange.loc[j, "Task Category"] == category:
                        count += 1;
            row.append(count);
        data.append(row);
    labels = ["Task Category"];
    for tDay in timeOfDayName:
        labels.append(tDay);
    dfAgg = pd.DataFrame(data, columns=labels);
    return dfAgg;

df_totalCasesPerTaskCategoryByTimeOfDay = getTotalCasesPerTaskCategoryByTimeOfDay(df, taskCategory, timeOfDay, timeOfDayName);
df_totalCasesPerTaskCategoryByTimeOfDay

Unnamed: 0,Task Category,Morning,Afternoon,Evening
0,Academic Advising,26,14,6
1,Blackboard Issues,132,93,79
2,Bookstore,8,0,1
3,Career Centre,0,0,0
4,Citrix,9,7,7
5,CNA/Login Issues,77,70,43
6,Douglas College International,7,7,5
7,Enrollment Service,14,9,7
8,Kaltura,12,5,9
9,Library and Learning Centre,3,3,2


***Total Cases per Task Category by Time Range in a particular semester***

In [47]:
def getTotalCasesPerTaskCategoryByTimeRangeInSemester(dataframe, taskCategory, timeRange, dateRange):
  mask = (dataframe["Date"] >= dateRange[0]) & (dataframe["Date"] <= dateRange[1]);
  dataframeSemester = dataframe[mask];

  data = [];
  for category in taskCategory:
    row = [category];
    for tRange in timeRange:
      count = 0;
      for i in dataframeSemester.index:
        if (dataframeSemester.loc[i, "Task Category"] == category) & (dataframeSemester.loc[i, "Time Range"] == tRange):
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Task Category"];
  for tRange in timeRange:
    labels.append(tRange);
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

Total Cases per Task Category by Time Range in Winter 2021

In [48]:
df_totalCasesPerTaskCategoryByTimeRangeInWinter = getTotalCasesPerTaskCategoryByTimeRangeInSemester(df, taskCategory, timeRange, winter);
df_totalCasesPerTaskCategoryByTimeRangeInWinter

Unnamed: 0,Task Category,07:00 - 07:30,07:30 - 08:00,08:00 - 08:30,08:30 - 09:00,09:00 - 09:30,09:30 - 10:00,10:00 - 10:30,10:30 - 11:00,11:00 - 11:30,...,16:00 - 16:30,16:30 - 17:00,17:00 - 17:30,17:30 - 18:00,18:00 - 18:30,18:30 - 19:00,19:00 - 19:30,19:30 - 20:00,20:00 - 20:30,20:30 - 21:00
0,Academic Advising,4,1,0,0,1,0,1,1,0,...,1,0,0,1,0,0,2,0,0,1
1,Blackboard Issues,15,5,5,10,3,8,7,4,7,...,5,9,4,6,4,2,3,2,2,1
2,Bookstore,2,0,2,1,0,0,0,2,0,...,0,0,0,0,0,0,0,1,0,0
3,Career Centre,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Citrix,2,1,1,1,1,0,0,0,0,...,0,1,1,0,2,1,0,0,1,1
5,CNA/Login Issues,20,3,2,4,2,5,6,4,3,...,1,1,1,0,1,2,1,1,1,2
6,Douglas College International,2,0,0,0,0,0,2,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Enrollment Service,1,0,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Kaltura,1,0,0,0,0,0,0,1,0,...,1,1,0,1,0,0,0,0,0,2
9,Library and Learning Centre,0,1,0,0,0,0,1,0,1,...,1,0,1,0,1,0,0,0,0,0


Total Cases per Task Category by Time Range in Summer 2021

In [49]:
df_totalCasesPerTaskCategoryByTimeRangeInSummer = getTotalCasesPerTaskCategoryByTimeRangeInSemester(df, taskCategory, timeRange, summer);
df_totalCasesPerTaskCategoryByTimeRangeInSummer

Unnamed: 0,Task Category,07:00 - 07:30,07:30 - 08:00,08:00 - 08:30,08:30 - 09:00,09:00 - 09:30,09:30 - 10:00,10:00 - 10:30,10:30 - 11:00,11:00 - 11:30,...,16:00 - 16:30,16:30 - 17:00,17:00 - 17:30,17:30 - 18:00,18:00 - 18:30,18:30 - 19:00,19:00 - 19:30,19:30 - 20:00,20:00 - 20:30,20:30 - 21:00
0,Academic Advising,5,1,0,0,1,0,1,1,0,...,0,0,0,1,0,0,0,0,1,0
1,Blackboard Issues,4,2,1,3,3,2,0,7,4,...,2,1,0,1,2,1,1,2,1,0
2,Bookstore,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Career Centre,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Citrix,1,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,CNA/Login Issues,8,4,1,1,1,0,1,2,0,...,0,2,1,2,1,1,0,0,1,1
6,Douglas College International,1,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,1,1,0
7,Enrollment Service,4,1,1,1,0,1,1,2,0,...,0,0,0,0,0,1,0,0,2,1
8,Kaltura,2,1,0,0,0,3,3,1,0,...,1,1,0,0,0,1,1,1,1,0
9,Library and Learning Centre,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Total Cases Per Task Category by Time Range in Fall 2021

In [50]:
df_totalCasesPerTaskCategoryByTimeRangeInFall = getTotalCasesPerTaskCategoryByTimeRangeInSemester(df, taskCategory, timeRange, fall);
df_totalCasesPerTaskCategoryByTimeRangeInFall

Unnamed: 0,Task Category,07:00 - 07:30,07:30 - 08:00,08:00 - 08:30,08:30 - 09:00,09:00 - 09:30,09:30 - 10:00,10:00 - 10:30,10:30 - 11:00,11:00 - 11:30,...,16:00 - 16:30,16:30 - 17:00,17:00 - 17:30,17:30 - 18:00,18:00 - 18:30,18:30 - 19:00,19:00 - 19:30,19:30 - 20:00,20:00 - 20:30,20:30 - 21:00
0,Academic Advising,7,0,0,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,Blackboard Issues,18,5,4,2,3,2,6,1,1,...,6,2,6,9,5,7,5,1,1,1
2,Bookstore,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Career Centre,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Citrix,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,CNA/Login Issues,5,0,0,0,0,2,1,1,1,...,4,6,4,4,2,2,3,2,1,0
6,Douglas College International,1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
7,Enrollment Service,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,1,0,0
8,Kaltura,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Library and Learning Centre,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


***Total Cases per Task Category by Time of Day in a particular Semester***

In [51]:
def getTotalCasesPerTaskCategoryByTimeOfDayInSemester(dataframe, taskCategory, timeOfDay, timeOfDayName, dateRange):
    mask = (dataframe["Date"] >= dateRange[0]) & (dataframe["Date"] <= dateRange[1]);
    dataframeSemester = dataframe[mask];

    data = [];
    for category in taskCategory:
        row = [category];
        for i in range(len(timeOfDay)):
            count = 0;
            for tRange in timeOfDay[i]:
                mask = dataframeSemester["Time Range"] == tRange;
                dataframeTimeRange = dataframeSemester[mask];
                for j in dataframeTimeRange.index:
                    if dataframeTimeRange.loc[j, "Task Category"] == category:
                        count += 1;
            row.append(count);
        data.append(row);
    labels = ["Task Category"];
    for tDay in timeOfDayName:
        labels.append(tDay);
    dfAgg = pd.DataFrame(data, columns=labels);
    return dfAgg;

Total Cases per Task Category by Time of Day in Winter 2021

In [52]:
df_totalCasesPerTaskCategoryByTimeOfDayInWinter = getTotalCasesPerTaskCategoryByTimeOfDayInSemester(df, taskCategory, timeOfDay, timeOfDayName, winter);
df_totalCasesPerTaskCategoryByTimeOfDayInWinter

Unnamed: 0,Task Category,Morning,Afternoon,Evening
0,Academic Advising,8,2,4
1,Blackboard Issues,64,39,33
2,Bookstore,7,0,1
3,Career Centre,0,0,0
4,Citrix,6,2,7
5,CNA/Login Issues,49,38,10
6,Douglas College International,4,2,0
7,Enrollment Service,3,0,0
8,Kaltura,2,3,4
9,Library and Learning Centre,3,3,2


Total Cases per Task Category by Time of Day in Summer 2021

In [53]:
df_totalCasesPerTaskCategoryByTimeOfDayInSummer = getTotalCasesPerTaskCategoryByTimeOfDayInSemester(df, taskCategory, timeOfDay, timeOfDayName, summer);
df_totalCasesPerTaskCategoryByTimeOfDayInSummer

Unnamed: 0,Task Category,Morning,Afternoon,Evening
0,Academic Advising,9,5,2
1,Blackboard Issues,26,18,9
2,Bookstore,0,0,0
3,Career Centre,0,0,0
4,Citrix,3,5,0
5,CNA/Login Issues,18,11,9
6,Douglas College International,2,2,3
7,Enrollment Service,11,3,4
8,Kaltura,10,2,5
9,Library and Learning Centre,0,0,0


Total Cases per Task Category by Time of Day in Fall 2021

In [54]:
df_totalCasesPerTaskCategoryByTimeOfDayInFall = getTotalCasesPerTaskCategoryByTimeOfDayInSemester(df, taskCategory, timeOfDay, timeOfDayName, fall);
df_totalCasesPerTaskCategoryByTimeOfDayInFall

Unnamed: 0,Task Category,Morning,Afternoon,Evening
0,Academic Advising,9,7,0
1,Blackboard Issues,42,36,37
2,Bookstore,1,0,0
3,Career Centre,0,0,0
4,Citrix,0,0,0
5,CNA/Login Issues,10,21,24
6,Douglas College International,1,3,2
7,Enrollment Service,0,6,3
8,Kaltura,0,0,0
9,Library and Learning Centre,0,0,0


### Load analyzed/aggregated data in regards to Task Category

In [55]:
taskCategoryData = pd.ExcelWriter("3_analyzedData/Task Category data.xlsx");

df_totalCasesPerTaskCategory.to_excel(taskCategoryData, sheet_name="Task Category", index=False);

df_totalCasesPerTaskCategoryBySemester.to_excel(taskCategoryData, sheet_name="Semester", index=False);

df_totalCasesPerTaskCategoryByMonth.to_excel(taskCategoryData, sheet_name="Month", index=False);

df_totalCasesPerTaskCategoryByDayOfWeek.to_excel(taskCategoryData, sheet_name="Weekday", index=False);

df_totalCasesPerTaskCategoryByDayOfWeekInWinter.to_excel(taskCategoryData, sheet_name="Weekday - Winter", index=False);
df_totalCasesPerTaskCategoryByDayOfWeekInSummer.to_excel(taskCategoryData, sheet_name="Weekday - Summer", index=False);
df_totalCasesPerTaskCategoryByDayOfWeekInFall.to_excel(taskCategoryData, sheet_name="Weekday - Fall", index=False);

df_totalCasesPerTaskCategoryByWeek.to_excel(taskCategoryData, sheet_name="Week", index=False);

df_totalCasesPerTaskCategoryByTimeRange.to_excel(taskCategoryData, sheet_name="Time Range", index=False);

df_totalCasesPerTaskCategoryByTimeOfDay.to_excel(taskCategoryData, sheet_name="Time of Day", index=False);

df_totalCasesPerTaskCategoryByTimeRangeInWinter.to_excel(taskCategoryData, sheet_name="Time Range - Winter", index=False);
df_totalCasesPerTaskCategoryByTimeRangeInSummer.to_excel(taskCategoryData, sheet_name="Time Range - Summer", index=False);
df_totalCasesPerTaskCategoryByTimeRangeInFall.to_excel(taskCategoryData, sheet_name="Time Range - Fall", index=False);

df_totalCasesPerTaskCategoryByTimeOfDayInWinter.to_excel(taskCategoryData, sheet_name="Time of Day - Winter", index=False);
df_totalCasesPerTaskCategoryByTimeOfDayInSummer.to_excel(taskCategoryData, sheet_name="Time of Day - Summer", index=False);
df_totalCasesPerTaskCategoryByTimeOfDayInFall.to_excel(taskCategoryData, sheet_name="Time of Day - Fall", index=False);

taskCategoryData.save();

## Analysis - Escalation Status

**Total Cases per Escalation Status**
> i.e., Escalation Status = Escalated to SD? = Escalated to Service Desk?

In [56]:
def getTotalCasesPerEscalationStatus(dataframe):
  data = [];
  for status in df["Escalated to SD?"].unique().tolist():
    row = [];
    count = 0;
    for i in df.index:
      if df.loc[i, "Escalated to SD?"] == status:
        count += 1;
    row.append(status);
    row.append(count);
    data.append(row);
  dfAgg = pd.DataFrame(data, columns=["Escalated to SD?","Total Cases"]);
  dfAgg = dfAgg.sort_values("Escalated to SD?", ascending=True);
  return dfAgg;

df_totalCasesPerEscalationStatus = getTotalCasesPerEscalationStatus(df);
df_totalCasesPerEscalationStatus

Unnamed: 0,Escalated to SD?,Total Cases
0,No,1018
1,Yes,144


**Total Cases per Escalation Status by Semester**

In [57]:
def getTotalCasesPerEscalationStatusBySemester(dataframe, semester, semesterNames):
  data = [];
  for status in dataframe["Escalated to SD?"].unique().tolist():
    row = [status];
    for i in range(len(semesterNames)):
      mask = (dataframe["Date"] >= semester[i][0]) & (dataframe["Date"] <= semester[i][1]);
      dataframeSemester = dataframe[mask];
      count = 0;
      for j in dataframeSemester.index:
        if dataframeSemester.loc[j, "Escalated to SD?"] == status:
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Escalated to SD?"];
  for semester in semesterNames:
    labels.append(semester);
  dfAgg = pd.DataFrame(data, columns=labels);
  dfAgg = dfAgg.sort_values("Escalated to SD?", ascending=True);
  return dfAgg;

df_totalCasesPerEscalationStatusBySemester = getTotalCasesPerEscalationStatusBySemester(df, semester, semesterNames);
df_totalCasesPerEscalationStatusBySemester

Unnamed: 0,Escalated to SD?,Winter,Summer,Fall
0,No,454,226,338
1,Yes,0,41,103


**Total Cases per Escalation Status by Month**

In [58]:
def getTotalCasesPerEscalationStatusByMonth(dataframe, monthName):
  data = [];
  for status in dataframe["Escalated to SD?"].unique().tolist():
    row = [status];
    for month in monthName:
      count = 0;
      for j in dataframe.index:
        if (dataframe.loc[j, "Escalated to SD?"] == status) & (dataframe.loc[j, "Date"].month_name() == month):
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Escalated to SD?"];
  for month in monthName:
    labels.append(month);
  dfAgg = pd.DataFrame(data, columns=labels);
  dfAgg = dfAgg.sort_values("Escalated to SD?", ascending=True);
  return dfAgg;

df_totalCasesPerEscalationStatusByMonth = getTotalCasesPerEscalationStatusByMonth(df, monthName);
df_totalCasesPerEscalationStatusByMonth

Unnamed: 0,Escalated to SD?,January,February,March,April,May,June,July,August,September,October,November,December
0,No,201,82,100,71,115,13,73,34,162,58,74,35
1,Yes,0,0,0,0,3,4,16,28,33,8,40,12


**Total Cases per Escalation Status by Day of Week**

In [59]:
def getTotalCasesPerEscalationStatusByDayOfWeek(dataframe, dayOfWeek):
  data = [];
  for status in dataframe["Escalated to SD?"].unique().tolist():
    row = [status];
    for day in dayOfWeek:
      count = 0;
      for j in dataframe.index:
        if (dataframe.loc[j, "Escalated to SD?"] == status) & (dataframe.loc[j, "Date"].day_name() == day):
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Escalated to SD?"];
  for day in dayOfWeek:
    labels.append(day);
  dfAgg = pd.DataFrame(data, columns=labels);
  dfAgg = dfAgg.sort_values("Escalated to SD?", ascending=True);
  return dfAgg;

df_totalCasesPerEscalationStatusByDayOfWeek = getTotalCasesPerEscalationStatusByDayOfWeek(df, dayOfWeek);
df_totalCasesPerEscalationStatusByDayOfWeek

Unnamed: 0,Escalated to SD?,Monday,Tuesday,Wednesday,Thursday,Friday
0,No,267,244,175,180,152
1,Yes,33,34,24,21,32


***Total Cases per Escalation Status by Day of Week in a particular semester***

In [60]:
def getTotalCasesPerEscalationStatusByDayOfWeekInSemester(dataframe, dayOfWeek, dateRange):
  mask = (dataframe["Date"] >= dateRange[0]) & (dataframe["Date"] <= dateRange[1]);
  dataframeSemester = dataframe[mask];
  
  data = [];
  for status in dataframe["Escalated to SD?"].unique().tolist():
    row = [status];
    for day in dayOfWeek:
      count = 0; 
      for j in dataframeSemester.index:
        if (dataframeSemester.loc[j, "Escalated to SD?"] == status) & (dataframeSemester.loc[j, "Date"].day_name() == day):
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Escalated to SD?"];
  for day in dayOfWeek:
    labels.append(day);
  dfAgg = pd.DataFrame(data, columns=labels);
  dfAgg = dfAgg.sort_values("Escalated to SD?", ascending=True);
  return dfAgg;

Total Cases per Escalation Status by Day of Week in Winter 2021

In [61]:
df_totalCasesPerEscalationStatusByDayOfWeekInWinter = getTotalCasesPerEscalationStatusByDayOfWeekInSemester(df, dayOfWeek, winter);
df_totalCasesPerEscalationStatusByDayOfWeekInWinter

Unnamed: 0,Escalated to SD?,Monday,Tuesday,Wednesday,Thursday,Friday
0,No,138,119,56,63,78
1,Yes,0,0,0,0,0


Total Cases per Escalation Status by Day of Week in Summer 2021

In [62]:
df_totalCasesPerEscalationStatusByDayOfWeekInSummer = getTotalCasesPerEscalationStatusByDayOfWeekInSemester(df, dayOfWeek, summer);
df_totalCasesPerEscalationStatusByDayOfWeekInSummer

Unnamed: 0,Escalated to SD?,Monday,Tuesday,Wednesday,Thursday,Friday
0,No,55,47,46,44,34
1,Yes,10,12,3,7,9


Total Cases per Esclation Status by Day of Week in Fall 2021

In [63]:
df_totalCasesPerEscalationStatusByDayOfWeekInFall = getTotalCasesPerEscalationStatusByDayOfWeekInSemester(df, dayOfWeek, fall);
df_totalCasesPerEscalationStatusByDayOfWeekInFall

Unnamed: 0,Escalated to SD?,Monday,Tuesday,Wednesday,Thursday,Friday
0,No,74,78,73,73,40
1,Yes,23,22,21,14,23


**Total Cases per Escalation Status by Week**

In [64]:
def getTotalCasesPerEscalationStatusByWeek(dataframe):
  workWeekNumbers = [];
  for i in dataframe.index:
    workWeekNumbers.append(dataframe.loc[i, "Date"].isocalendar()[1]);
  workWeekNumbers = list(dict.fromkeys(workWeekNumbers));

  data = [];
  for status in dataframe["Escalated to SD?"].unique().tolist():
    row = [status];
    for week in workWeekNumbers:
      count = 0;
      for j in dataframe.index:
        if (dataframe.loc[j, "Escalated to SD?"] == status) & (dataframe.loc[j, "Date"].isocalendar()[1] == week):
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Escalated to SD?"];
  for week in workWeekNumbers:
    labels.append("Week " + str(week));
  dfAgg = pd.DataFrame(data, columns=labels);
  return dfAgg;

df_totalCasesPerEscalationStatusByWeek = getTotalCasesPerEscalationStatusByWeek(df);
df_totalCasesPerEscalationStatusByWeek

Unnamed: 0,Escalated to SD?,Week 1,Week 2,Week 3,Week 4,Week 5,Week 6,Week 7,Week 8,Week 9,...,Week 41,Week 42,Week 43,Week 44,Week 45,Week 46,Week 47,Week 48,Week 49,Week 50
0,No,107,40,20,34,25,21,17,19,26,...,15,10,12,17,17,19,10,15,21,10
1,Yes,0,0,0,0,0,0,0,0,0,...,3,3,2,4,15,8,10,4,6,5


**Total Cases per Escalation Status by Time Range**

In [65]:
def getTotalCasesPerEscalationStatusByTimeRange(dataframe, timeRange):
  data = [];
  for status in dataframe["Escalated to SD?"].unique().tolist():
    row = [status];
    for tRange in timeRange:
      count = 0;
      for i in dataframe.index:
        if (dataframe.loc[i, "Escalated to SD?"] == status) & (dataframe.loc[i, "Time Range"] == tRange):
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Escalated to SD?"];
  for tRange in timeRange:
    labels.append(tRange);
  dfAgg = pd.DataFrame(data, columns=labels);
  dfAgg = dfAgg.sort_values("Escalated to SD?", ascending=True);
  return dfAgg;

df_totalCasesPerEscalationStatusByTimeRange = getTotalCasesPerEscalationStatusByTimeRange(df, timeRange);
df_totalCasesPerEscalationStatusByTimeRange

Unnamed: 0,Escalated to SD?,07:00 - 07:30,07:30 - 08:00,08:00 - 08:30,08:30 - 09:00,09:00 - 09:30,09:30 - 10:00,10:00 - 10:30,10:30 - 11:00,11:00 - 11:30,...,16:00 - 16:30,16:30 - 17:00,17:00 - 17:30,17:30 - 18:00,18:00 - 18:30,18:30 - 19:00,19:00 - 19:30,19:30 - 20:00,20:00 - 20:30,20:30 - 21:00
0,No,140,32,37,35,30,34,46,40,30,...,39,42,32,31,25,24,24,20,19,17
1,Yes,40,8,0,5,7,4,4,3,6,...,2,0,1,7,3,6,4,3,2,1


***Total Cases per Escalation Status by Time of Day***

In [66]:
def getTotalCasesPerEscalationStatusByTimeOfDay(dataframe, timeOfDay, timeOfDayName):
    data = [];
    for status in dataframe["Escalated to SD?"].unique().tolist():
        row = [status];
        for i in range(len(timeOfDay)):
            count = 0;
            for tRange in timeOfDay[i]:
                mask = dataframe["Time Range"] == tRange;
                dataframeTimeRange = dataframe[mask];
                for j in dataframeTimeRange.index:
                    if dataframeTimeRange.loc[j, "Escalated to SD?"] == status:
                        count += 1;
            row.append(count);
        data.append(row);
    labels = ["Escalated to SD?"];
    for tDay in timeOfDayName:
        labels.append(tDay);
    dfAgg = pd.DataFrame(data, columns=labels);
    return dfAgg;

df_totalCasesPerEscalationStatusByTimeOfDay = getTotalCasesPerEscalationStatusByTimeOfDay(df, timeOfDay, timeOfDayName);
df_totalCasesPerEscalationStatusByTimeOfDay

Unnamed: 0,Escalated to SD?,Morning,Afternoon,Evening
0,No,424,360,234
1,Yes,77,40,27


***Total Cases per Escalation Status by Time Range in a particular semester***

In [67]:
def getTotalCasesPerEscalationStatusByTimeRangeInSemester(dataframe, timeRange, dateRange):
  mask = (dataframe["Date"] >= dateRange[0]) & (dataframe["Date"] <= dateRange[1]);
  dataframeSemester = dataframe[mask];
  
  data = [];
  for status in dataframe["Escalated to SD?"].unique().tolist():
    row = [status];
    for tRange in timeRange:
      count = 0;
      for i in dataframeSemester.index:
        if (dataframeSemester.loc[i, "Escalated to SD?"] == status) & (dataframeSemester.loc[i, "Time Range"] == tRange):
          count += 1;
      row.append(count);
    data.append(row);
  labels = ["Escalated to SD?"];
  for tRange in timeRange:
    labels.append(tRange);
  dfAgg = pd.DataFrame(data, columns=labels);
  dfAgg = dfAgg.sort_values("Escalated to SD?", ascending=True);
  return dfAgg;

Total Cases per Escalation Status by Time Range in Winter 2021

In [68]:
df_totalCasesPerEscalationStatusByTimeRangeInWinter = getTotalCasesPerEscalationStatusByTimeRangeInSemester(df, timeRange, winter);
df_totalCasesPerEscalationStatusByTimeRangeInWinter

Unnamed: 0,Escalated to SD?,07:00 - 07:30,07:30 - 08:00,08:00 - 08:30,08:30 - 09:00,09:00 - 09:30,09:30 - 10:00,10:00 - 10:30,10:30 - 11:00,11:00 - 11:30,...,16:00 - 16:30,16:30 - 17:00,17:00 - 17:30,17:30 - 18:00,18:00 - 18:30,18:30 - 19:00,19:00 - 19:30,19:30 - 20:00,20:00 - 20:30,20:30 - 21:00
0,No,67,16,21,23,12,19,23,16,16,...,17,21,14,13,11,8,9,4,5,10
1,Yes,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Total Cases per Escalation Status by Time Range in Summer 2021

In [69]:
df_totalCasesPerEscalationStatusByTimeRangeInSummer = getTotalCasesPerEscalationStatusByTimeRangeInSemester(df, timeRange, summer);
df_totalCasesPerEscalationStatusByTimeRangeInSummer

Unnamed: 0,Escalated to SD?,07:00 - 07:30,07:30 - 08:00,08:00 - 08:30,08:30 - 09:00,09:00 - 09:30,09:30 - 10:00,10:00 - 10:30,10:30 - 11:00,11:00 - 11:30,...,16:00 - 16:30,16:30 - 17:00,17:00 - 17:30,17:30 - 18:00,18:00 - 18:30,18:30 - 19:00,19:00 - 19:30,19:30 - 20:00,20:00 - 20:30,20:30 - 21:00
0,No,32,8,11,9,11,9,9,18,7,...,3,5,3,3,4,5,6,5,8,3
1,Yes,15,4,0,3,3,2,1,1,0,...,0,0,0,1,0,0,0,0,0,0


Total Cases per Escalation Status by Time Range in Fall 2021

In [70]:
df_totalCasesPerEscalationStatusByTimeRangeInFall = getTotalCasesPerEscalationStatusByTimeRangeInSemester(df, timeRange, fall);
df_totalCasesPerEscalationStatusByTimeRangeInFall

Unnamed: 0,Escalated to SD?,07:00 - 07:30,07:30 - 08:00,08:00 - 08:30,08:30 - 09:00,09:00 - 09:30,09:30 - 10:00,10:00 - 10:30,10:30 - 11:00,11:00 - 11:30,...,16:00 - 16:30,16:30 - 17:00,17:00 - 17:30,17:30 - 18:00,18:00 - 18:30,18:30 - 19:00,19:00 - 19:30,19:30 - 20:00,20:00 - 20:30,20:30 - 21:00
0,No,41,8,5,3,7,6,14,6,7,...,19,16,15,15,10,11,9,11,6,4
1,Yes,25,4,0,2,4,2,3,2,6,...,2,0,1,6,3,6,4,3,2,1


***Total Cases per Escalation Status by Time of Day in a particular semester***

In [71]:
def getTotalCasesPerEscalationStatusByTimeOfDayInSemester(dataframe, timeOfDay, timeOfDayName, dateRange):
    mask = (dataframe["Date"] >= dateRange[0]) & (dataframe["Date"] <= dateRange[1]);
    dataframeSemester = dataframe[mask];

    data = [];
    for status in dataframe["Escalated to SD?"].unique().tolist():
        row = [status];
        for i in range(len(timeOfDay)):
            count = 0;
            for tRange in timeOfDay[i]:
                mask = dataframeSemester["Time Range"] == tRange;
                dataframeTimeRange = dataframeSemester[mask];
                for j in dataframeTimeRange.index:
                    if dataframeTimeRange.loc[j, "Escalated to SD?"] == status:
                        count += 1;
            row.append(count);
        data.append(row);
    labels = ["Escalated to SD?"];
    for tDay in timeOfDayName:
        labels.append(tDay);
    dfAgg = pd.DataFrame(data, columns=labels);
    return dfAgg;

Total Cases per Escalation Status by Time of Day in Winter 2021

In [72]:
df_totalCasesPerEscalationStatusByTimeOfDayInWinter = getTotalCasesPerEscalationStatusByTimeOfDayInSemester(df, timeOfDay, timeOfDayName, winter);
df_totalCasesPerEscalationStatusByTimeOfDayInWinter

Unnamed: 0,Escalated to SD?,Morning,Afternoon,Evening
0,No,213,146,95
1,Yes,0,0,0


Total Cases per Escalation Status by Time of Day in Summer 2021

In [73]:
df_totalCasesPerEscalationStatusByTimeOfDayInSummer = getTotalCasesPerEscalationStatusByTimeOfDayInSemester(df, timeOfDay, timeOfDayName, summer);
df_totalCasesPerEscalationStatusByTimeOfDayInSummer

Unnamed: 0,Escalated to SD?,Morning,Afternoon,Evening
0,No,114,70,42
1,Yes,29,11,1


Total Cases per Escalation Status by Time of Day in Fall 2021

In [74]:
df_totalCasesPerEscalationStatusByTimeOfDayInFall = getTotalCasesPerEscalationStatusByTimeOfDayInSemester(df, timeOfDay, timeOfDayName, fall);
df_totalCasesPerEscalationStatusByTimeOfDayInFall

Unnamed: 0,Escalated to SD?,Morning,Afternoon,Evening
0,No,97,144,97
1,Yes,48,29,26


**Total Cases per Escalation Status by Task Category**

In [75]:
def getTotalCasesPerEscalationStatusByTaskCategory(dataframe, taskCategory):
  data = [];
  for status in dataframe["Escalated to SD?"].unique().tolist():
    row = [status];
    for category in taskCategory:
      count = len(dataframe[(dataframe["Escalated to SD?"] == status) & (dataframe["Task Category"] == category)])
      row.append(count);
    data.append(row);
  labels = ["Escalated to SD?"];
  for category in taskCategory:
    labels.append(category);
  dfAgg = pd.DataFrame(data, columns=labels);
  dfAgg = dfAgg.sort_values("Escalated to SD?", ascending=True);
  return dfAgg;

df_totalCasesPerEscalationStatusByTaskCategory = getTotalCasesPerEscalationStatusByTaskCategory(df, taskCategory);
df_totalCasesPerEscalationStatusByTaskCategory

Unnamed: 0,Escalated to SD?,Academic Advising,Blackboard Issues,Bookstore,Career Centre,Citrix,CNA/Login Issues,Douglas College International,Enrollment Service,Kaltura,Library and Learning Centre,Lockdown Browser,Microsoft 365 Issues,MyAccount Issues,Others,Printing,WiFi Connectivity,Zoom Help
0,No,46,297,9,0,23,174,19,29,26,8,27,152,90,80,9,13,16
1,Yes,0,7,0,0,0,16,0,1,0,0,0,1,116,1,1,1,0


***Total Cases per Escalation Status by Task Category in a particular semester***

In [76]:
def getTotalCasesPerEscalationStatusByTaskCategoryInSemester(dataframe, taskCategory, dateRange):
  mask = (dataframe["Date"] >= dateRange[0]) & (dataframe["Date"] <= dateRange[1]);
  dataframeSemester = dataframe[mask];

  data = [];
  for status in dataframe["Escalated to SD?"].unique().tolist():
    row = [status];
    for category in taskCategory:
      count = len(dataframeSemester[(dataframeSemester["Escalated to SD?"] == status) & (dataframeSemester["Task Category"] == category)])
      row.append(count);
    data.append(row);
  labels = ["Escalated to SD?"];
  for category in taskCategory:
    labels.append(category);
  dfAgg = pd.DataFrame(data, columns=labels);
  dfAgg = dfAgg.sort_values("Escalated to SD?", ascending=True);
  return dfAgg;

Total Cases per Escalation Status by Task Category in Winter 2021

In [77]:
df_totalCasesPerEscalationStatusByTaskCategoryInWinter = getTotalCasesPerEscalationStatusByTaskCategoryInSemester(df, taskCategory, winter);
df_totalCasesPerEscalationStatusByTaskCategoryInWinter

Unnamed: 0,Escalated to SD?,Academic Advising,Blackboard Issues,Bookstore,Career Centre,Citrix,CNA/Login Issues,Douglas College International,Enrollment Service,Kaltura,Library and Learning Centre,Lockdown Browser,Microsoft 365 Issues,MyAccount Issues,Others,Printing,WiFi Connectivity,Zoom Help
0,No,14,136,8,0,15,97,6,3,9,8,20,70,35,22,7,0,4
1,Yes,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Total Cases per Escalation Status by Task Category in Summer 2021

In [78]:
df_totalCasesPerEscalationStatusByTaskCategoryInSummer = getTotalCasesPerEscalationStatusByTaskCategoryInSemester(df, taskCategory, summer);
df_totalCasesPerEscalationStatusByTaskCategoryInSummer

Unnamed: 0,Escalated to SD?,Academic Advising,Blackboard Issues,Bookstore,Career Centre,Citrix,CNA/Login Issues,Douglas College International,Enrollment Service,Kaltura,Library and Learning Centre,Lockdown Browser,Microsoft 365 Issues,MyAccount Issues,Others,Printing,WiFi Connectivity,Zoom Help
0,No,16,53,0,0,8,30,7,17,17,0,7,28,18,19,1,0,5
1,Yes,0,0,0,0,0,8,0,1,0,0,0,0,30,1,1,0,0


Total Cases per Escalation Status by Task Category in Fall 2021

In [79]:
df_totalCasesPerEscalationStatusByTaskCategoryInFall = getTotalCasesPerEscalationStatusByTaskCategoryInSemester(df, taskCategory, fall);
df_totalCasesPerEscalationStatusByTaskCategoryInFall

Unnamed: 0,Escalated to SD?,Academic Advising,Blackboard Issues,Bookstore,Career Centre,Citrix,CNA/Login Issues,Douglas College International,Enrollment Service,Kaltura,Library and Learning Centre,Lockdown Browser,Microsoft 365 Issues,MyAccount Issues,Others,Printing,WiFi Connectivity,Zoom Help
0,No,16,108,1,0,0,47,6,9,0,0,0,54,37,39,1,13,7
1,Yes,0,7,0,0,0,8,0,0,0,0,0,1,86,0,0,1,0


### Load analyzed/aggregated data in regards to Escalation Status

In [80]:
escalationStatusData = pd.ExcelWriter("3_analyzedData/Escalation Status data.xlsx");

df_totalCasesPerEscalationStatus.to_excel(escalationStatusData, sheet_name="Escalation Status", index=False);

df_totalCasesPerEscalationStatusBySemester.to_excel(escalationStatusData, sheet_name="Semester", index=False);

df_totalCasesPerEscalationStatusByMonth.to_excel(escalationStatusData, sheet_name="Month", index=False);

df_totalCasesPerEscalationStatusByDayOfWeek.to_excel(escalationStatusData, sheet_name="Weekday", index=False);

df_totalCasesPerEscalationStatusByDayOfWeekInWinter.to_excel(escalationStatusData, sheet_name="Weekday - Winter", index=False);
df_totalCasesPerEscalationStatusByDayOfWeekInSummer.to_excel(escalationStatusData, sheet_name="Weekday - Summer", index=False);
df_totalCasesPerEscalationStatusByDayOfWeekInFall.to_excel(escalationStatusData, sheet_name="Weekday - Fall", index=False);

df_totalCasesPerEscalationStatusByWeek.to_excel(escalationStatusData, sheet_name="Week", index=False);

df_totalCasesPerEscalationStatusByTimeRange.to_excel(escalationStatusData, sheet_name="Time Range", index=False);

df_totalCasesPerEscalationStatusByTimeOfDay.to_excel(escalationStatusData, sheet_name="Time of Day", index=False);

df_totalCasesPerEscalationStatusByTimeRangeInWinter.to_excel(escalationStatusData, sheet_name="Time Range - Winter", index=False);
df_totalCasesPerEscalationStatusByTimeRangeInSummer.to_excel(escalationStatusData, sheet_name="Time Range - Summer", index=False);
df_totalCasesPerEscalationStatusByTimeRangeInFall.to_excel(escalationStatusData, sheet_name="Time Range - Fall", index=False);

df_totalCasesPerEscalationStatusByTimeOfDayInWinter.to_excel(escalationStatusData, sheet_name="Time of Day - Winter", index=False);
df_totalCasesPerEscalationStatusByTimeOfDayInSummer.to_excel(escalationStatusData, sheet_name="Time of Day - Summer", index=False);
df_totalCasesPerEscalationStatusByTimeOfDayInFall.to_excel(escalationStatusData, sheet_name="Time of Day - Fall", index=False);

df_totalCasesPerEscalationStatusByTaskCategory.to_excel(escalationStatusData, sheet_name="Task Category", index=False);

df_totalCasesPerEscalationStatusByTaskCategoryInWinter.to_excel(escalationStatusData, sheet_name="Task Category - Winter", index=False);
df_totalCasesPerEscalationStatusByTaskCategoryInSummer.to_excel(escalationStatusData, sheet_name="Task Category - Summer", index=False);
df_totalCasesPerEscalationStatusByTaskCategoryInFall.to_excel(escalationStatusData, sheet_name="Task Category - Fall", index=False);

escalationStatusData.save();