# Zoom grader

Loads Zoom reports from a predefined folder and calculates attendance.

In [36]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import time

In [147]:
# Folder that holds the Zoom report CSVs and people.txt. File names are appended
# to this string by plain concatenation, so the trailing slash must be kept.
folder_name = '../../data/attendance/'

In [187]:
# Read all attendance data
#
# Every file in `folder_name` whose name starts with 'participant' is parsed
# as a Zoom report and the per-file tables are stacked into one frame, `df`.
# The parsing relies on this layout: line 1 is a header row for the meeting
# summary, line 2 is the summary record itself (with 'Topic' and 'Start Time'),
# and line 3 is the header row of the participant table that follows.

file_list = os.listdir(folder_name)
print("Total files: ", len(file_list))

# Frames are collected in a list and concatenated once after the loop:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies all previously accumulated rows on every iteration.
frames = []

for fname in file_list:
    if not fname.startswith('participant'):
        continue  # Ignore everything that is not a zoom log
    fullname = os.path.join(folder_name, fname)

    # Meeting-level summary: only the first record is needed.
    header = pd.read_csv(fullname, header=0, nrows=1)
    name = header['Topic'][0]
    datestring = header['Start Time'][0][:10]  # first 10 characters are kept as the date

    # Participant table; tag every row with the meeting it came from.
    data = pd.read_csv(fullname, header=2)
    data['Date'] = datestring
    data['Meeting'] = name
    data['User Email'] = data['User Email'].fillna('none')  # NaNs are ignored by aggregation below

    frames.append(data)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# report matched (this is what the original accumulate-by-append code produced).
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

Total files:  48


In [188]:
# Swap the verbose Zoom column headers for short names used by the cells below.
df = df.rename(columns={
    'Name (Original Name)': 'name',
    'User Email': 'email',
    'Total Duration (Minutes)': 'minutes',
})
df.columns  # displayed as the cell output to confirm the new headers

Index(['name', 'email', 'minutes', 'Date', 'Meeting'], dtype='object')

In [148]:
# Load the student roster: a tab-separated file whose first line is the header.
people = pd.read_csv(folder_name + 'people.txt', sep='\t', header=0)
# Full name = first name, a single space, last name.
people['name'] = people['first'].add(' ').add(people['last'])

In [228]:
# Iteratively building summaries
#
# Note that the 'minutes' column changes meaning along the way:
#   1. total minutes per (Date, Meeting, email, name)
#   2. 0/1 flag: 1 when that total exceeds `min_minutes`
#   3. after the second groupby: number of flagged sessions per
#      (email, name, course)

min_minutes = 30  # a session is flagged only when total minutes exceed this

# Step 1: add up all rows that share date, meeting, email and name.
# The string 'sum' is used instead of the builtin `sum`: passing the builtin
# to .agg emits a FutureWarning in recent pandas and is slated to change meaning.
dfs = df.groupby(['Date', 'Meeting', 'email', 'name']).agg({'minutes': 'sum'})
dfs = dfs.reset_index()

# Step 2: turn total minutes into a 0/1 flag.
dfs['minutes'] = (dfs['minutes'] > min_minutes).astype(int)

# Step 3: collapse the various meeting titles into one label per course.
# Titles that are not listed here are left unchanged by .replace.
meeting_dict = {'Neuro Lab - Sep 10': 'Neuro', "Arseny Khakhalin's Zoom Meeting": 'Neuro',
                "Intro Neuro": 'Neuro',
                'Biosem 00': 'Biosem', 'Bard Biosem': 'Biosem'}
dfs['Meeting'] = dfs['Meeting'].replace(meeting_dict)

# Step 4: count flagged sessions per person and course.
dfs = dfs.groupby(['email', 'name', 'Meeting']).agg({'minutes': 'sum'})

# Debugging aid: uncomment the print to inspect the whole table without
# pandas truncating the rows.
with pd.option_context('display.max_rows', 1400):
    #print(dfs)
    pass