# Biosem forms viewer

Expects all pre-lecture forms (Google Docs exported as Excel) to be placed in a single folder, with the full list of students stored in a separate file. Matches student emails between the list and the forms, and produces a summary.

In [None]:
import numpy as np
import pandas as pd
import os

In [None]:
# Folder that holds the exported form workbooks (.xlsx) together with
# people.txt and alt_emails.csv. Keep the trailing slash: file names are
# appended to this string by plain concatenation.
folder_name = '../../data/biosem_forms/'

In [None]:
# Read student list
# people.txt is tab-separated and must provide 'first', 'last' and 'email'
# columns; a 'name' column (full name) and a 'count' column (number of
# matched form submissions, initially 0) are added here.
people = pd.read_csv(folder_name + 'people.txt', header=0, sep='\t')
people['name'] = people['first'] + ' ' + people['last']
# Form emails are lower-cased and stripped before they are matched, so the
# roster has to be stored the same way or mixed-case entries never match.
# Doing this before drop_duplicates also merges rows that differ only in
# email case or padding.
people['email'] = people['email'].str.lower().str.strip()
people = people.drop_duplicates()
# drop=True: renumber rows 0..n-1 without keeping the old index around as a
# stray 'index' column in the summary.
people = people.reset_index(drop=True)
people['count'] = 0
print(people.columns)

# Alternative addresses: 'email' is the address a student actually used,
# 'real_email' is the roster address it stands for.
alts = pd.read_csv(folder_name + 'alt_emails.csv', header=0)
alts['email'] = alts['email'].str.lower().str.strip()
alts['real_email'] = alts['real_email'].str.lower().str.strip()
print(alts.columns)

In [None]:
# Process forms
# Walk over every exported form workbook in the folder, match each response
# to a student on the roster (directly or through the alt-email table),
# count it in people['count'], and report late submissions and emails that
# could not be matched.

# Lateness cutoff as a fractional hour of the day (12:10).
margin = 12 + 10/60


def _normalize_email(value):
    """Return `value` lower-cased and stripped, or None if it is not text."""
    if not isinstance(value, str):
        return None  # e.g. NaN from a blank spreadsheet cell
    return value.lower().strip()


def _first_match_lookup(keys, values):
    """Map each normalized email in `keys` to the first value paired with it."""
    lookup = {}
    for key, value in zip(keys, values):
        key = _normalize_email(key)
        if key is not None:
            lookup.setdefault(key, value)
    return lookup


# Built once, so every form row is a dictionary lookup instead of a scan of
# the whole table.
row_by_email = _first_match_lookup(people.email, people.index)
real_by_alt = _first_match_lookup(alts.email, alts.real_email)

file_list = os.listdir(folder_name)
print("Total files: ",len(file_list))
problem_list = []
for fname in file_list:
    # Ignore everything that is not an exported form. Names starting with
    # '~$' are the lock files Excel leaves next to an open workbook; they
    # end in .xlsx but cannot be read.
    if not fname.lower().endswith('.xlsx') or fname.startswith('~$'):
        continue
    fullname = folder_name + fname
    data = pd.read_excel(fullname)
    data.columns = ['timestamp', 'email', 'question']
    data['dt'] = pd.to_datetime(data.timestamp)
    for i in range(data.shape[0]):
        student = _normalize_email(data['email'][i])
        if student is None:
            # A blank email cell cannot be attributed to anyone.
            print(f"Problem: missing email in {fname}, row {i}")
            continue

        if student not in row_by_email:  # Couldn't find this email right away
            correction = real_by_alt.get(student)
            if isinstance(correction, str):  # A known alt
                print(student, '→', correction)
                student = _normalize_email(correction)

        # Checked again after the correction: an alt entry may point to an
        # address that is not on the roster, and that must not be credited
        # to some other student.
        if student not in row_by_email:
            print(f"Problem: {student}")
            if student not in problem_list:  # Remember a problem and skip it
                problem_list.append(student)
            continue

        ind = row_by_email[student]  # Roster row of this student
        when = data['dt'][i]
        # dayofweek == 3 is Thursday (pandas counts Monday as 0). The
        # reported delay is measured from 12:00, not from the cutoff.
        if when.dayofweek == 3 and when.hour + when.minute/60 > margin:
            print(f"{people.loc[ind,'name']}\t late by {when.hour*60 - 12*60 + when.minute} minutes")
        people.loc[ind, 'count'] += 1

print('---Problematic records:---\n', '\n'.join(problem_list), sep='')

In [None]:
# Show only the roster rows that were matched to at least one form response.
people[people['count'] > 0]