# Biosem forms viewer

Expects that all pre-lecture forms (Google Docs exported as Excel files) are placed in a single folder, and that the full list of students is stored in a separate file. Matches student emails between the list and the forms, and creates a summary.

In [None]:
import numpy as np
import pandas as pd
import os

In [None]:
# Folder that holds the exported forms and the student list.
# File names are appended to this string directly, so keep the trailing slash.
folder_name = '../../data/biosem_forms/'

In [None]:
# Load the class roster: a tab-separated file whose first row names the columns
roster_path = folder_name + 'people.txt'
people = pd.read_csv(roster_path, sep='\t', header=0)
people = (
    people
    .assign(name=people['first'] + ' ' + people['last'])  # Full display name
    .drop_duplicates()
    .reset_index()      # Keeps the old row labels in an 'index' column
    .assign(count=0)    # Running tally of submitted questions
)
print(people.columns)

# Load the table that maps alternative email addresses to the roster ones
alts_path = folder_name + 'alt_emails.csv'
alts = pd.read_csv(alts_path, header=0)
print(alts.columns)

In [None]:
# Process forms: count how many pre-lecture questions each student submitted.
# Every .xlsx file in the folder is read as one exported form whose three
# columns are renamed to timestamp, email and question. Each row is matched to
# the roster by email, falling back on the table of known alternative emails.
# Rows that cannot be matched are printed, remembered in problem_list, and
# do not count towards anybody's tally.
margin = 12 + 10/60  # Hour of the day (12:10); later Thursday submissions are reported as late


def _clean_email(value):
    """Return an email lowercased and stripped, or None if it is not text."""
    return value.lower().strip() if isinstance(value, str) else None


# Build both lookups once instead of rescanning the tables for every row.
# setdefault keeps the first occurrence, so duplicates resolve to the first match.
roster_rows = {}  # cleaned roster email -> row label in `people`
for row_label, address in people.email.items():
    roster_rows.setdefault(_clean_email(address), row_label)
roster_rows.pop(None, None)  # Roster rows without a usable email cannot be matched

known_alts = {}  # cleaned alternative email -> cleaned real email
for alt_address, real_address in zip(alts.email, alts.real_email):
    known_alts.setdefault(_clean_email(alt_address), _clean_email(real_address))
known_alts.pop(None, None)

file_list = os.listdir(folder_name)
print("Total files: ",len(file_list))
problem_list = []
for fname in file_list:
    # Ignore everything that is not an exported form, including the "~$..."
    # lock files that Excel leaves behind while a workbook is open
    if not fname.endswith('.xlsx') or fname.startswith('~$'):
        continue
    fullname = folder_name + fname
    data = pd.read_excel(fullname)
    data.columns = ['timestamp', 'email', 'question']
    data['dt'] = pd.to_datetime(data.timestamp)
    for stamp, raw_email in zip(data.dt, data.email):
        submitted = _clean_email(raw_email)
        if submitted is None:  # Blank or non-text cell: nothing to match
            print(f"Problem: missing email in {fname}")
            continue

        student = submitted
        if student not in roster_rows:  # Couldn't find this email right away
            student = known_alts.get(student, student)  # Try a known alt
        # Check again after the correction: an alt may point to an address
        # that is not in the roster, and such a row must not be credited to
        # anybody (an unchecked lookup would silently pick the first student)
        if student not in roster_rows:
            print(f"Problem: {submitted}")
            if submitted not in problem_list:  # Remember a problem and skip it
                problem_list.append(submitted)
            continue

        row = roster_rows[student]  # Find the student
        # dayofweek == 3 is Thursday; a missing timestamp (NaT) never compares
        # equal, so it is simply not reported as late
        if stamp.dayofweek == 3 and stamp.hour + stamp.minute/60 > margin:
            print(f"{people.loc[row,'name']}\t late by {stamp.hour*60 - 12*60 + stamp.minute} minutes")
        people.loc[row, 'count'] += 1


print('---Problematic records:---\n', '\n'.join(problem_list), sep='')

In [None]:
# Keep only the students who submitted at least one question
answer = people[people['count'] > 0]

In [None]:
# Combine the question counts with the attendance chart and display the result
attendance = pd.read_csv(folder_name + 'attendance_summary.csv')
# print(attendance.columns)

# Only the email (the join key) and the attendance mark are taken from the chart
attendance_marks = attendance[['email', 'check']]
merged = answer.merge(attendance_marks, on='email', how='left', suffixes=[None, None])
merged = merged.drop(columns=['index'])
merged = merged.rename(columns={'count': 'questions', 'check': 'attended'})
out = merged.sort_values(by='name')

out

A piece to copy in "Fails":

> but submitted only 1 pre-seminar question out of 12. For now I'm marking it as an "F". If you want to write a short (~2 pages) collection of loosely connected paragraphs, reflecting on 4-5 things that you learned from this seminar series; things that seemed important and interesting to you personally, and taken from different talks, not from one talk, let me know! If you send this text to me any time before March or so, and if it's reasonably good, I'll change the grade to a "Pass".

In [None]:
# Print a short grade message for every student listed in `out`
TOTAL_SEMINARS = 13    # Seminars that could be attended
TOTAL_QUESTIONS = 12   # Pre-seminar questions that could be submitted
PASS_THRESHOLD = 10    # Minimum of each needed for an unconditional pass

# Walk the columns in their stored order instead of looking rows up as
# out.attended[i]: that is a lookup by index label, and the labels of `out`
# need not run 0..n-1 in row order (e.g. after sorting by name).
for last, first, seen, asked in zip(out['last'], out['first'],
                                    out['attended'], out['questions']):
    # No attendance mark for this student (NaN): count it as zero instead of
    # letting NaN leak into the comparisons and the printed text
    seen = 0 if pd.isna(seen) else int(seen)
    # Use the larger of the attendance mark and the question count, capped at
    # the number of seminars. Presumably the chart can undercount - TODO confirm
    attended = min(max(seen, asked), TOTAL_SEMINARS)
    questions = min(asked, TOTAL_QUESTIONS)
    if attended >= PASS_THRESHOLD and questions >= PASS_THRESHOLD:
        conclusion = 'which is a Pass!'
    else:
        conclusion = ('which should technically be an "F", but ok, last semester was weird, '+
                      'let\'s call it a "Pass" this one time.')
    if attended == TOTAL_SEMINARS:
        attended = 'all 13'
    else:
        attended = str(attended) + '/13'
    print(f"{last}:\nDear {first}, you attended {attended} seminars, and submitted " +
          f"{questions}/12 pre-seminar questions, {conclusion}\n")