In [1]:
# imports
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline
import xlrd
import os
import openpyxl
import matplotlib.pyplot as plt
import csv  # The CSV module
from scipy import stats
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
from varname import nameof, argname
import tkinter as Tk
from tkinter import filedialog
from tkinter.filedialog import askopenfilename, asksaveasfilename
from tkinter.messagebox import showwarning, showinfo
import re

In [2]:
# Remember the current working directory; it is used as the starting directory for the dialogs
currdir = os.getcwd()

# Create the Tk root window that the dialogs are parented to, and keep it hidden
root = Tk.Tk()
root.withdraw()

In [3]:
def load_file(title, suggested_filename=None):
    """Show an "open file" dialog and return what the user selected.

    The dialog is parented to the module-level ``root`` window, starts in
    ``currdir`` and uses ".csv" as its default extension.

    Parameters
    ----------
    title : str
        Caption passed to the dialog.
    suggested_filename : str, optional
        Passed to the dialog as ``initialfile``; left out of the call
        entirely when it is falsy.

    Returns
    -------
    The value returned by ``filedialog.askopenfilename``.
    """
    dialog_options = {
        "parent": root,
        "initialdir": currdir,
        "title": title,
        "defaultextension": ".csv",
    }
    # Only forward a suggestion when the caller actually supplied one
    if suggested_filename:
        dialog_options["initialfile"] = suggested_filename
    return filedialog.askopenfilename(**dialog_options)

In [4]:
def save_file(title, suggested_filename=None):
    """Show a "save as" dialog and return what the user selected.

    The dialog offers "CSV Files" and "All Files" as file types and uses
    ".csv" as its default extension.

    Parameters
    ----------
    title : str
        Caption passed to the dialog.
    suggested_filename : str, optional
        Passed to the dialog as ``initialfile``; left out of the call
        entirely when it is falsy.

    Returns
    -------
    The value returned by ``asksaveasfilename``.
    """
    dialog_options = {
        "title": title,
        "filetypes": [('CSV Files', '*.csv'), ('All Files', '*.*')],
        "defaultextension": ".csv",
    }
    # Only forward a suggestion when the caller actually supplied one
    if suggested_filename:
        dialog_options["initialfile"] = suggested_filename
    return asksaveasfilename(**dialog_options)

# 1. Load All Teaching Evaluations Data into a single dataframe

In [5]:
# Select a folder containing the data files
folder = filedialog.askdirectory(
    parent=root,
    initialdir=currdir,
    title='Please select a directory',
)

# An empty result means the dialog was dismissed without choosing a directory.
# Stop here with a clear message instead of letting the later directory
# listing fail on an empty path.
if not folder:
    raise ValueError("No data folder was selected; re-run this cell and choose a directory.")

In [6]:
# A function that takes in a string.
# If the string contains "-question", it returns a list of two items:
# the string "question", followed by the substring of the input that comes before "-question".
# Otherwise, if the string contains "-response", it returns a list of two items:
# the string "response", followed by the substring of the input that comes before "-response".
# If the string contains neither, it returns [None, None].

def get_question_or_response(string):
    """Classify a name as a "question" or "response" entry and extract its prefix.

    Parameters
    ----------
    string : str
        Text to inspect, e.g. "MATH 235-1165-question.csv".

    Returns
    -------
    list
        ``[kind, prefix]`` where ``kind`` is "question" or "response" and
        ``prefix`` is everything before the first "-question" / "-response".
        "-question" is checked first, so it wins when both markers occur.
        ``[None, None]`` when neither marker is present.
    """
    for kind in ("question", "response"):
        marker = "-" + kind
        if marker in string:
            # Keep only the text in front of the first occurrence of the marker
            return [kind, string.split(marker, 1)[0]]
    return [None, None]

In [18]:
# Function that takes in a string and returns the term corresponding to the string.
def get_term(string):
    """Turn a term code such as "1165" into a label such as "Spring 2016".

    The last character selects the season ("1" -> "Winter", "5" -> "Spring",
    "9" -> "Fall"; anything else gives an empty season). The characters
    between the first and the last are appended to "20" to form the year.

    Parameters
    ----------
    string : str
        Non-empty term code; an empty string raises ``IndexError``.

    Returns
    -------
    str
        "<season> <year>". When the last character is not a known season
        code the result starts with a space, e.g. " 2016".
    """
    season_by_code = {"1": "Winter", "5": "Spring", "9": "Fall"}
    season = season_by_code.get(string[-1], "")
    # Drop the leading character and the trailing season code
    year = "20" + string[1:-1]
    return season + " " + year


In [22]:
# Function that takes in a string of the form "MATH 235-1165" and returns a list of two items.
# The first item is the substring of the input string that comes before the hyphen.
# The second item is the term based on the substring of the input string that comes after the hyphen.
def get_course_and_term(string):
    """Split a "<course>-<term code>" string into the course and a term label.

    Parameters
    ----------
    string : str
        Text of the form "MATH 235-1165". Only the first two hyphen-separated
        pieces are used; a string without a hyphen raises ``IndexError``.

    Returns
    -------
    list
        ``[course, term]`` where ``course`` is the text before the first
        hyphen and ``term`` is ``get_term`` applied to the piece after it.
    """
    pieces = string.split("-")
    course = pieces[0]
    term = get_term(pieces[1])
    return [course, term]

In [24]:
# Collect the per-file dataframes, grouped by kind ("question" / "response").
# Each list entry is a one-item dict mapping the file-name prefix (the text
# before "-question" / "-response") to the dataframe loaded from that file.
dataframes = {"question": [], "response": []}

# os.listdir returns names in arbitrary order; sorting keeps the order of the
# merged dataframes the same from run to run.
for filename in sorted(os.listdir(folder)):
    if not filename.endswith('.csv'):
        print(f"Skipping file {filename}")
        continue

    key, val = get_question_or_response(filename)
    if key is None:
        # A CSV that is neither a "-question" nor a "-response" file has no
        # list to go into (dataframes[None] would raise KeyError), so skip it
        # before spending time reading it.
        print(f"Skipping file {filename} (no -question/-response marker in its name)")
        continue

    # Derive the course and term once from the file name
    course, term = get_course_and_term(filename)

    # obtain the data frame from the csv file
    temp_df = pd.read_csv(os.path.join(folder, filename))
    # add two columns to the data frame. One for the course and one for the term.
    temp_df["Course"] = course
    temp_df["Term"] = term
    # append the data frame to the list for its kind, keyed by the file-name prefix
    dataframes[key].append({f'{val}': temp_df})

In [25]:
# Stack every dataframe collected under "question" into a single dataframe.
# Each list entry is a dict, so its values are flattened before concatenating.
questions_df = pd.concat(
    [frame for entry in dataframes["question"] for frame in entry.values()]
)


In [26]:
# Stack every dataframe collected under "response" into a single dataframe.
# Each list entry is a dict, so its values are flattened before concatenating.
responses_df = pd.concat(
    [frame for entry in dataframes["response"] for frame in entry.values()]
)

In [27]:
# Save the question and response dataframes to csv files.
# Each export asks for its own destination. If a dialog is dismissed the
# returned path is empty, so that export is skipped instead of handing an
# empty path to to_csv.
for frame, dialog_title, default_name in (
    (questions_df, "Save the question data", "questions.csv"),
    (responses_df, "Save the response data", "responses.csv"),
):
    out_path = save_file(dialog_title, default_name)
    if out_path:
        frame.to_csv(out_path, index=False)
    else:
        print(f"Save cancelled; {default_name} was not written")

19
