In [1]:
# %load pyFiles/main.py
class Faculty:
    """
    Pairs the two survey records that belong to one faculty member.

    ``c14`` is meant to hold the pandas Series with the 2014 census data and ``c16`` the pandas
    Series with the 2016 check-in data. Either one may be left as None.
    """
    def __init__(self, c14=None, c16=None):
        # Store both records exactly as given; no copying or validation is done.
        self.c14, self.c16 = c14, c16


def match_faculty(df1, df2, col1, col2):
    """
    Match strings from a column of one dataframe to a column of another.
    Return a list of Faculty who have matches on both lists.

    Values are compared after stripping surrounding whitespace and lower-casing. Cells that are
    not strings (e.g. NaN) are skipped and never match. Each row of df1 is paired with the first
    row of df2 (in positional order) holding the same value; later duplicates in df2 are ignored.
    Results follow the row order of df1.

    Note that the df1 row is stored as ``Faculty.c16`` and the df2 row as ``Faculty.c14``, so
    callers must pass the 2016 frame as df1 and the 2014 frame as df2.

    :param df1: pandas DataFrame. Rows become ``Faculty.c16``.
    :param df2: pandas DataFrame. Rows become ``Faculty.c14``.
    :param col1: str. Used to choose column in df1
    :param col2: str. Used to choose column in df2
    :return: list of Faculty
    """
    # Index df2 once: normalised value -> position of its first occurrence.
    # setdefault keeps the earliest position, mirroring "first match wins".
    # TODO: Analyze duplicates (later df2 rows with the same value are dropped here).
    first_position = {}
    for j, candidate in enumerate(df2[col2]):
        if isinstance(candidate, str):
            first_position.setdefault(candidate.strip().lower(), j)

    # NOTE(review): empty strings are still strings, so a blank cell in df1 pairs with the first
    # blank cell in df2. Confirm whether blank emails should be excluded.
    matched_faculty = []
    for i, email in enumerate(df1[col1]):
        if not isinstance(email, str):
            continue  # non-string cells cannot be normalised or matched
        j = first_position.get(email.strip().lower())
        if j is not None:
            # iloc is positional, matching the enumerate() counters used above.
            matched_faculty.append(Faculty(c14=df2.iloc[j], c16=df1.iloc[i]))
    return matched_faculty


# Lookup tables keyed by variable name, then by the raw code as a string.
# NOTE: the "Non-tentured" spelling is kept as-is because existing outputs contain that label.
_Q_MAPS = {
    "Q37": {
        "1": "Assistant Professor",
        "2": "Associate Professor",
        "3": "Tenured Professor",
        "4": "Senior Professor",
        "5": "Professor, Non-tentured track",
        "6": "Postdoctoral Associate/Fellow",
        "7": "Graduate Student",
        "12": "Other",
    },
}


def q_map(name, value=None, series=None):
    """
    Maps raw data from a certain variable name to legible or workable outputs.

    Only "Q37" has a mapping at present. The raw code may be given as a string (surrounding
    whitespace is ignored) or as an integer; both "2" and 2 map to the same label.

    :param name: str. Variable name to be mapped
    :param value: str or int. Optional, value to match. Ignored when series is given.
    :param series: Optional, series to be indexed with name. Raises KeyError if name is absent.
    :return: Mapped value; "Error: Value Not Found" when name is mapped but the code is unknown;
        None when name has no mapping.
    """
    if series is not None:
        value = series[name]

    codes = _Q_MAPS.get(name)
    if codes is None:
        return None  # no mapping for this variable

    # Normalise so that integer codes and padded strings hit the same table key.
    key = value.strip() if isinstance(value, str) else str(value)
    return codes.get(key, "Error: Value Not Found")


In [2]:
import pandas as pd

Read in the 2014 and 2016 CSV files, keeping every column as a string.

In [3]:
# Load both survey exports with identical options: every column is read as text (dtype=str)
# and NA detection is switched off, so blank cells stay empty strings instead of becoming NaN.
d2014_data, d2016_data = (
    pd.read_csv(csv_path, keep_default_na=False, na_filter=False, dtype=str)
    for csv_path in ("../data/2014data.csv", "../data/2016data2.csv")
)

Use the `match_faculty` function to build a list of faculty whose email address appears in both the 2014 and 2016 data (compared case-insensitively, ignoring surrounding whitespace).

In [4]:
# The 2016 frame goes first: match_faculty stores df1 rows as c16 and df2 rows as c14.
matched_faculty = match_faculty(
    df1=d2016_data,
    df2=d2014_data,
    col1="RecipientEmail",
    col2="RecipientEmail",
)

Check how many faculty were matched, then make a list of the regional leaders: faculty whose 2014 `Q114` response contains `"2"`.

In [5]:
# Sanity check: how many faculty were matched by email across the two datasets.
print(len(matched_faculty))

357


In [6]:
# Keep the matched faculty whose 2014 Q114 response contains "2".
# NOTE(review): this is a substring test, so a response such as "12" would also pass;
# confirm against the Q114 coding whether an exact or delimited match is intended.
regional_leaders = list(
    filter(lambda member: "2" in member.c14["Q114"], matched_faculty)
)

In [7]:
# Number of matched faculty selected as regional leaders.
print(len(regional_leaders))

9


In [8]:
# Show the raw Q37 code stored in each regional leader's 2016 record, one per line.
for leader in regional_leaders:
    print(leader.c16["Q37"])

2
3
12
2
2
4
12
2
5


Use `q_map` to get position names instead of numbers.

In [9]:
# Print the readable Q37 label for each regional leader's 2016 record.
for leader in regional_leaders:
    position = q_map("Q37", series=leader.c16)
    if position != "Other":
        print(position)
        continue
    # "Other" says little by itself, so show the Q26 value next to it.
    # NOTE(review): Q26 looks like the free-text position description; confirm in the codebook.
    print(position, leader.c16["Q26"])

Associate Professor
Tenured Professor
Other Professor of MCD Biology, Emeritus
Associate Professor
Associate Professor
Senior Professor
Other Teaching and Learning Center
Associate Professor
Professor, Non-tentured track
