In [None]:
# Import Numpy and Datascience modules.
import numpy as np
import pandas as pd
from datascience import *

# Plotting modules
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import warnings
warnings.simplefilter('ignore', UserWarning)

# Hypothesis: Female CST students get better grades than male students

**Disclaimer:** The following data set and analysis are purely a demonstration of hypothesis testing, and do not represent a systematic analysis of grades by gender at Temple. The dataset represents one CST class for one semester only.

In [None]:
# Read the class grade data from CSV and preview the first three rows
grades_csv_path = "data/Gender.csv"
grades = Table.read_table(grades_csv_path)
grades.show(3)

In [None]:
# List the distinct values that appear in the Gender column
gender_column_values = grades.column("Gender")
np.unique(gender_column_values)

In [None]:
# List the distinct values that appear in the Current Grade column
grade_column_values = grades.column("Current Grade")
np.unique(grade_column_values)

In [None]:
# Drop withdrawals so that only students who received a final grade remain.
# Each pass removes the rows whose Current Grade equals one withdrawal code.
for withdrawal_code in ("W", "WE"):
    grades = grades.where("Current Grade", are.not_equal_to(withdrawal_code))

In [None]:
# Re-check the distinct grade values now that withdrawals have been filtered out
remaining_grade_values = grades.column("Current Grade")
np.unique(remaining_grade_values)

In [None]:
# Convert letter grade to numerical equivalent
def grade_letter2number(letter_grade):
    """
    Takes: a letter grade such as "A", "B+" or "D-"
    Returns: the matching grade points on a 4.0 scale; any grade that is not
             listed in the lookup table below maps to 0.0
    """
    # Ordered (letter, points) pairs, checked top to bottom.
    letter_points = (
        ("A", 4.0),
        ("A-", 3.67),
        ("B+", 3.33),
        ("B", 3.0),
        ("B-", 2.67),
        ("C+", 2.33),
        ("C", 2.0),
        ("C-", 1.67),
        ("D+", 1.33),
        ("D", 1.0),
        ("D-", 0.67),
    )
    for letter, points in letter_points:
        if letter_grade == letter:
            return points
    return 0.0  # Default for any other grade

In [None]:
# Map every letter grade to its numerical value, then attach the result as a new column
numerical_grade_values = grades.apply(grade_letter2number, "Current Grade")
grades = grades.with_columns("Numerical Grade", numerical_grade_values)
grades

In [None]:
# Average the numerical grade within each gender
gender_and_grade = grades.select("Gender", "Numerical Grade")
mean_grade_by_gender = gender_and_grade.group("Gender", np.mean)
mean_grade_by_gender

In [None]:
# Observed test statistic: mean grade of the first listed gender minus the second.
# NOTE(review): presumably the grouped table lists "F" before "M", making this
# female minus male - confirm against the table displayed above.
gender_mean_values = mean_grade_by_gender.column("Numerical Grade mean")
observed_difference = gender_mean_values.item(0) - gender_mean_values.item(1)
observed_difference

In [None]:
# Find the sample size: the number of rows currently in `grades`
# (withdrawals were filtered out in an earlier cell)
N = grades.num_rows
N

In [None]:
# Count the rows for each gender and report the split
male_rows = grades.where("Gender", are.equal_to("M"))
female_rows = grades.where("Gender", are.equal_to("F"))
N_male = male_rows.num_rows
N_female = female_rows.num_rows
print(f"Of the {N} students, {N_male} are male and {N_female} are female.")

In [None]:
# Cross-tabulate: one row per numerical grade, one count column per gender
pivot = grades.pivot(columns="Gender", rows="Numerical Grade")
pivot

In [None]:
# Plot the distribution: horizontal bar chart of the pivoted counts,
# with "Numerical Grade" supplying the bar labels
pivot.barh("Numerical Grade")

In [None]:
# Do it again, this time by percentage: express each gender's counts as a
# share of that gender's total so groups of different sizes are comparable
female_percent = 100 * pivot.column("F") / N_female
male_percent = 100 * pivot.column("M") / N_male
pivot = pivot.with_columns("F%", female_percent, "M%", male_percent)
pivot

In [None]:
# Horizontal bar chart of the percentage columns only, labelled by numerical grade
percent_by_grade = pivot.select("Numerical Grade", "F%", "M%")
percent_by_grade.barh("Numerical Grade")

In [None]:
def difference_of_means(table, group_label):
    """
    Takes: a table with a 'Numerical Grade' column, and the label of the column
           that indicates the group to which each row belongs
    Returns: mean grade of the first group minus mean grade of the second group,
             in the order the grouped table lists them. This is the same
             orientation (row 0 minus row 1) used to compute
             `observed_difference`, so simulated and observed statistics can
             be compared directly.
    Raises: ValueError if the grouping does not produce exactly two groups
    """
    reduced = table.select('Numerical Grade', group_label)
    means_table = reduced.group(group_label, np.average)
    # Column 0 of the grouped table is the group label; column 1 is the mean grade.
    means = means_table.column(1)
    if len(means) != 2:
        raise ValueError(
            f"difference_of_means expects exactly two groups in "
            f"'{group_label}', found {len(means)}")
    # Row 0 minus row 1. The previous row 1 minus row 0 had the opposite sign
    # to the observed statistic, so the one-sided comparison used for the
    # p-value was made against a mirrored null distribution.
    return means.item(0) - means.item(1)

In [None]:
def one_simulated_difference_of_means():
    """
    Simulate one draw of the test statistic under the null hypothesis.

    Returns: Difference between mean grades by gender after shuffling labels
    """
    # Sampling every row without replacement yields the rows in a random
    # order; only the Gender column of that reordering is used.
    permuted_rows = grades.sample(with_replacement=False)
    permuted_genders = permuted_rows.column('Gender')

    # Pair the grades, still in their original order, with the permuted labels.
    grades_only = grades.select('Numerical Grade')
    relabeled = grades_only.with_column('Shuffled Label', permuted_genders)

    return difference_of_means(relabeled, 'Shuffled Label')

In [None]:
# Test our functions: run a single shuffle and display the simulated difference.
# The labels are shuffled randomly, so the value is expected to change from run to run.
one_simulated_difference_of_means()

In [None]:
# Run the simulation
# NOTE(review): no random seed is set in the visible cells, so the simulated
# differences (and the resulting p-value) will vary slightly between runs.
repetitions = 10000

# Collect every simulated statistic first and build the array once.
# Calling np.append inside the loop copies the whole array on each iteration.
differences = np.array(
    [one_simulated_difference_of_means() for _ in range(repetitions)]
)

In [None]:
# Histogram of the simulated differences, with the observed value marked on the x-axis
simulated_table = Table().with_column('Difference Between Group Means', differences)
simulated_table.hist(bins=30)
print('Observed Difference:', observed_difference)

# Draw a triangle marker at (observed_difference, 0) on the current axes
ax = plt.gca()
marker_style = dict(marker='^', markersize=40, mec='red')
ax.plot(observed_difference, 0, **marker_style)
ax.set_title('Prediction Under the Null Hypothesis');

In [None]:
# Calculate the p-value: the fraction of simulated differences that are
# at least as large as the observed difference
at_least_as_large = differences >= observed_difference
extreme_count = np.count_nonzero(at_least_as_large)
p_value = extreme_count / repetitions
print("The p-value is :", p_value)

## Conclusion

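In the simulation run reported here, about 15.5% of the shuffled-label differences were at least as large as the observed difference between female and male grades. In other words, if gender had no effect on grades in this class (the null hypothesis), a gap this large would still arise by chance roughly 15.5% of the time. Because this p-value is above both 0.01 and 0.05, we cannot reject the null hypothesis at the 1% or even the 5% significance level (99% or 95% confidence). The difference between male and female performance in this class is not statistically significant.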