Gender Biases in Student Evaluations of Teachers
====================================================


In [8]:
from __future__ import division
%matplotlib inline
import math

import numpy as np
from numpy.random import random
import pandas as pd
import scipy as sp
from scipy import special
import matplotlib.pyplot as plt
# import permute #Install instructions at https://github.com/statlab/permute

In [9]:
# Load the sample and keep only the rows whose admission_cep flag is 0.
# .copy() makes `dat` an independent frame rather than a slice of the loaded
# one, so later cells can add or overwrite columns without
# SettingWithCopyWarning or writes that silently fail to stick.
dat = pd.read_stata("sample_permutation.dta")
dat = dat.loc[dat['admission_cep'] == 0].copy()
dat.describe()

Unnamed: 0,student_id,year,entreescpoen,stu_male,stu_female,admission_exam,admission_cep,admission_bactb,admission_other,stu_avg_final,...,history,micro,ip,macro,socio,scpo,both_female,both_male,stumale_proffemale,stufemale_profmale
count,16.0,16.0,16.0,16.0,16.0,16.0,16,16.0,16,16.0,...,16.0,16.0,16.0,16.0,16.0,16.0,16.0,16.0,16.0,16.0
mean,2687.5,2010.5625,2010.5,0.5625,0.4375,0.9375,0,0.0625,0,13.314236,...,0.25,0.1875,0.1875,0.0625,0.25,0.0625,0.1875,0.3125,0.25,0.25
std,1121.691401,1.152895,1.21106,0.512348,0.512348,0.25,0,0.25,0,1.401222,...,0.447214,0.403113,0.403113,0.25,0.447214,0.25,0.403113,0.478714,0.447214,0.447214
min,556.0,2008.0,2008.0,0.0,0.0,0.0,0,0.0,0,9.833334,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1922.0,2010.0,2010.0,0.0,0.0,1.0,0,0.0,0,12.368056,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,2774.0,2010.5,2010.5,1.0,0.0,1.0,0,0.0,0,13.416667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,3582.25,2011.25,2011.25,1.0,1.0,1.0,0,0.0,0,14.284722,...,0.25,0.0,0.0,0.0,0.25,0.0,0.0,1.0,0.25,0.25
max,4414.0,2012.0,2012.0,1.0,1.0,1.0,0,1.0,0,15.138889,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [7]:
# Evaluation items that are averaged per (instructor, course, genders) group.
evals = ['q16', 'q1', 'q2', 'q3', 'q4', 'q5', 'q6', 'q7', 'q8', 'q9', 'q10', 'q13', 'q14', 'q15']

# Work on an independent copy so the column assignments below never write into
# a slice of another frame.
dat = dat.copy()

# Recode the text answers as integers 0-4. The isinstance guard keeps the cell
# idempotent (already-recoded data is left alone). .iloc[0] is positional: a
# label lookup `dat.q16[0]` raises KeyError when row label 0 has been filtered out.
if len(dat) > 0 and isinstance(dat.q16.iloc[0], str):
    for e in evals:
        dat[e] = dat[e].replace(['nonpertinent', 'insuffisant', 'moyen', 'bon', 'excellent'], [0,1,2,3,4])

# Course label: start from prof_id, then overwrite with the subject name
# wherever that subject's indicator column is 1 (later subjects win on overlap).
# object dtype lets the column hold the string labels; .loc replaces the
# chained assignment `dat['course'][mask] = ...`, which may write to a temporary.
dat['course'] = dat['prof_id'].astype(object)
for subject in ['history', 'micro', 'ip', 'macro', 'socio', 'scpo']:
    dat.loc[dat[subject] == 1, 'course'] = subject

# Instructor gender: 'M' where prof_male == 1, 'F' where prof_male == 0.
dat['gender'] = dat['prof_male'].astype(object)
dat.loc[dat['prof_male'] == 1, 'gender'] = 'M'
dat.loc[dat['prof_male'] == 0, 'gender'] = 'F'

# Student gender: 'M' where stu_male == 1, 'F' where stu_male == 0.
dat['student_gender'] = dat['stu_male'].astype(object)
dat.loc[dat['stu_male'] == 1, 'student_gender'] = 'M'
dat.loc[dat['stu_male'] == 0, 'student_gender'] = 'F'

grouped = dat.groupby(['prof_id', 'course', 'gender', 'student_gender'])
prof_ratings = grouped[evals].mean()

# NOTE(review): pass_fail is not defined in any visible cell of this notebook.
# The rename below expects its result column to be called '<lambda>', so it is
# presumably a lambda over the group's final grades -- define it (and document
# the pass threshold) in an earlier cell so Restart & Run All works.
assess_grade_mean = grouped['note_finale'].agg([np.mean,pass_fail])

# Join the per-group grade summary computed just above. The original
# concatenated an undefined name `pass_rate`; the 'mean' / '<lambda>' columns
# renamed below are the columns of assess_grade_mean.
teacher_data = pd.concat([prof_ratings, assess_grade_mean], axis=1)
teacher_data = teacher_data.rename(columns = {'mean':'mean_final_exam', '<lambda>':'pass_rate'}).reset_index()
teacher_data

Unnamed: 0,prof_id,course,gender,student_gender,q16,q1,q2,q3,q4,q5,q6,q7,q8,q9,q10,q13,q14,q15,mean_final_exam,pass_rate
0,1,scpo,F,F,3,3,3,3,3,4,0,2,4,4,3,2,1,2,14.166667,1
1,2,micro,M,M,2,2,4,3,2,2,3,4,3,3,2,2,2,2,12.333333,1
2,8,socio,F,F,3,3,3,2,2,3,3,3,3,3,3,2,2,2,16.5,1
3,13,socio,M,M,3,3,4,3,4,4,3,3,3,3,2,2,2,1,9.0,0
4,16,socio,F,M,3,3,3,2,2,3,3,2,3,3,2,4,2,2,11.666667,1
5,21,micro,M,M,3,3,3,2,3,3,2,3,4,4,2,3,2,2,14.833333,1
6,28,ip,F,M,4,4,3,4,3,4,3,4,4,4,3,3,2,2,13.166667,1
7,34,history,M,F,3,4,3,4,3,3,4,4,3,3,2,2,2,2,11.333333,1
8,47,history,F,M,3,3,3,3,4,2,2,3,3,3,2,3,2,2,13.666667,1
9,51,micro,F,M,3,3,3,2,3,3,0,3,0,3,3,1,2,2,12.0,1


Inter-rater Reliability
---------------------
The matrix has a row for each student and a column for each class they took.

In [10]:
#stud_ratings = dat.pivot_table(rows = 'student_id', columns = ['stud_gender','triplette_new'])

Code for running the analyses
===============

In [12]:

def corr(x, y, reps=10**4, rs=None):
    '''
    Simulate permutation p-values for the correlation between x and y.

    The test statistic is the Pearson correlation coefficient returned by
    np.corrcoef (pass ranks for x and y to get a Spearman test). The null
    distribution is simulated by randomly permuting x while y is held fixed.

    Input: array-likes x and y of equal length, number of permutations reps,
           np.random.RandomState object rs (a fresh one is created if None)
    Returns test statistic, left-sided p-value, right-sided p-value, two-sided p-value, simulations
    '''
    if rs is None:
        rs = np.random.RandomState()
    x = np.asarray(x)
    y = np.asarray(y)
    t = np.corrcoef(x, y)[0,1]
    sims = [np.corrcoef(rs.permutation(x), y)[0,1] for i in range(reps)]
    # Compare against an ndarray so the elementwise comparison does not depend
    # on numpy's reflected operators on a Python list; np.mean gives the
    # proportion without relying on `from __future__ import division`.
    sim_arr = np.asarray(sims)
    p_lower = np.mean(sim_arr <= t)
    p_upper = np.mean(sim_arr >= t)
    p_both = np.mean(np.abs(sim_arr) >= math.fabs(t))
    return t, p_lower, p_upper, p_both, sims

def stratCorrTst(x, y, group):
    '''
    Calculates the sum of correlations between x and y, computed separately in each group.

    Each within-group correlation is the Pearson coefficient returned by
    np.corrcoef (pass ranks for x and y to get Spearman correlations).
    A group in which x or y has no variation (e.g. a single-member group)
    yields nan from np.corrcoef, which makes the whole sum nan.

    Input: array-likes x, y and group of equal length; group holds the group ids
    '''
    # Positional ndarrays make the boolean masks below work for lists and
    # pandas Series alike.
    x = np.asarray(x)
    y = np.asarray(y)
    group = np.asarray(group)
    tst = 0.0
    for g in np.unique(group):
        gg = group == g
        tst += np.corrcoef(x[gg], y[gg])[0,1]
    return tst

def permuteWithinGroups(x, group, rs=None):
    '''
    Permutes the elements of x within groups

    Returns a shuffled copy; x itself is not modified. Elements only move
    between positions that share the same group id.

    Input: ndarray x to be permuted, array-like group of group ids,
           np.random.RandomState object rs (a fresh one is created if None)
    '''
    if rs is None:
        rs = np.random.RandomState()
    # ndarray of ids so `group == g` is an elementwise mask even for a list.
    group = np.asarray(group)
    permuted = x.copy()
    for g in np.unique(group):
        gg = group == g
        permuted[gg] = rs.permutation(permuted[gg])
    return permuted

def stratCorr(x, y, group, rs=None, reps=10**4):
    '''
    Simulate permutation p-values of the stratified correlation test.

    The test statistic is stratCorrTst(x, y, group). The null distribution is
    simulated by permuting x within groups (permuteWithinGroups) while y and
    group are held fixed.

    Input: x, y and group of equal length, np.random.RandomState object rs
           (a fresh one is created if None), number of permutations reps
    Returns test statistic, left-sided p-value, right-sided p-value, two-sided p-value, simulations
    '''
    # Create the generator once here so every permutation draws from the same
    # stream instead of a new RandomState per call.
    if rs is None:
        rs = np.random.RandomState()
    t = stratCorrTst(x, y, group)
    sims = [stratCorrTst(permuteWithinGroups(x, group, rs), y, group) for i in range(reps)]
    # ndarray comparison + np.mean: explicit elementwise comparison, and a
    # proportion that does not rely on `from __future__ import division`.
    sim_arr = np.asarray(sims)
    p_lower = np.mean(sim_arr <= t)
    p_upper = np.mean(sim_arr >= t)
    p_both = np.mean(np.abs(sim_arr) >= math.fabs(t))
    return t, p_lower, p_upper, p_both, sims



In [19]:
# One seeded generator is shared by both tests below, so the stratified test
# continues the random stream where the unstratified test left off.
rs = np.random.RandomState(seed=1)

# Unstratified permutation test: q16 rating vs. mean final exam grade.
(t, plower, pupper, pboth, sims) = corr(x = teacher_data.q16, y = teacher_data.mean_final_exam, rs = rs)
# Single formatted string: prints the same text under the Python 2 print
# statement and the Python 3 print function.
print("%s %s %s %s" % (t, plower, pupper, pboth))
# Want to do it separately for each gender
# Also test correlation between evaluation scores and gender.

# Stratify by gender
(t, plower, pupper, pboth, sims) = stratCorr(x = teacher_data.q16, y = teacher_data.mean_final_exam,
                                            group = teacher_data.gender, rs = rs)
print("%s %s %s %s" % (t, plower, pupper, pboth))



0.347221050174 0.9075 0.0946 0.1821
0.352041154138 0.7366 0.2635 0.5303
