Gender Biases in Student Evaluations of Teachers
====================================================


In [1]:
# boilerplate
%matplotlib inline
import math
import numpy as np
import pandas as pd
from numpy.random import random
import scipy as sp
from scipy import special
import matplotlib.pyplot as plt
from __future__ import division

# initialize PRNG
# A single RandomState, seeded once, is passed as `seed=rs` to every permutation
# test below.  Its state advances with each call (the two identical
# "teaching dimension" cells further down report slightly different p-values),
# so the recorded p-values depend on the order in which cells are executed.
rs = np.random.RandomState(seed=1)

Permutation test code
============
You must install the _permute_ package to use this code. Install instructions can be found at https://github.com/statlab/permute.

In [2]:
from permute.core import corr  
from permute import stratified

## Read data and define new fields

In [3]:
# Load the evaluation data from the Stata file.  convert_categoricals=False
# keeps labelled columns as their stored values instead of pandas categoricals.
stata_path = "../../SET data/permutation_full.dta"
dat = pd.read_stata(stata_path, convert_categoricals=False)
# Optional restriction on admission_cep, currently disabled:
#dat = dat[dat.admission_cep == 0]

# Summary statistics of the numeric columns, shown as the cell output.
dat.describe()

Unnamed: 0,student_id,year,entreescpoen,stu_male,stu_female,admission_exam,admission_cep,admission_bactb,admission_other,stu_avg_final,...,history,micro,ip,macro,socio,scpo,both_female,both_male,stumale_proffemale,stufemale_profmale
count,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18,18.0,...,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0
mean,2470.055556,2010.388889,2010.277778,0.5,0.5,0.833333,0.111111,0.055556,0,13.013117,...,0.222222,0.277778,0.166667,0.055556,0.222222,0.055556,0.222222,0.277778,0.222222,0.277778
std,1242.043358,1.243283,1.319784,0.514496,0.514496,0.383482,0.323381,0.235702,0,1.598777,...,0.427793,0.460889,0.383482,0.235702,0.427793,0.235702,0.427793,0.460889,0.427793,0.460889
min,209.0,2008.0,2008.0,0.0,0.0,0.0,0.0,0.0,0,9.833334,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1634.0,2010.0,2009.25,0.0,0.0,1.0,0.0,0.0,0,12.243056,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,2365.5,2010.0,2010.0,0.5,0.5,1.0,0.0,0.0,0,13.270833,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,3404.5,2011.0,2011.0,1.0,1.0,1.0,0.0,0.0,0,14.090278,...,0.0,0.75,0.0,0.0,0.0,0.0,0.0,0.75,0.0,0.75
max,4414.0,2012.0,2012.0,1.0,1.0,1.0,1.0,1.0,0,15.138889,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [4]:
# Evaluation items.  q16 is the item the analyses below use as the overall
# evaluation score; the remaining items are individual teaching dimensions.
evals = ['q16', 'q1', 'q2', 'q3', 'q4', 'q5', 'q6', 'q7', 'q8', 'q9', 'q10', 'q13', 'q14', 'q15']

# If the ratings were read as French text labels, recode them to 0..4.
# The result is assigned back to the column: calling replace(inplace=True) on
# the temporary returned by dat[e] is not guaranteed to modify `dat` itself.
if isinstance(dat['q16'].iloc[0], str):
    rating_labels = ['nonpertinent', 'insuffisant', 'moyen', 'bon', 'excellent']
    rating_codes = [0, 1, 2, 3, 4]
    for e in evals:
        dat[e] = dat[e].replace(rating_labels, rating_codes)

# Course label: start from the instructor id, then overwrite it with the topic
# name wherever the matching indicator column equals 1.  Writes go through
# .loc on the frame (not chained indexing) so they always land in `dat`.
# The column is made object-typed first so it can hold the topic strings.
dat['course'] = dat['prof_id'].astype(object)
for topic_flag in ['history', 'micro', 'ip', 'macro', 'socio', 'scpo']:
    dat.loc[dat[topic_flag] == 1, 'course'] = topic_flag

# Letter-coded gender columns derived from the 0/1 indicators
# (1 -> 'M', 0 -> 'F'); any other value is left as it was.
for label_col, indicator_col in [('gender', 'prof_male'), ('student_gender', 'stu_male')]:
    indicator = dat[indicator_col]
    dat[label_col] = indicator.astype(object)
    dat.loc[indicator == 1, label_col] = 'M'
    dat.loc[indicator == 0, label_col] = 'F'

# Defining the fun times for class
# NOTE(review): the meaning of the extremetime and day codes is not visible
# here -- presumably non-extreme hours on mid-week days; confirm against the codebook.
dat['plumTime'] = (dat['extremetime'] < 2) & dat['day'].isin([2,3,4])

## Data filtering

Most of the analyses omit grades in Political Institutions (IP) courses, for three reasons: 
1. The final exam is oral
2. There is a large imbalance in the gender of the instructors: 52 male, 12 female
3. Since the course is "easy," that would bias results in favor of making male instructors look more effective

In [5]:
# Working copy of the data without the Political Institutions ('ip') rows;
# `dat` itself is left untouched for the analyses that include IP.
not_ip = dat['course'] != 'ip'
datNoIp = dat.copy()[not_ip]

# Start of the analysis

### Ratings v student performance

In [6]:
# Correlation btw avg evaluation score and final exam grade, by course number. Omit IP courses

print 'Analyzing the correlation btw avg evaluation score and final exam grade, by course number' 

theCols = evals + ['prof_male','plumTime','note_cm']

grouped = datNoIp.groupby(['course_number'])
prof_ratings = grouped[theCols].agg(np.mean)
(t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q16'], y = prof_ratings['note_cm'], seed = rs)
print 'overall', t, pupper, pboth,\
       prof_ratings['note_cm'].size, '\n'

for topic in np.unique(datNoIp.course):
    perTopic = datNoIp[datNoIp['course']==topic]
    grouped = perTopic.groupby(['course_number'])
    prof_ratings = grouped[theCols].agg(np.mean)
    (t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q16'], y = prof_ratings['note_cm'], seed = rs)
    print topic, t, pupper, pboth, prof_ratings['note_cm'].size

Analyzing the correlation btw avg evaluation score and final exam grade, by course number
overall 0.338935372358 0.114 0.2198 15 

history 0.814345071046 0.1665 0.2507 4
macro nan 0.0 0.0 1
micro 0.556689540198 0.2037 0.367 5
scpo nan 0.0 0.0 1
socio -0.14334148784 0.7473 0.7509 4




### Instructor gender v student performance

In [8]:
# Correlation of final exam and gender, by course.  Excludes IP courses.

print 'Correlation btw final exam average and instructor gender, by course' 

theCols = evals + ['prof_male','note_cm']

grouped = datNoIp.groupby(['course_number'])
prof_ratings = grouped[theCols].agg(np.mean)

(t, plow, pupper, pboth, sims) = corr(x = prof_ratings['note_cm'], y = prof_ratings['prof_male'], seed = rs)
print 'overall', t, pupper, pboth, prof_ratings['prof_male'].sum(), \
      (1-prof_ratings['prof_male']).sum(),\
       prof_ratings['prof_male'].size, '\n'

for topic in np.unique(dat.course):
    perTopic = dat[dat['course']==topic]
    grouped = perTopic.groupby(['course_number'])
    prof_ratings = grouped[theCols].agg(np.mean)
    (t, plow, pupper, pboth, sims) = corr(x = prof_ratings['note_cm'], y = prof_ratings['prof_male'], seed = rs)
    print topic, t, pupper, pboth, prof_ratings['prof_male'].sum(), \
    (1-prof_ratings['prof_male']).sum(),  prof_ratings['prof_male'].size 

Correlation btw final exam average and instructor gender, by course
overall -0.466475893983 0.9602 0.0829 8.0 7.0 15 

history -0.683130051064 1.0 0.4998 3.0 1.0 4
ip 0.755928946018 0.3427 0.678 2.0 1.0 3
macro nan 0.0 0.0 1.0 0.0 1
micro -0.412294457624 0.8016 0.41 3.0 2.0 5
scpo nan 0.0 0.0 0.0 1.0 1
socio -0.768831616596 1.0 0.5006 1.0 3.0 4


### Ratings v grade expectations

In [9]:
# Correlation btw avg evaluation score and avg cont assessment grades, by course number. Includes IP
print 'Analyzing the correlation btw avg evaluation score and cont assessment, by course number' 

theCols = evals + ['prof_male','plumTime','note_conf']

grouped = dat.groupby(['course_number'])
prof_ratings = grouped[theCols].agg(np.mean)
(t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q16'], y = prof_ratings['note_conf'], seed = rs)
print 'overall', t, pupper,\
       prof_ratings['note_conf'].size, '\n'

for topic in np.unique(dat.course):
    perTopic = dat[dat['course']==topic]
    grouped = perTopic.groupby(['course_number'])
    prof_ratings = grouped[theCols].agg(np.mean)
    (t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q16'], y = prof_ratings['note_conf'], seed = rs)
    print topic, t, pupper, prof_ratings['note_conf'].size
    
    

Analyzing the correlation btw avg evaluation score and cont assessment, by course number
overall 0.496430634506 0.0217 18 

history -0.132453235707 0.743 4
ip 0.944911182523 0.3353 3
macro nan 0.0 1
micro 0.311625984819 0.3039 5
scpo nan 0.0 1
socio 0.57735026919 0.4956 4


### Ratings v instructor gender

In [10]:
# Correlation of avg evaluation score and gender, by course.  Includes IP courses.

print 'Analyzing the correlation btw avg evaluation score and gender, by course' 

theCols = evals + ['prof_male','plumTime']

grouped = dat.groupby(['course_number'])
prof_ratings = grouped[theCols].agg(np.mean)

(t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q16'], y = prof_ratings['prof_male'], seed = rs)
print 'overall', t, pupper, pboth, prof_ratings['prof_male'].sum(), \
      (1-prof_ratings['prof_male']).sum(),\
       prof_ratings['prof_male'].size, '\n'

for topic in np.unique(dat.course):
    perTopic = dat[dat['course']==topic]
    grouped = perTopic.groupby(['course_number'])
    prof_ratings = grouped[theCols].agg(np.mean)
    (t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q16'], y = prof_ratings['prof_male'], seed = rs)
    print topic, t, pupper, pboth, prof_ratings['prof_male'].sum(), \
    (1-prof_ratings['prof_male']).sum(),  prof_ratings['prof_male'].size   

Analyzing the correlation btw avg evaluation score and gender, by course
overall -0.0545341883149 0.5911 0.9497 10.0 8.0 18 

history -0.132453235707 0.7501 1.0 3.0 1.0 4
ip -0.5 1.0 1.0 2.0 1.0 3
macro nan 0.0 0.0 1.0 0.0 1
micro 0.218217890236 0.5027 1.0 3.0 2.0 5
scpo nan 0.0 0.0 0.0 1.0 1
socio -0.333333333333 1.0 1.0 1.0 3.0 4


### Ratings and gender concordance

In [11]:
# Gender concordance v overall satisfaction, grouped by course
# This looks at how students rate their profs, by course

dMale_stu = dat[dat['stu_male']==1]
dFemale_stu = dat[dat['stu_female']==1]

theCols = evals + ['prof_male','prof_female','plumTime','note_cm']

groupedMale = dMale_stu.groupby(['course_number'])
groupedFemale = dFemale_stu.groupby(['course_number'])
prof_ratings_M = groupedMale[theCols].agg(np.mean)
prof_ratings_F = groupedFemale[theCols].agg(np.mean)
(t, plow, pupper, pboth, sims) = corr(x = prof_ratings_M['q16'], y = prof_ratings_M['prof_male'], seed = rs)
print 'Male students\n'
print 'overall', t, pupper, pboth,\
       prof_ratings_M['prof_male'].size, '\n'

for topic in np.unique(dMale_stu['course']):
    perTopic = dMale_stu[dMale_stu['course']==topic]
    grouped = perTopic.groupby(['course_number'])
    prof_ratings = grouped[theCols].agg(np.mean)
    (t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q16'], y = prof_ratings['prof_male'], seed = rs)
    print topic, t, pupper, pboth, prof_ratings['prof_male'].size

print 'Female students\n'
(t, plow, pupper, pboth, sims) = corr(x = prof_ratings_F['note_cm'], y = prof_ratings_F['prof_female'],\
                                      seed = rs)
print 'overall', t, pupper,pboth,\
       prof_ratings_F['prof_female'].size, '\n'

for topic in np.unique(dFemale_stu['course']):
    perTopic = dFemale_stu[dFemale_stu['course']==topic]
    grouped = perTopic.groupby(['course_number'])
    prof_ratings = grouped[theCols].agg(np.mean)
    (t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q16'], y = prof_ratings['prof_female'], seed = rs)
    print topic, t, pupper, pboth, prof_ratings['prof_female'].size

Male students

overall -0.219264504827 0.8749 0.3839 9 

history 1.0 0.5061 1.0 2
ip -1.0 1.0 1.0 2
micro -0.5 1.0 1.0 3
socio nan 0.0 0.0 2
Female students

overall 0.465875904876 0.1043 0.1914 9 

history nan 0.0 0.0 2
ip nan 0.0 0.0 1
macro nan 0.0 0.0 1
micro -1.0 1.0 1.0 2
scpo nan 0.0 0.0 1
socio nan 0.0 0.0 2


### Student performance and gender concordance

In [12]:
# Gender concordance v final exam scores, grouped by course. Excludes IP

dMale_stu = datNoIp[datNoIp['stu_male']==1]
dFemale_stu = datNoIp[datNoIp['stu_female']==1]

theCols = evals + ['prof_male','prof_female','plumTime','note_cm']

groupedMale = dMale_stu.groupby(['course_number'])
groupedFemale = dFemale_stu.groupby(['course_number'])
prof_ratings_M = groupedMale[theCols].agg(np.mean)
prof_ratings_F = groupedFemale[theCols].agg(np.mean)
(t, plow, pupper, pboth, sims) = corr(x = prof_ratings_M['note_cm'], y = prof_ratings_M['prof_male'], seed = rs)
print 'Male students\n'
print 'overall', t, pupper, pboth,\
       prof_ratings_M['prof_male'].size, '\n'

for topic in np.unique(dMale_stu['course']):
    perTopic = dMale_stu[dMale_stu['course']==topic]
    grouped = perTopic.groupby(['course_number'])
    prof_ratings = grouped[theCols].agg(np.mean)
    (t, plow, pupper, pboth, sims) = corr(x = prof_ratings['note_cm'], y = prof_ratings['prof_male'], seed = rs)
    print topic, t, pupper, pboth, prof_ratings['prof_male'].size

print 'Female students \n'
(t, plow, pupper, pboth, sims) = corr(x = prof_ratings_F['note_cm'], y = prof_ratings_F['prof_female'], seed = rs)
print 'overall', t, pupper,pboth,\
       prof_ratings_F['prof_female'].size, '\n'

for topic in np.unique(dFemale_stu['course']):
    perTopic = dFemale_stu[dFemale_stu['course']==topic]
    grouped = perTopic.groupby(['course_number'])
    prof_ratings = grouped[theCols].agg(np.mean)
    (t, plow, pupper, pboth, sims) = corr(x = prof_ratings['note_cm'], y = prof_ratings['prof_female'], seed = rs)
    print topic, t, pupper, pboth, prof_ratings['prof_female'].size

Male students

overall -0.434990408722 0.85 0.3696 7 

history -1.0 1.0 1.0 2
micro -0.427121098089 0.6722 0.829 3
socio -1.0 1.0 1.0 2
Female students 

overall 0.50251890763 0.1096 0.2224 8 

history nan 0.0 0.0 2
macro nan 0.0 0.0 1
micro 1.0 0.4957 1.0 2
scpo nan 0.0 0.0 1
socio nan 0.0 0.0 2


### Association between evaluation scores and class meeting time

In [13]:
# Correlation of avg evaluation score and desirable time, by course. Includes IP courses
print 'Analyzing the correlation btw avg evaluation score and desirable time, by course' 

theCols = evals + ['prof_male','plumTime']

grouped = dat.groupby(['course_number'])
prof_ratings = grouped[theCols].agg(np.mean)
(t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q16'], y = prof_ratings['plumTime'], seed = rs)
print 'overall', t, pupper, prof_ratings['plumTime'].sum(), \
      (1-prof_ratings['plumTime']).sum(),\
       prof_ratings['plumTime'].size, '\n'

for topic in np.unique(dat.course):
    perTopic = dat[dat['course']==topic]
    grouped = perTopic.groupby(['course_number'])
    prof_ratings = grouped[theCols].agg(np.mean)
    (t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q16'], y = prof_ratings['plumTime'], seed = rs)
    print topic, t, pupper, prof_ratings['plumTime'].sum(), \
    (1-prof_ratings['plumTime']).sum(),  prof_ratings['plumTime'].size

Analyzing the correlation btw avg evaluation score and desirable time, by course
overall -0.0545341883149 0.5827 10 8 18 

history -0.229415733871 0.6685 2 2 4
ip 0.5 0.6627 1 2 3
macro nan 0.0 1 0 1
micro -0.327326835354 0.8957 3 2 5
scpo nan 0.0 1 0 1
socio 0.57735026919 0.4976 2 2 4


### Reliability, self-declared investment in a course

In [9]:
# dat with IP => to test for reliability on q10 (student self-assessed involvment in the course)

wrkld=dat[dat['three_evals']==1]['sum_q10']
plt.hist (wrkld)   

KeyError: 'three_evals'

# Other analyses, not in paper

### Ratings and performance, pooled genders and stratified

In [7]:
# test association between ratings and performance, pooled genders.
# Since this uses the final exams, eliminate the IP courses

# group the data by instructor, discipline, gender, student gender

grouped = datNoIp.groupby(['prof_id', 'course', 'gender', 'student_gender'])
prof_ratings = grouped[evals].mean()

pass_fail = lambda x: np.mean([xx >= 10 for xx in x])
pass_rate = grouped['note_finale'].agg([np.mean,pass_fail])

teacher_data = pd.concat([prof_ratings, pass_rate], axis=1)
teacher_data = teacher_data.rename(columns = {'mean':'mean_final_exam', '<lambda>':'pass_rate'}).reset_index()

# Test association between ratings performance, pooled genders.
(t, plower, pupper, pboth, sims) = corr(x = teacher_data.q16, y = teacher_data.mean_final_exam, seed = rs)
print 'Ratings v. performance, pooled gender:',t, plower, pupper, pboth

# Test association between ratings performance, stratified by gender.
(t, plower, pupper, pboth, sims) = stratified.sim_corr(x = teacher_data.q16, y = teacher_data.mean_final_exam,
                                            group = teacher_data.gender, seed = rs)
print 'Ratings v. performance, stratified by gender:', t, plower, pupper, pboth

Ratings v. performance, pooled gender: 0.445655626657 0.952 0.049 0.0967
Ratings v. performance, stratified by gender: 0.888321608083 0.9513 0.0487 0.1183


### Ratings v instructor gender

In [16]:
grouped = dat.groupby(['prof_id'])
theCols = evals + ['prof_male']  # evaluation columns, plus indicator for male prof

prof_ratings = grouped[theCols].agg(np.mean)

(t, plower, pupper, pboth, sims) = corr(x = prof_ratings.q16, y = prof_ratings.prof_male, seed = rs)

print 'mean rating for instructors vs. instructor gender (positive favors males):', t, plower, pupper, pboth

mean rating for instructors vs. instructor gender (positive favors males): -0.0545341883149 0.4661 0.5855 0.9538


### Instructor gender and student performance, student level

In [9]:
# remove students who took triads from instructors all of the same gender or who have missing final grades.
# Since this uses final exams, remove IP courses

datNonzeroVar = datNoIp.copy()
for g in np.unique(dat['student_id']):
    gg = dat['student_id'] == g
    if (np.var(datNoIp['note_cm'][gg]) == 0.0) or (np.var(datNoIp['prof_male'][gg]) == 0.0) or \
        np.any(np.isnan(datNoIp['note_cm'][gg])):
        datNonzeroVar = datNonzeroVar.drop(datNonzeroVar[datNonzeroVar['student_id'] == g].index)

(t, plower, pupper, pboth, sims) = stratified.sim_corr(x = datNonzeroVar[~np.isnan(datNonzeroVar['note_cm'])]['prof_male'],\
                                    y = datNonzeroVar[~np.isnan(datNonzeroVar['note_cm'])]['note_cm'],\
                                    group = datNonzeroVar[~np.isnan(datNonzeroVar['note_cm'])]['student_id'],\
                                    seed = rs, reps=10**3)
                                           
print 'Student-level association between instructor gender and performance:', t, plower, pupper, pboth

Student-level association between instructor gender and performance: 0.0 0.0 0.001 1.0


### Association between instructors' average ratings and average continuous assessment grades, by teacher id

In [17]:
# Correlation of avg evaluation score and avg cont assessment grades, by instructor. 
# Includes IP courses

print 'Analyzing the correlation btw avg evaluation score and cont assessment, by instructor' 

theCols = evals + ['prof_male','plumTime','note_conf']

grouped = dat.groupby(['prof_id'])
prof_ratings = grouped[theCols].agg(np.mean)
(t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q16'], y = prof_ratings['note_conf'], seed = rs)
print 'overall', t, pupper,\
       prof_ratings['note_conf'].size, '\n'

for topic in np.unique(dat.course):
    perTopic = dat[dat['course']==topic]
    grouped = perTopic.groupby(['prof_id'])
    prof_ratings = grouped[theCols].agg(np.mean)
    (t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q16'], y = prof_ratings['note_conf'], seed = rs)
    print topic, t, pupper, prof_ratings['note_conf'].size
    
    

Analyzing the correlation btw avg evaluation score and cont assessment, by instructor
overall 0.496430634506 0.0193 18 

history -0.132453235707 0.7408 4
ip 0.944911182523 0.3233 3
macro nan 0.0 1
micro 0.311625984819 0.3321 5
scpo nan 0.0 1
socio 0.57735026919 0.503 4


### Association between concordance of student and teacher genders and overall satisfaction

In [18]:
# Correlation of concordance of student and teacher genders and overall satisfaction. Includes IP courses.
print 'Correlation of concordance of student and teacher genders and overall satisfaction'
# Male instructors first

dMale = dat[dat['prof_male']==1]
dFemale = dat[dat['prof_female']==1]

(t, plow, pupper, pboth, sims) = corr(x=dMale['q16'], y=dMale['stu_male'], reps=10**5, seed=rs)
print 'Male instructors:', t, pupper

# Female instructors

(t, plow, pupper, pboth, sims) = corr(x=dFemale['q16'], y=dFemale['stu_female'], reps=10**5, seed=rs)
print 'Female instructors:', t, pupper

Correlation of concordance of student and teacher genders and overall satisfaction
Male instructors: 0.105999788001 0.47336
Female instructors: -0.288675134595 0.85727


### Dimensions of teaching analyses

In [19]:
# Correlation of avg teaching dimension scores and final exam grade, by course number.
# Excludes IP.
print 'Analyzing the correlation btw teaching dimension scores and final exam grade, by course number' 

theCols = evals + ['prof_male','plumTime','note_cm']

grouped = datNoIp.groupby(['course_number'])
prof_ratings = grouped[theCols].agg(np.mean)
(t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q5'], y = prof_ratings['note_cm'], seed = rs)
print 'overall', t, pupper,\
       prof_ratings['note_cm'].size, '\n'

for topic in np.unique(datNoIp.course):
    perTopic = datNoIp[datNoIp['course']==topic]
    grouped = perTopic.groupby(['course_number'])
    prof_ratings = grouped[theCols].agg(np.mean)
    (t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q5'], y = prof_ratings['note_cm'], seed = rs)
    print topic, t, pupper, prof_ratings['note_cm'].size
    
    

Analyzing the correlation btw teaching dimension scores and final exam grade, by course number
overall -0.0223338766143 0.5295 15 

history 0.560611910581 0.3254 4
macro nan 0.0 1
micro 0.231915632414 0.4125 5
scpo nan 0.0 1
socio -0.789965081091 1.0 4


In [20]:
# Correlation of avg teaching dimension scores and final exam grade, by course number.
# Since this uses final, omit IP courses

print 'Analyzing the correlation btw teaching dimension scores and final exam grade, by course number' 

theCols = evals + ['prof_male','plumTime','note_cm']

grouped = datNoIp.groupby(['course_number'])
prof_ratings = grouped[theCols].agg(np.mean)
(t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q5'], y = prof_ratings['note_cm'], seed = rs)

print 'overall', t, pupper,\
       prof_ratings['note_cm'].size, '\n'

for topic in np.unique(datNoIp.course):
    perTopic = datNoIp[datNoIp['course']==topic]
    grouped = perTopic.groupby(['course_number'])
    prof_ratings = grouped[theCols].agg(np.mean)
    (t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q5'], y = prof_ratings['note_cm'], seed = rs)
    print topic, t, pupper, prof_ratings['note_cm'].size

Analyzing the correlation btw teaching dimension scores and final exam grade, by course number
overall -0.0223338766143 0.5236 15 

history 0.560611910581 0.33 4
macro nan 0.0 1
micro 0.231915632414 0.3947 5
scpo nan 0.0 1
socio -0.789965081091 1.0 4


### Gender concordance analyses

In [21]:
# Gender concordance v continuous assessment grade. Include IP. 
# This looks at how profs are rated by students.

dMale = dat[dat['prof_male']==1]
dMale = dMale[~np.isnan(dMale['note_conf'])]
              
dFemale = dat[dat['prof_female']==1]
dFemale = dFemale[~np.isnan(dFemale['note_conf'])]

(t, plow, pupper, pboth, sims) = corr(x=dMale['note_conf'], y=dMale['stu_male'], seed=rs)
print 'Concordance of genders v continuous assessment, male instructors:', t, pupper
                  
(t, plow, pupper, pboth, sims) = corr(x=dFemale['note_conf'], y=dFemale['stu_female'], seed=rs)
print 'Concordance of genders v continuous assessment, female instructors:', t, pupper

Concordance of genders v continuous assessment, male instructors: 0.189358320929 0.32
Concordance of genders v continuous assessment, female instructors: 0.482143312009 0.1317


In [22]:
# Gender concordance v preparation & organization. Includes IP courses.
# This looks at how profs are rated by students.

# redundant, but safe:
dMale = dat[dat['prof_male']==1]
dMale = dMale[~np.isnan(dMale['q1'])]  # note! need to re-set in following analyses

dFemale = dat[dat['prof_female']==1]
dFemale = dFemale[~np.isnan(dFemale['q1'])]  # note! need to re-set in following analyses

# Male instructors
(t, plow, pupper, pboth, sims) = corr(x=dMale['q1'], y=dMale['stu_male'], seed=rs)
print 'Gender concordance v. preparation and organization, male instructors:', t, pupper

# Female instructors
(t, plow, pupper, pboth, sims) = corr(x=dFemale['q1'], y=dFemale['stu_female'], seed=rs)
print 'Gender concordance v. preparation and organization, female instructors:', t, pupper

Gender concordance v. preparation and organization, male instructors: 0.0909090909091 0.5035
Gender concordance v. preparation and organization, female instructors: -0.288675134595 0.8543


In [23]:
# Gender concordance v quality of instructional material. Includes IP courses.
# This looks at how profs are rated by students.

# redundant, but safe:
dMale = dat[dat['prof_male']==1]
dMale = dMale[~np.isnan(dMale['q2'])]  # note! need to re-set in following analyses

dFemale = dat[dat['prof_female']==1]
dFemale = dFemale[~np.isnan(dFemale['q2'])]  # note! need to re-set in following analyses

# Male instructors
(t, plow, pupper, pboth, sims) = corr(x=dMale['q2'], y=dMale['stu_male'], seed=rs)
print 'Gender concordance v. preparation and organization, male instructors:', t, pupper

# Female instructors
(t, plow, pupper, pboth, sims) = corr(x=dFemale['q2'], y=dFemale['stu_female'], seed=rs)
print 'Gender concordance v. preparation and organization, female instructors:', t, pupper

Gender concordance v. preparation and organization, male instructors: 0.147441956155 0.4496
Gender concordance v. preparation and organization, female instructors: -0.160128153805 0.72


In [24]:
# Gender concordance v quality of animation. Includes IP courses
# This looks at how profs are rated by students.

dMale = dat[dat['prof_male']==1]
dMale = dMale[~np.isnan(dMale['q5'])]
              
dFemale = dat[dat['prof_female']==1]
dFemale = dFemale[~np.isnan(dFemale['q5'])]

(t, plow, pupper, pboth, sims) = corr(x=dMale['q5'], y=dMale['stu_male'], seed=rs)
print 'Gender concordance v animation, male instructors:', t, pupper

(t, plow, pupper, pboth, sims) = corr(x=dFemale['q5'], y=dFemale['stu_female'], seed=rs)
print 'Gender concordance v animation, female instructors:', t, pupper

Gender concordance v animation, male instructors: 0.120385853086 0.4457
Gender concordance v animation, female instructors: 0.160128153805 0.4975


### Differences in continuous assessment and final exam grades analyses

In [25]:
# Correlation btw avg evaluation score and difference btw continuous assessment and final grade, by course number
# Exclude IP courses
print 'Avg evaluation score v difference btw continuous assessment & final grade, by course number' 

theCols = evals + ['prof_male','plumTime','note_cm', 'diff_final_cont']

grouped = datNoIp.groupby(['course_number'])
prof_ratings = grouped[theCols].agg(np.mean)
(t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q16'], y = prof_ratings['diff_final_cont'], seed = rs)
print 'overall', t, pupper,\
       prof_ratings['diff_final_cont'].size, '\n'

for topic in np.unique(datNoIp.course):
    perTopic = datNoIp[datNoIp['course']==topic]
    grouped = perTopic.groupby(['course_number'])
    prof_ratings = grouped[theCols].agg(np.mean)
    (t, plow, pupper, pboth, sims) = corr(x = prof_ratings['q16'], y = prof_ratings['diff_final_cont'], seed = rs)
    print topic, t, pupper, prof_ratings['diff_final_cont'].size 

Avg evaluation score v difference btw continuous assessment & final grade, by course number


KeyError: "Columns not found: 'diff_final_cont'"

In [26]:
# Gender concordance v and difference btw continuous assessment and final grade. Includes IP courses
# This looks at how profs grade students compared to their actual level.

dMale = dat[dat['prof_male']==1]
dMale = dMale[~np.isnan(dMale['diff_final_cont'])]
              
dFemale = dat[dat['prof_female']==1]
dFemale = dFemale[~np.isnan(dFemale['diff_final_cont'])]

(t, plow, pupper, pboth, sims) = corr(x=dMale['diff_final_cont'], y=dMale['stu_male'], seed=rs)
print 'Gender concordance v difference btw continuous assessment and final grade, male instructors:', t, pupper

(t, plow, pupper, pboth, sims) = corr(x=dFemale['diff_final_cont'], y=dFemale['stu_female'], seed=rs)
print 'Gender concordance v difference btw continuous assessment and final grade, female instructors:', t, pupper

KeyError: 'diff_final_cont'