In [14]:
# Import packages
import pandas as pd
import numpy as np
import os
import statsmodels.api as sm
from statsmodels.formula.api import ols
import matplotlib.pyplot as plt
import json
import seaborn as sns
from IPython.display import HTML

In [15]:
# Expected response code for each knowledge question, keyed by the question
# number written as a string (questions 26 through 50).
answer_key = {
    "26": 1, "27": 1, "28": 2, "29": 1, "30": 2,
    "31": 1, "32": 2, "33": 2, "34": 2, "35": 2,
    "36": 1, "37": 1, "38": 2, "39": 2, "40": 1,
    "41": 2, "42": 1, "43": 2, "44": 2, "45": 2,
    "46": 2, "47": 1, "48": 1, "49": 2, "50": 2,
}

In [16]:
def mapValues(df, orig_col, new_col, map_dict):
    """Decode `orig_col` through `map_dict` into a new column `new_col`.

    The frame is modified in place (the new column is added to `df`) and the
    same frame object is returned. Values of `orig_col` that have no entry in
    `map_dict` come out as NaN, which is how `Series.map` treats them.
    A confirmation line naming the new column is printed.
    """
    decoded = df[orig_col].map(map_dict)
    df[new_col] = decoded
    print("Assigned", new_col)
    return df
def multiValuePctTotal(df, custom_map, value_name):
    """Turn a tally map into a table of shares of all respondents.

    Parameters
    ----------
    df : pandas.DataFrame
        The respondent frame; its row count is the denominator of every share.
    custom_map : dict
        Maps an answer code to ``{'dimension': label, 'count': tally}``.
    value_name : str
        Name given to the index of the returned table.

    Returns
    -------
    pandas.DataFrame
        Indexed by the 'dimension' labels (index named `value_name`) with one
        column, 'Percent of Total', holding ``count / len(df)`` as a fraction
        (0.14 means 14%). Shares are NaN when `df` has no rows.
    """
    # Use the actual number of respondents rather than a fixed sample size so
    # the shares stay correct whenever the filtered data set changes.
    n_respondents = len(df)
    entries = list(custom_map.values())
    labels = [entry['dimension'] for entry in entries]
    shares = [
        entry['count'] / n_respondents if n_respondents else float('nan')
        for entry in entries
    ]
    table = pd.DataFrame({value_name: labels, 'Percent of Total': shares})
    return table.set_index(value_name)
def clicked(arg):
    """Print a fixed notice that a button was clicked.

    `arg` is accepted but not used; nothing is returned.
    """
    notice = "button has been clicked!"
    print(notice)

In [17]:
df = pd.read_csv('adhd_v2.csv')
print("There are", len(df), "responses in the original file. We will only be analyzing teachers who completed the survey.")
# Keep a response only if the survey was fully progressed (Progress == 100)
# and both Q4 and Q8 were answered; those two columns feed the teacher-gender
# and years-of-teaching fields derived further down.
is_complete = df['Progress'].eq(100) & df['Q4'].notna() & df['Q8'].notna()
df = df.loc[is_complete].reset_index(drop=True)
print("Analyzing", len(df), "responses.")
# One call that emits the same two blank lines as two bare print() calls.
print("\n")
print("Cleaning up data...")
# Clean up vignettes to use gender: Block3 and Block6 are labelled male,
# every other block value is labelled female.
saw_male_vignette = df["FL_16_DO"].isin(["Block3", "Block6"])
df["student_gender"] = np.where(saw_male_vignette, "male", "female")
print("Assigned Student Gender")

# Clean up teaching years col
# Q8 is free text, so an answer may carry a unit (e.g. '16 years'). Pull out
# the first run of digits from every answer instead of patching one known
# literal, so any similar answer is handled the same way. An answer with no
# digits at all still raises in the int conversion below.
years_text = df['Q8'].astype(str).str.extract(r'(\d+)', expand=False)
df['Q8'] = years_text.astype('int')
# Bucket the integer years. np.select takes the first matching condition, so
# the second condition only ever applies to values of 5 or more; everything
# above 10 falls through to the default.
df['years_of_teaching'] = np.select(
    [df['Q8'] < 5, df['Q8'] <= 10],
    ["<5 Years", "5-10 Years"],
    default=">10 Years",
)
print("Assigned Years of Teaching.")


# Clean up teacher gender: a Q4 code of 1 is labelled male, any other value female
teacher_is_male = df["Q4"].eq(1)
df["teacher_gender"] = np.where(teacher_is_male, "male", "female")
print("Assigned Teacher Gender")

# Clean up additional certs: only an explicit "Yes" counts; blanks and any
# other value become "No"
holds_extra_cert = df["Q9 - CERT"].eq("Yes")
df['Additional Certification'] = np.where(holds_extra_cert, "Yes", "No")
print("Assigned additional certification")

# Clean up encoded values: translate numeric answer codes into readable labels
school_setting_labels = {1: 'Public', 2: 'Private', 3: 'Charter', 4: 'Other'}
geographical_area_labels = {1: 'Urban', 2: 'Suburban', 3: 'Rural'}
df = mapValues(df, 'Q11', 'School Setting', school_setting_labels)
df = mapValues(df, 'Q12', 'Geographical Area', geographical_area_labels)

# Tally tables for the multi-select questions. Each one maps a 1-based answer
# code to {'dimension': display label, 'count': running tally}. Every entry is
# its own dict because the counts are incremented individually later on.
grades_currently_teaching_map = {
    code: {'dimension': label, 'count': 0}
    for code, label in enumerate(
        ['Kindergarten', '1st', '2nd', '3rd', '4th', '5th', '6th', '7th', 'Other'],
        start=1,
    )
}
grades_taught_in_past_map = {
    code: {'dimension': label, 'count': 0}
    for code, label in enumerate(
        ['Kindergarten', '1st', '2nd', '3rd', '4th', '5th', '6th', '7th',
         '8th', '9th', '10th', '11th', '12th', 'Other'],
        start=1,
    )
}
type_of_classroom_map = {
    code: {'dimension': label, 'count': 0}
    for code, label in enumerate(
        ['General Education', 'Special Education', 'Integrated/Co-taught', 'Other'],
        start=1,
    )
}
def _selected_codes(answer):
    """Return the integer answer codes held in one multi-select cell.

    `answer` may be a comma-separated string such as '1,3,4', a single number,
    or a missing value. A missing value yields an empty list, so an unanswered
    question adds nothing to the tallies instead of raising.
    """
    # pd.isna is used because an identity test against np.nan is only true for
    # that one float object and misses NaN values created elsewhere.
    if pd.isna(answer):
        return []
    if isinstance(answer, str):
        return [int(code) for code in answer.split(',')]
    # A purely numeric column yields a bare number (possibly a float like 3.0).
    return [int(answer)]


# Count how often each option was ticked in the three multi-select questions.
for _, row in df.iterrows():
    for grade in _selected_codes(row['Q13']):
        grades_currently_teaching_map[grade]['count'] += 1
    for past_grade in _selected_codes(row['Q14']):
        grades_taught_in_past_map[past_grade]['count'] += 1
    for type_room in _selected_codes(row['Q15']):
        type_of_classroom_map[type_room]['count'] += 1

print("Generated counts for grades currently teaching, grades taught in the past, and type of classroom.")
# Rename columns
rename_dict = { 'Q25_1':'degree_of_need'}
df = df.rename(columns=rename_dict)
df.head()

There are 50 responses in the original file. We will only be analyzing teachers who completed the survey.
Analyzing 50 responses.


Cleaning up data...
Assigned Student Gender
Assigned Years of Teaching.
Assigned Teacher Gender
Assigned additional certification
Assigned School Setting
Assigned Geographical Area
Generated counts for grades currently teaching, grades taught in the past, and type of classroom.


Unnamed: 0,Progress,Duration (in seconds),Finished,ResponseId,Q1,Q4,Q5,Q8,Q9,Q9 - CERT,...,Q48,Q49,Q50,FL_16_DO,student_gender,years_of_teaching,teacher_gender,Additional Certification,School Setting,Geographical Area
0,100,570,1,R_2XmMOwXkxl03RS1,1,2,26,5,Elementary and early childhood education (BS),No,...,1,2,2.0,Block3,male,5-10 Years,female,No,Public,Suburban
1,100,319,1,R_323Zdlt2e8eOHAZ,1,2,25,4,Bachelors degree- early childhood education,No,...,1,2,1.0,Block3,male,<5 Years,female,No,Public,Suburban
2,100,275,1,R_3KOu9RKNFFUTc9T,1,2,41,11,M Ed,No,...,1,1,2.0,Block7,female,>10 Years,female,No,Private,Suburban
3,100,530,1,R_31j7NdjFkRxxvT5,1,2,27,6,Childhood Special Education,Yes,...,1,2,2.0,Block5,female,5-10 Years,female,Yes,Private,Urban
4,100,346,1,R_2tEx8uK4n6xh7HF,1,2,23,2,,,...,1,1,1.0,Block5,female,<5 Years,female,No,Private,Urban


In [18]:
print("Calculating teacher knowledge of ADHD...")
# Score each respondent as the number of questions Q26..Q50 whose response
# equals the answer key. A blank (NaN) response never compares equal, so it
# counts as not matching.
grades = []
for _, response in df.iterrows():
    n_correct = sum(
        1
        for q in range(26, 51)
        if response['Q' + str(q)] == answer_key[str(q)]
    )
    grades.append(n_correct)
    if n_correct == 0:
        # Show any respondent with no matching answers at all.
        print(response)
df['teacher_knowledge'] = grades

Calculating teacher knowledge of ADHD...


## Linear Regression

In [19]:
# Ordinary least squares fit of degree_of_need on the four predictors below.
# The terms are joined with '+' and no spaces, giving the formula
# 'degree_of_need ~ teacher_gender+years_of_teaching+student_gender+teacher_knowledge'.
predictors = ['teacher_gender', 'years_of_teaching', 'student_gender', 'teacher_knowledge']
model_0 = ols('degree_of_need ~ ' + '+'.join(predictors), data=df).fit()
model_0.summary()

0,1,2,3
Dep. Variable:,degree_of_need,R-squared:,0.152
Model:,OLS,Adj. R-squared:,0.056
Method:,Least Squares,F-statistic:,1.576
Date:,"Mon, 20 Feb 2023",Prob (F-statistic):,0.187
Time:,22:19:33,Log-Likelihood:,-105.27
No. Observations:,50,AIC:,222.5
Df Residuals:,44,BIC:,234.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,11.5633,3.401,3.400,0.001,4.710,18.417
teacher_gender[T.male],-1.3623,1.363,-0.999,0.323,-4.110,1.385
years_of_teaching[T.<5 Years],-2.4062,0.919,-2.618,0.012,-4.258,-0.554
years_of_teaching[T.>10 Years],-0.8689,0.686,-1.266,0.212,-2.252,0.514
student_gender[T.male],-0.4300,0.651,-0.661,0.512,-1.742,0.881
teacher_knowledge,-0.1861,0.170,-1.098,0.278,-0.528,0.155

0,1,2,3
Omnibus:,1.394,Durbin-Watson:,1.917
Prob(Omnibus):,0.498,Jarque-Bera (JB):,1.071
Skew:,-0.358,Prob(JB):,0.585
Kurtosis:,2.956,Cond. No.,223.0


## Demographics

In [20]:
# % Total Descriptive Statistics
# For each categorical column, show every category's share of ALL rows in df.
# The denominator is len(df), not the number of non-missing values.
desc_columns = [
    'teacher_gender',
    'years_of_teaching',
    'School Setting',
    'Geographical Area',
    'Additional Certification',
]
for col in desc_columns:
    share_of_rows = df[col].value_counts().div(len(df))
    display(share_of_rows.to_frame())

Unnamed: 0,teacher_gender
female,0.94
male,0.06


Unnamed: 0,years_of_teaching
>10 Years,0.5
5-10 Years,0.34
<5 Years,0.16


Unnamed: 0,School Setting
Public,0.72
Private,0.22
Charter,0.04
Other,0.02


Unnamed: 0,Geographical Area
Suburban,0.8
Urban,0.2


Unnamed: 0,Additional Certification
Yes,0.76
No,0.24


In [21]:
# Render one share table per multi-select question, in a fixed order.
for tally_map, table_title in (
    (grades_currently_teaching_map, 'Grades Currently Teaching'),
    (grades_taught_in_past_map, 'Grades Taught in Past'),
    (type_of_classroom_map, 'Type of Classroom'),
):
    display(multiValuePctTotal(df, tally_map, table_title))

Unnamed: 0_level_0,Percent of Total
Grades Currently Teaching,Unnamed: 1_level_1
Kindergarten,0.14
1st,0.18
2nd,0.22
3rd,0.32
4th,0.32
5th,0.22
6th,0.1
7th,0.14
Other,0.1


Unnamed: 0_level_0,Percent of Total
Grades Taught in Past,Unnamed: 1_level_1
Kindergarten,0.46
1st,0.44
2nd,0.48
3rd,0.5
4th,0.6
5th,0.42
6th,0.24
7th,0.16
8th,0.12
9th,0.08


Unnamed: 0_level_0,Percent of Total
Type of Classroom,Unnamed: 1_level_1
General Education,0.4
Special Education,0.26
Integrated/Co-taught,0.34
Other,0.24
