In [119]:
# Import packages
import pandas as pd
import numpy as np
import os
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.formula.api import logit
import matplotlib.pyplot as plt
import json
import seaborn as sns
from IPython.display import HTML

In [94]:
# Answer key for survey items Q26-Q50: question number (as a string) -> the response
# code that is counted as correct when the knowledge score is computed.
answer_key = {
    "26": 1, "27": 1, "28": 2, "29": 1, "30": 2,
    "31": 1, "32": 2, "33": 2, "34": 2, "35": 2,
    "36": 1, "37": 1, "38": 2, "39": 2, "40": 1,
    "41": 2, "42": 1, "43": 2, "44": 2, "45": 2,
    "46": 2, "47": 1, "48": 1, "49": 2, "50": 2,
}

In [95]:
def mapValues(df, orig_col, new_col, map_dict):
    """Add a translated copy of a column to ``df`` and return the same frame.

    Args:
        df: DataFrame to modify in place.
        orig_col: Name of the existing column holding the raw codes.
        new_col: Name of the column to create (or overwrite).
        map_dict: Lookup from raw code to label; codes missing from it become NaN.

    Returns:
        The same ``df`` object, now containing ``new_col``.
    """
    translated = df[orig_col].map(map_dict)
    df[new_col] = translated
    print("Assigned", new_col)
    return df
def multiValuePctTotal(df, custom_map, value_name):
    """Build a table of each option's count divided by the number of rows in ``df``.

    Args:
        df: DataFrame whose length is used as the denominator.
        custom_map: Mapping whose values are dicts with a 'dimension' label and a 'count'.
        value_name: Name given to the label column, which becomes the index.

    Returns:
        DataFrame indexed by ``value_name`` with one 'Percent of Total' column holding
        count / len(df) as a fraction. Rows keep the iteration order of ``custom_map``.
    """
    total_rows = len(df)
    entries = list(custom_map.values())
    table = pd.DataFrame({
        value_name: [entry['dimension'] for entry in entries],
        'Percent of Total': [entry['count'] / total_rows for entry in entries],
    })
    return table.set_index(value_name)
def clicked(arg):
    """Print a fixed confirmation message; ``arg`` is accepted but not used."""
    message = "button has been clicked!"
    print(message)

In [96]:
# Load the raw survey export and keep only rows with Progress == 100 and non-null Q4 and Q8.
df = pd.read_csv('adhd_v2.csv')
print("There are", df.shape[0], "responses in the original file. We will only be analyzing teachers who completed the survey.")
is_complete = (df['Progress'] == 100) & df['Q4'].notnull() & df['Q8'].notnull()
df = df[is_complete].reset_index(drop=True)
print("Analyzing", df.shape[0], "responses.")
print()
print()
print("Cleaning up data...")

# Clean up vignettes to use gender.
# All three vignette columns come from one lookup keyed on the FL_16_DO block id, so they
# cannot disagree with each other. An unrecognised or missing block id becomes NaN in every
# derived column instead of being silently labelled "female" / "combined".
vignette_lookup = {
    "Block3": {"student_gender": "male", "adhd_subtype": "inattentive", "Vignette Type": "Inattentive Male"},
    "Block6": {"student_gender": "male", "adhd_subtype": "combined", "Vignette Type": "Combined Male"},
    "Block5": {"student_gender": "female", "adhd_subtype": "combined", "Vignette Type": "Combined Female"},
    "Block7": {"student_gender": "female", "adhd_subtype": "inattentive", "Vignette Type": "Inattentive Female"},
}
for attribute in ("student_gender", "adhd_subtype"):
    df[attribute] = df["FL_16_DO"].map(
        {block: labels[attribute] for block, labels in vignette_lookup.items()}
    )
df = mapValues(df, 'FL_16_DO', 'Vignette Type',
               {block: labels["Vignette Type"] for block, labels in vignette_lookup.items()})

# Surface rows that could not be assigned to a vignette rather than hiding them.
n_unassigned = int(df["Vignette Type"].isna().sum())
if n_unassigned:
    print("Warning:", n_unassigned, "responses have an unrecognised FL_16_DO value and no vignette attributes.")

print("Assigned Vignette attributes: Student Gender and ADHD Subtype")

# Clean up teaching years col.
# Q8 can contain free text (the export has at least one "16 years" answer), so take the first
# run of digits from every answer instead of patching individual strings by hand.
years_taught = df['Q8'].astype(str).str.extract(r'(\d+)', expand=False)
if years_taught.isna().any():
    # Fail loudly with the offending answers instead of an opaque cast error.
    raise ValueError("Q8 contains answers without a number: "
                     + repr(df.loc[years_taught.isna(), 'Q8'].tolist()))
df['Q8'] = years_taught.astype('int')

# Bucket years of teaching into three bands. "Other" is kept as a fallback label but cannot
# occur for integer input.
df['years_of_teaching'] = np.where(
    df["Q8"] < 5, "<5 Years",
    np.where(
        (df["Q8"] >= 5) & (df["Q8"] <= 10), "5-10 Years",
        np.where(df["Q8"] > 10, ">10 Years", "Other"),
    ),
)
print("Assigned Years of Teaching.")


# Clean up teacher gender.
# NOTE(review): every Q4 code other than 1 is labelled "female" - confirm against the
# survey codebook that Q4 has only two answer options.
df["teacher_gender"] = np.where(df["Q4"] == 1, "male", "female")
print("Assigned Teacher Gender")

# Clean up additional certs: anything other than an explicit "Yes" (including blanks) counts as "No".
df['Additional Certification'] = np.where(df["Q9 - CERT"] == "Yes", "Yes", "No")
print("Assigned additional certification")

# Clean up encoded values: translate numeric codes into readable labels.
df = mapValues(df, 'Q11', 'School Setting', {1: 'Public', 2: 'Private', 3: 'Charter', 4: 'Other'})
df = mapValues(df, 'Q12', 'Geographical Area', {1: 'Urban', 2: 'Suburban', 3: 'Rural'})

# Tally tables for the multi-select questions: answer code -> display label plus a running
# count that starts at zero. Codes are consecutive integers starting at 1, in label order.
grades_currently_teaching_map = {
    code: {'dimension': label, 'count': 0}
    for code, label in enumerate(
        ['Kindergarten', '1st', '2nd', '3rd', '4th', '5th', '6th', '7th', 'Other'],
        start=1,
    )
}
grades_taught_in_past_map = {
    code: {'dimension': label, 'count': 0}
    for code, label in enumerate(
        ['Kindergarten', '1st', '2nd', '3rd', '4th', '5th', '6th', '7th',
         '8th', '9th', '10th', '11th', '12th', 'Other'],
        start=1,
    )
}
type_of_classroom_map = {
    code: {'dimension': label, 'count': 0}
    for code, label in enumerate(
        ['General Education', 'Special Education', 'Integrated/Co-taught', 'Other'],
        start=1,
    )
}
def _tally_choices(response, counts_map):
    """Add one to ``counts_map[code]['count']`` for each code in a comma-separated response.

    Missing responses (NaN/None) are skipped. Codes may be written as "3" or "3.0".
    A code that is not a key of ``counts_map`` raises ``KeyError``.
    """
    if pd.isna(response):
        return
    for token in str(response).split(','):
        counts_map[int(float(token))]['count'] += 1


def _has_choice(response, code):
    """Return True if ``code`` is one of the comma-separated choices in ``response``.

    Whole tokens are compared numerically, so looking for 5 does not match 15 or 50.
    Missing responses and non-numeric tokens never match.
    """
    if pd.isna(response):
        return False
    for token in str(response).split(','):
        try:
            if float(token) == code:
                return True
        except ValueError:
            continue
    return False


# Count how often each option of the multi-select questions was chosen (Q13: grades currently
# teaching, Q14: grades taught in the past, Q15: type of classroom), and flag with 1/0 whether
# choice 5 was selected on Q22.
student_exhibits_adhd = []
for _, row in df.iterrows():
    _tally_choices(row['Q13'], grades_currently_teaching_map)
    _tally_choices(row['Q14'], grades_taught_in_past_map)
    _tally_choices(row['Q15'], type_of_classroom_map)
    student_exhibits_adhd.append(1 if _has_choice(row['Q22'], 5) else 0)
df['student_exhibits_adhd'] = student_exhibits_adhd
print("Generated counts for grades currently teaching, grades taught in the past, and type of classroom.")
# Rename columns
rename_dict = {'Q25_1': 'degree_of_need'}
df = df.rename(columns=rename_dict)
df.head()

There are 50 responses in the original file. We will only be analyzing teachers who completed the survey.
Analyzing 50 responses.


Cleaning up data...
Assigned Vignette Type
Assigned Vignette attributes: Student Gender and ADHD Subtype
Assigned Years of Teaching.
Assigned Teacher Gender
Assigned additional certification
Assigned School Setting
Assigned Geographical Area
Generated counts for grades currently teaching, grades taught in the past, and type of classroom.


Unnamed: 0,Progress,Duration (in seconds),Finished,ResponseId,Q1,Q4,Q5,Q8,Q9,Q9 - CERT,...,FL_16_DO,student_gender,adhd_subtype,Vignette Type,years_of_teaching,teacher_gender,Additional Certification,School Setting,Geographical Area,student_exhibits_adhd
0,100,570,1,R_2XmMOwXkxl03RS1,1,2,26,5,Elementary and early childhood education (BS),No,...,Block3,male,inattentive,Inattentive Male,5-10 Years,female,No,Public,Suburban,0
1,100,319,1,R_323Zdlt2e8eOHAZ,1,2,25,4,Bachelors degree- early childhood education,No,...,Block3,male,inattentive,Inattentive Male,<5 Years,female,No,Public,Suburban,1
2,100,275,1,R_3KOu9RKNFFUTc9T,1,2,41,11,M Ed,No,...,Block7,female,inattentive,Inattentive Female,>10 Years,female,No,Private,Suburban,1
3,100,530,1,R_31j7NdjFkRxxvT5,1,2,27,6,Childhood Special Education,Yes,...,Block5,female,combined,Combined Female,5-10 Years,female,Yes,Private,Urban,1
4,100,346,1,R_2tEx8uK4n6xh7HF,1,2,23,2,,,...,Block5,female,combined,Combined Female,<5 Years,female,No,Private,Urban,1


In [97]:
# Score each respondent on items Q26-Q50: one point per answer equal to the answer key.
# NOTE(review): a later cell reads Q26 as a comma-separated multi-select ("factors") column;
# confirm that the knowledge items really start at Q26 in the export.
print("Calculating teacher knowledge of ADHD...")
question_numbers = range(26, 51)
grades = []
for _, response_row in df.iterrows():
    n_correct = sum(
        1
        for number in question_numbers
        if response_row['Q' + str(number)] == answer_key[str(number)]
    )
    grades.append(n_correct)
    # A score of zero is unusual enough that the whole response is printed for inspection.
    if n_correct == 0:
        print(response_row)
df['teacher_knowledge'] = grades

Calculating teacher knowledge of ADHD...


# Descriptive Statistics

## 1. Demographic Tables (Percent of Total)

In [99]:
# Share of all analysed responses falling in each category of the demographic columns.
# The denominator is the full row count, so rows with a missing value still count toward it.
desc_columns = [
    'teacher_gender',
    'years_of_teaching',
    'School Setting',
    'Geographical Area',
    'Additional Certification',
]
n_responses = len(df)
for demographic in desc_columns:
    category_counts = df[demographic].value_counts()
    display(pd.DataFrame(category_counts / n_responses))

Unnamed: 0,teacher_gender
female,0.94
male,0.06


Unnamed: 0,years_of_teaching
>10 Years,0.5
5-10 Years,0.34
<5 Years,0.16


Unnamed: 0,School Setting
Public,0.72
Private,0.22
Charter,0.04
Other,0.02


Unnamed: 0,Geographical Area
Suburban,0.8
Urban,0.2


Unnamed: 0,Additional Certification
Yes,0.76
No,0.24


In [100]:
# One table per multi-select question. Respondents can pick several options, so the
# fractions within a table can add up to more than 1.
multi_select_tables = [
    (grades_currently_teaching_map, 'Grades Currently Teaching'),
    (grades_taught_in_past_map, 'Grades Taught in Past'),
    (type_of_classroom_map, 'Type of Classroom'),
]
for counts_map, table_title in multi_select_tables:
    display(multiValuePctTotal(df, counts_map, table_title))

Unnamed: 0_level_0,Percent of Total
Grades Currently Teaching,Unnamed: 1_level_1
Kindergarten,0.14
1st,0.18
2nd,0.22
3rd,0.32
4th,0.32
5th,0.22
6th,0.1
7th,0.14
Other,0.1


Unnamed: 0_level_0,Percent of Total
Grades Taught in Past,Unnamed: 1_level_1
Kindergarten,0.46
1st,0.44
2nd,0.48
3rd,0.5
4th,0.6
5th,0.42
6th,0.24
7th,0.16
8th,0.12
9th,0.08


Unnamed: 0_level_0,Percent of Total
Type of Classroom,Unnamed: 1_level_1
General Education,0.4
Special Education,0.26
Integrated/Co-taught,0.34
Other,0.24


## 2. Descriptive Table - (% of participants presented with each vignette that selected ADHD on Q22)

In [101]:
# Per vignette type: number of respondents flagged in student_exhibits_adhd, number of
# respondents shown that vignette, and the ratio of the two.
adhd_flag_by_vignette = df.groupby('Vignette Type')['student_exhibits_adhd']
vign_correct_sum = pd.DataFrame(adhd_flag_by_vignette.sum())
vign_cnt = pd.DataFrame(adhd_flag_by_vignette.count()).rename(
    columns={'student_exhibits_adhd': 'total'}
)
df_vign = pd.concat([vign_correct_sum, vign_cnt], axis=1)
selected = df_vign['student_exhibits_adhd']
shown = df_vign['total']
df_vign['Percent of Participants That Selected ADHD by Vignette Type'] = selected / shown
df_vign

Unnamed: 0_level_0,student_exhibits_adhd,total,Percent of Participants That Selected ADHD by Vignette Type
Vignette Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Combined Female,9,11,0.818182
Combined Male,9,14,0.642857
Inattentive Female,5,11,0.454545
Inattentive Male,7,14,0.5


## 3. Descriptive Table (minimum, maximum, mean, median, SD of responses 0-10 on Q25_1)

In [110]:
# Summary statistics of degree_of_need (Q25_1) per vignette type. The column is selected
# before describe() so only that column is summarised, not every numeric column in the frame.
df.groupby('Vignette Type')['degree_of_need'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Vignette Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Combined Female,11.0,7.363636,2.54058,4.0,5.0,7.0,10.0,10.0
Combined Male,14.0,6.928571,2.585887,3.0,5.0,7.5,9.0,10.0
Inattentive Female,11.0,6.454545,1.916436,2.0,5.5,7.0,7.5,9.0
Inattentive Male,14.0,6.642857,1.736803,3.0,6.25,7.0,7.0,10.0


## 4. Descriptive Table (sum, mean and standard deviation of score of ADHD knowledge scale correct responses)

In [103]:
# Summary statistics of the knowledge score within each teacher/school grouping. The score
# column is selected before describe() so only that column is summarised for each grouping.
q4_cols = ['teacher_gender', 'years_of_teaching', 'School Setting']
for col in q4_cols:
    display(df.groupby(col)['teacher_knowledge'].describe())

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
teacher_gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
female,47.0,19.340426,1.550268,15.0,18.5,20.0,20.0,22.0
male,3.0,17.666667,4.932883,12.0,16.0,20.0,20.5,21.0


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
years_of_teaching,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
5-10 Years,17.0,19.294118,1.649421,15.0,19.0,20.0,20.0,22.0
<5 Years,8.0,18.875,1.95941,16.0,17.5,20.0,20.0,21.0
>10 Years,25.0,19.32,1.994158,12.0,19.0,20.0,20.0,22.0


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
School Setting,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Charter,2.0,20.0,0.0,20.0,20.0,20.0,20.0,20.0
Other,1.0,20.0,,20.0,20.0,20.0,20.0,20.0
Private,11.0,19.454545,2.0181,15.0,18.5,20.0,20.5,22.0
Public,36.0,19.111111,1.878872,12.0,18.0,19.5,20.0,22.0


## 5. Descriptive Table (% of each factor endorsed as influencing decision to refer)

In [104]:
# Tally table for the referral-factor question: answer code -> label plus running count.
factors_map = {1 : {'dimension': 'impact of difficulties on the child', 'count': 0},
                2 : {'dimension': 'Impact of difficulties on the child’s peers', 'count': 0},
                3 : {'dimension': 'Personal views about referrals', 'count': 0},
                4 : {'dimension': 'Parent views about referrals', 'count': 0},
                5 : {'dimension': 'Stigma surrounding referrals', 'count': 0},
                6 : {'dimension': 'Personal experience with specialist services', 'count': 0},
                7 : {'dimension': 'Minimal awareness of what specialist services offer', 'count': 0},
                8 : {'dimension': 'Other', 'count': 0}
                }
# NOTE(review): Q26 is also scored as a single-answer knowledge item by the answer key
# (items 26-50); confirm which survey question this column actually holds.
for response in df['Q26']:
    # A respondent who left the question blank endorses nothing; without this guard the
    # missing value would be parsed as the text "nan" and raise ValueError.
    if pd.isna(response):
        continue
    for code in str(response).split(','):
        # Codes may be written as "3" or "3.0".
        factors_map[int(float(code))]['count'] += 1

multiValuePctTotal(df, factors_map, 'Factors Endorsed as Influencing Decision to Refer')

Unnamed: 0_level_0,Percent of Total
Factors Endorsed as Influencing Decision to Refer,Unnamed: 1_level_1
impact of difficulties on the child,0.92
Impact of difficulties on the child’s peers,0.2
Personal views about referrals,0.24
Parent views about referrals,0.2
Stigma surrounding referrals,0.06
Personal experience with specialist services,0.54
Minimal awareness of what specialist services offer,0.04
Other,0.12


# Statistical Analyses

## 1. ANOVAs (outcome variable = Q25_1)

In [106]:
# ANOVA (a): degree_of_need (the renamed Q25_1) explained by student gender.
gender_formula = 'degree_of_need ~ student_gender'
gender_model = ols(formula=gender_formula, data=df).fit()
# typ=2 requests the Type II sums-of-squares table.
anova_gender_table = sm.stats.anova_lm(gender_model, typ=2)
anova_gender_table

Unnamed: 0,sum_sq,df,F,PR(>F)
student_gender,0.187532,1.0,0.038711,0.844853
Residual,232.532468,48.0,,


In [111]:
# ANOVA (b): degree_of_need (the renamed Q25_1) explained by ADHD subtype of the vignette.
adhd_subtype_formula = 'degree_of_need ~ adhd_subtype'
adhd_subtype_model = ols(formula=adhd_subtype_formula, data=df).fit()
# typ=2 requests the Type II sums-of-squares table.
anova_adhd_subtype_table = sm.stats.anova_lm(adhd_subtype_model, typ=2)
anova_adhd_subtype_table

Unnamed: 0,sum_sq,df,F,PR(>F)
adhd_subtype,3.92,1.0,0.822378,0.369016
Residual,228.8,48.0,,


## 2. Logistic Regression

In [124]:
# Logistic regression of the binary student_exhibits_adhd flag on vignette and teacher predictors.
logit_predictors = ['adhd_subtype', 'years_of_teaching', 'student_gender', 'teacher_knowledge']
logit_formula = 'student_exhibits_adhd ~ ' + '+'.join(logit_predictors)
log_model = logit(logit_formula, data=df).fit()
log_model.summary()

Optimization terminated successfully.
         Current function value: 0.607406
         Iterations 6


0,1,2,3
Dep. Variable:,student_exhibits_adhd,No. Observations:,50.0
Model:,Logit,Df Residuals:,44.0
Method:,MLE,Df Model:,5.0
Date:,"Tue, 21 Feb 2023",Pseudo R-squ.:,0.09748
Time:,00:17:41,Log-Likelihood:,-30.37
converged:,True,LL-Null:,-33.651
Covariance Type:,nonrobust,LLR p-value:,0.2554

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-3.1962,3.700,-0.864,0.388,-10.449,4.056
adhd_subtype[T.inattentive],-1.1275,0.655,-1.720,0.085,-2.412,0.157
years_of_teaching[T.<5 Years],-0.3711,0.972,-0.382,0.703,-2.277,1.535
years_of_teaching[T.>10 Years],-0.8508,0.734,-1.159,0.246,-2.290,0.588
student_gender[T.male],-0.5176,0.659,-0.785,0.433,-1.810,0.775
teacher_knowledge,0.2600,0.196,1.325,0.185,-0.125,0.645


## 3. Linear Regression

In [112]:
# Ordinary least squares regression of degree_of_need on the same predictors as the logit model.
lin_predictors = ['adhd_subtype', 'years_of_teaching', 'student_gender', 'teacher_knowledge']
lin_formula = 'degree_of_need ~ ' + '+'.join(lin_predictors)
lin_model = ols(lin_formula, data=df).fit()
lin_model.summary()

0,1,2,3
Dep. Variable:,degree_of_need,R-squared:,0.147
Model:,OLS,Adj. R-squared:,0.05
Method:,Least Squares,F-statistic:,1.518
Date:,"Tue, 21 Feb 2023",Prob (F-statistic):,0.204
Time:,00:01:36,Log-Likelihood:,-105.41
No. Observations:,50,AIC:,222.8
Df Residuals:,44,BIC:,234.3
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,10.4151,3.262,3.193,0.003,3.841,16.989
adhd_subtype[T.inattentive],-0.5350,0.619,-0.864,0.392,-1.782,0.712
years_of_teaching[T.<5 Years],-2.3036,0.915,-2.518,0.016,-4.147,-0.460
years_of_teaching[T.>10 Years],-0.7412,0.699,-1.060,0.295,-2.150,0.668
student_gender[T.male],-0.2141,0.625,-0.342,0.734,-1.474,1.046
teacher_knowledge,-0.1273,0.166,-0.766,0.448,-0.462,0.208

0,1,2,3
Omnibus:,2.133,Durbin-Watson:,1.869
Prob(Omnibus):,0.344,Jarque-Bera (JB):,1.857
Skew:,-0.466,Prob(JB):,0.395
Kurtosis:,2.854,Cond. No.,211.0
