In [1]:
import pandas as pd
import numpy as np
from utils import *

In [2]:
#define a function to convert the survey column names to intuitive column names
def convert_cols(df):
    """Return ``df`` with the raw survey question codes renamed.

    Column labels such as ``q1`` or ``q10_3`` are replaced by the descriptive
    names listed in the mapping below. Labels that do not appear in the mapping
    are left unchanged, and mapping keys that are absent from ``df`` are
    ignored. ``DataFrame.rename`` returns a new frame, so the frame passed in
    keeps its original labels.
    """
    intuitive_names = {
        #q1 - q9: email plus the sim_* ratings and free-text answers
        'q1': 'email',
        'q4_1': 'sim_fbsk',
        'q5': 'sim_txt_fbsk',
        'q6_1': 'sim_cmsk',
        'q7': 'sim_txt_cmsk',
        'q8': 'sim_txt_eth_behavior',
        'q9': 'sim_txt_eth_contribute',
        #q10_1 - q10_14: beh_* items
        'q10_1': 'beh_fidgeting',
        'q10_2': 'beh_humming',
        'q10_3': 'beh_excitable',
        'q10_4': 'beh_inattentive',
        'q10_5': 'beh_short_attention',
        'q10_6': 'beh_quarrelsome',
        'q10_7': 'beh_acts_smart',
        'q10_8': 'beh_unpredictable',
        'q10_9': 'beh_defiant',
        'q10_10': 'beh_uncooperative',
        'q10_11': 'beh_easily_frustrated',
        'q10_12': 'beh_disturbs_others',
        'q10_13': 'beh_restless',
        'q10_14': 'beh_mood_changes',
        #q11_1 - q23_1: app_* items
        'q11_1': 'app_coach_stu',
        'q12_1': 'app_adjust_expect',
        'q13_1': 'app_guidance_couns',
        'q14_1': 'app_rec_sped',
        'q15_1': 'app_discp_refer',
        'q16_1': 'app_confer_stu',
        'q17_1': 'app_confer_parent',
        'q18_1': 'app_behavior_plan',
        'q19_1': 'app_challenge_work',
        'q20_1': 'app_spend_time',
        'q21_1': 'app_space_regroup',
        'q22_1': 'app_beh_manage_coach',
        'q23_1': 'app_beh_manage_teach',
        #q24 - q28: remaining sim_* ratings and free-text answers
        'q24': 'sim_txt_supports',
        'q25_1': 'sim_nervous',
        'q25_2': 'sim_beneficial',
        'q25_3': 'sim_worried_perform',
        'q25_4': 'sim_useful_tool',
        'q25_5': 'sim_relevant_studies',
        'q25_6': 'sim_relevant_prof',
        'q25_7': 'sim_like_use_again',
        'q25_8': 'sim_recommend',
        'q25_9': 'sim_sufficient_prep',
        'q25_10': 'sim_enough_time',
        'q26': 'sim_txt_beneficial',
        'q27': 'sim_txt_improve_exp',
        'q28': 'sim_txt_concerns',
    }
    return df.rename(columns=intuitive_names)

In [3]:
def clean_survey_data(data):
    """Clean one raw survey export and return the cleaned DataFrame.

    The frame passed in is left untouched; all work happens on a copy.
    In order, the function

    1. lower-cases and strips the column labels and removes ``#`` from them,
    2. keeps only the rows whose ``q1`` is not missing,
    3. drops the columns listed in ``unused_columns``,
    4. renames the question columns with ``convert_cols``,
    5. runs ``format_email`` and ``drop_duplicate``, then drops ``startdate``,
    6. calls ``convert_columns`` with the label -> number maps and then
       ``convert_numeric``,
    7. calls ``generate_iowa_score_scale``, ``reverse_approach_scale`` (with
       the 1..10 -> 10..1 map) and ``generate_app_scale``,
    8. calls ``convert_columns`` again with the number -> label maps.

    Apart from ``convert_cols`` and ``drop_duplicate`` the helpers' return
    values are not used, so they are expected to modify the frame in place.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw survey export.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame.

    Raises
    ------
    KeyError
        If ``q1`` or any of the columns being dropped is absent.
    """
    #work on a copy: assigning .columns would otherwise relabel the caller's frame
    data = data.copy()
    data.columns = pd.Series(data.columns).str.lower().str.strip().str.replace('#','',regex=False)
    #drop unnecessary observation; .copy() gives the in-place helpers below
    #an independent frame instead of a filtered slice
    data = data[data['q1'].notna()].copy()
    #drop unnecessary variables
    unused_columns=['enddate', 'status', 'ipaddress', 'progress',
       'duration (in seconds)', 'finished', 'recordeddate', 'responseid',
       'recipientlastname', 'recipientfirstname', 'recipientemail',
       'externalreference', 'locationlatitude', 'locationlongitude',
       'distributionchannel', 'userlanguage']
    data=data.drop(columns=unused_columns)
    #rename the columns to intuitive names
    data=convert_cols(data)
    format_email(data)
    data=drop_duplicate(data)
    data=data.drop(columns=['startdate'])
    #answer label -> number maps; the reverse maps are derived from them so
    #the two directions cannot get out of sync
    beh_map={"Not at all":1,  "Just a little":2,  "Pretty much":3 , "Very much":4}
    sim_map={"Strongly agree":5, "Somewhat agree":4, "Undecided":3, "Somewhat disagree":2, "Strongly disagree":1}
    reverse_beh = {number: label for label, number in beh_map.items()}
    reverse_sim = {number: label for label, number in sim_map.items()}
    #1..10 -> 10..1
    app_map = {score: 11 - score for score in range(1, 11)}
    #convert columns to right data type
    convert_columns(data,beh_map,sim_map)
    convert_numeric(data)
    #generate Iowa Score Scaling
    generate_iowa_score_scale(data)
    #reverse the approach scale
    reverse_approach_scale(data,app_map)
    #generate approach scale
    generate_app_scale(data)
    #convert the numeric columns back to categorical data
    convert_columns(data,reverse_beh,reverse_sim)

    return data
    
    

    


In [4]:
def clean_redo_data(data):
    """Clean the raw "redo" survey export and return the cleaned DataFrame.

    The frame passed in is left untouched; all work happens on a copy.
    In order, the function

    1. lower-cases and strips the column labels and removes ``#`` from them,
    2. keeps only the rows whose ``q1`` is not missing,
    3. drops the columns listed in ``unused_columns``,
    4. renames the question columns with ``convert_cols``,
    5. runs ``format_email`` and ``drop_duplicate``, then drops ``startdate``,
    6. calls ``generate_iowa_score_scale``, ``reverse_approach_scale`` (with
       the 1..10 -> 10..1 map) and ``generate_app_scale``,
    7. casts the columns from ``beh_fidgeting`` through ``beh_mood_changes``
       to int and replaces the numbers 1-4 with their answer labels.

    No label -> number conversion happens before step 6.
    NOTE(review): presumably the redo export already holds numeric answers;
    confirm against the raw file.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw redo survey export.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame.

    Raises
    ------
    KeyError
        If ``q1`` or any of the columns being dropped is absent.
    """
    #work on a copy: assigning .columns would otherwise relabel the caller's frame
    data = data.copy()
    data.columns = pd.Series(data.columns).str.lower().str.strip().str.replace('#','',regex=False)
    #drop unnecessary observation; .copy() gives the in-place steps below
    #an independent frame instead of a filtered slice
    data = data[data['q1'].notna()].copy()
    #drop unnecessary variables
    unused_columns=['enddate', 'status', 'ipaddress', 'progress',
       'duration (in seconds)', 'finished', 'recordeddate', 'responseid',
       'recipientlastname', 'recipientfirstname', 'recipientemail',
       'externalreference', 'locationlatitude', 'locationlongitude',
       'distributionchannel', 'userlanguage']
    data=data.drop(columns=unused_columns)
    #rename the columns to intuitive names
    data=convert_cols(data)
    format_email(data)
    data=drop_duplicate(data)
    data=data.drop(columns=['startdate'])
    reverse_beh = {1:"Not at all", 2: "Just a little", 3: "Pretty much" ,4: "Very much"}
    #1..10 -> 10..1
    app_map = {score: 11 - score for score in range(1, 11)}
    #generate Iowa Score Scaling
    generate_iowa_score_scale(data)
    #reverse the approach scale
    reverse_approach_scale(data,app_map)
    #generate approach scale
    generate_app_scale(data)
    #convert the beh_* columns back to their answer labels
    #(the int cast fails if one of these columns still holds a missing value)
    data.loc[:,'beh_fidgeting':'beh_mood_changes']=data.loc[:,'beh_fidgeting':'beh_mood_changes'].astype("int")
    data.loc[:,'beh_fidgeting':'beh_mood_changes']=data.loc[:,'beh_fidgeting':'beh_mood_changes'].replace(reverse_beh)

    return data
    

In [5]:
#load the Summer 2018 baseline survey export; skiprows=[1,2] skips the two lines right after the header row
data_2a = pd.read_csv(
    "../data/Post-Simulator Baseline Student Survey- Summer 2018_January 22, 2020_16.58.csv",
    skiprows=[1, 2],
)


In [6]:
#clean the 2a data, set its time column to 0, move email to the front and order the rows by email
data_2a = (
    clean_survey_data(data_2a)
    .assign(time=0)
    .set_index('email')
    .sort_values(by=['email'])
    .reset_index()
)
data_2a.to_csv("Summer2018_Baseline_Post-Survey_Cleaned.csv", index=False)
data_2a.head(1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  email_fix['email']=email_fix['email'].apply(lambda x:fix_email_add(x))


Unnamed: 0,email,sim_fbsk,sim_txt_fbsk,sim_cmsk,sim_txt_cmsk,sim_txt_eth_behavior,sim_txt_eth_contribute,beh_fidgeting,beh_humming,beh_excitable,...,app_confer_parent_rc,app_behavior_plan_rc,app_challenge_work_rc,app_spend_time_rc,app_space_regroup_rc,app_beh_manage_coach_rc,app_beh_manage_teach_rc,manage_app_negative,manage_app_positive,time
2,gct2y@virginia.edu,6.0,A few of the students seemed a bit confused ab...,5.0,"The students provided great ideas, but even th...","Ethan seemed a little bit too cool for school,...","I've seen students like Ethan, and the most su...",Pretty much,Just a little,Just a little,...,8.0,5.0,4.0,3.0,7.0,7.0,3.0,2.8,5.25,0


In [7]:
#load the redo survey export; skiprows=[1,2] skips the two lines right after the header row
redo = pd.read_csv(
    "../data/Post-Simulator Baseline Student Survey (only for Redos)- Summer 2018 - Copy_January 23, 2020_07.22.csv",
    skiprows=[1, 2],
)


In [8]:
#clean the redo data and set its time column to 0
redo = clean_redo_data(redo).assign(time=0)

In [9]:
#manually correct one email id in the redo data, then save the cleaned redo file
#.loc with a boolean mask is used because .at is an accessor for a single scalar label
redo.loc[redo.email == 'kr2tvd@virginia.edu', 'email'] = "krv2td@virginia.edu"
redo.to_csv("Summer2018_Baseline_Redos_Post-Survey_Cleaned.csv",index=False)

In [10]:
#overwrite the 2a rows with the redo answers, matching rows by email
redo = redo.set_index('email')
data_2a = data_2a.set_index('email')
#DataFrame.update works in place and only copies non-missing values from redo
data_2a.update(redo)
data_2a = data_2a.sort_values(by=['email']).reset_index()
data_2a.head(1)
data_2a.to_csv("Summer2018_Baseline_Post-Survey_Merged_Cleaned.csv", index=False)

Unnamed: 0,email,sim_fbsk,sim_txt_fbsk,sim_cmsk,sim_txt_cmsk,sim_txt_eth_behavior,sim_txt_eth_contribute,beh_fidgeting,beh_humming,beh_excitable,...,app_confer_parent_rc,app_behavior_plan_rc,app_challenge_work_rc,app_spend_time_rc,app_space_regroup_rc,app_beh_manage_coach_rc,app_beh_manage_teach_rc,manage_app_negative,manage_app_positive,time
0,ajm8gx@virginia.edu,10.0,'-I think that delving into the background of ...,10.0,'-In disciplining Ethan I was able to make sur...,Ethan was off-task often.\nHe interrupted othe...,He may have ADD or another IEP. He may have a ...,Very much,Very much,Very much,...,1.0,1.0,5.0,1.0,2.0,1.0,1.0,8.2,9.375,0.0


In [11]:
#load the Fall 2018 baseline survey export; skiprows=[1,2] skips the two lines right after the header row
data_2b = pd.read_csv(
    "../data/Post-Simulator Baseline Student Survey- Fall 2018_July 24, 2019_15.43.csv",
    skiprows=[1, 2],
)
data_2b.head()

Unnamed: 0,StartDate,EndDate,Status,IPAddress,Progress,Duration (in seconds),Finished,RecordedDate,ResponseId,RecipientLastName,...,Q25_4,Q25_5,Q25_6,Q25_7,Q25_8,Q25_9,Q25_10,Q26,Q27,Q28
0,2018-09-17 07:05:03,2018-09-17 07:36:27,IP Address,199.111.228.149,100,1883,True,2018-09-17 07:36:28,R_22F9vTdxw2PCTFs,,...,Strongly agree,Strongly agree,Somewhat agree,Somewhat agree,Somewhat agree,Somewhat agree,Strongly agree,It helped me to see how students may act in cl...,I thought the simulator was good and nothing n...,I have concerns about how I respond to childre...
1,2018-09-17 07:37:53,2018-09-17 08:10:51,IP Address,199.111.228.149,100,1978,True,2018-09-17 08:10:51,R_24iVOclK34K8Awf,,...,Undecided,Strongly agree,Strongly agree,Somewhat agree,Somewhat agree,Strongly disagree,Somewhat agree,It brought weakness right into focus. I am rea...,Feedback right then and there.\n\nPrior notice...,How am I supposed to get feedback to improve? ...
2,2018-09-17 08:12:42,2018-09-17 08:35:28,IP Address,199.111.228.149,100,1366,True,2018-09-17 08:35:29,R_3qlIj3VjGBL2EgZ,,...,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,It exposed me to students who may have behavio...,,
3,2018-09-17 08:35:42,2018-09-17 09:01:40,IP Address,199.111.228.149,100,1557,True,2018-09-17 09:01:40,R_1I9Q6ND18lHriSP,,...,Somewhat agree,Somewhat agree,Strongly agree,Undecided,Somewhat disagree,Somewhat agree,Strongly agree,"Seeing how I handle on the spot teaching, beha...",There were limitations such as only beginning ...,I did not handle Ethan correctly and got easil...
4,2018-09-17 09:01:57,2018-09-17 09:17:03,IP Address,199.111.228.149,100,905,True,2018-09-17 09:17:04,R_Y3bp74qqJ6l5tWV,,...,Strongly agree,Strongly agree,Strongly agree,Somewhat agree,Somewhat agree,Somewhat agree,Undecided,It was actually pretty realistic. You didn't k...,I wish that I knew better behavior management ...,I am a little worried that my teacher language...


In [12]:
#clean the 2b data, set its time column to 0, move email to the front and order the rows by email
data_2b = (
    clean_survey_data(data_2b)
    .assign(time=0)
    .set_index('email')
    .sort_values(by=['email'])
    .reset_index()
)
data_2b.to_csv("Fall2018_Baseline_Post-Survey_Cleaned.csv", index=False)
data_2b.head(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  email_fix['email']=email_fix['email'].apply(lambda x:fix_email_add(x))


Unnamed: 0,email,sim_fbsk,sim_txt_fbsk,sim_cmsk,sim_txt_cmsk,sim_txt_eth_behavior,sim_txt_eth_contribute,beh_fidgeting,beh_humming,beh_excitable,...,app_confer_parent_rc,app_behavior_plan_rc,app_challenge_work_rc,app_spend_time_rc,app_space_regroup_rc,app_beh_manage_coach_rc,app_beh_manage_teach_rc,manage_app_negative,manage_app_positive,time
0,ab2dx@virginia.edu,5.0,'-I think I helped them understand it a little...,4.0,'-I tried getting Ethan to tune into the lesso...,'-disruptive \n-distracting \n-random \n,'-I know sometimes students are yearning for a...,Very much,Very much,Very much,...,5.0,2.0,7.0,3.0,2.0,8.0,3.0,4.8,6.75,0


In [13]:
#load the Spring 2019 round 1 survey export; skiprows=[1,2] skips the two lines right after the header row
data_2c = pd.read_csv(
    "../data/Post-Simulator Student Survey (Round 1)- Spring 2019_July 24, 2019_15.25.csv",
    skiprows=[1, 2],
)


In [14]:
#clean the 2c data and set its time column to 2
data_2c = clean_survey_data(data_2c).assign(time=2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  email_fix['email']=email_fix['email'].apply(lambda x:fix_email_add(x))


In [15]:
#correct one email id in the 2c data
#.loc with a boolean mask is used because .at is an accessor for a single scalar label
data_2c.loc[data_2c.email == 'kr2a2fn@virginia.edu', 'email'] = "kra2fn@virginia.edu"

In [16]:
#move email to the front, order the rows by email and save the cleaned 2c file
data_2c = (
    data_2c
    .set_index('email')
    .sort_values(by=['email'])
    .reset_index()
)
data_2c.to_csv("Spring2019_Coaching_Post-Survey_Cleaned.csv", index=False)
data_2c.head(1)

Unnamed: 0,email,sim_cmsk,sim_txt_cmsk,sim_txt_eth_behavior,sim_txt_eth_contribute,beh_fidgeting,beh_humming,beh_excitable,beh_inattentive,beh_short_attention,...,app_confer_parent_rc,app_behavior_plan_rc,app_challenge_work_rc,app_spend_time_rc,app_space_regroup_rc,app_beh_manage_coach_rc,app_beh_manage_teach_rc,manage_app_negative,manage_app_positive,time
0,ac4ah@virginia.edu,3.0,Classroom management was low because students ...,Distracting\nUnnecessay,External influences from home\nLack of structu...,Pretty much,Very much,Pretty much,Very much,Very much,...,4.0,3.0,5.0,3.0,4.0,2.0,4.0,3.2,7.625,2


In [17]:
#load the Spring 2019 round 2 survey export; skiprows=[1,2] skips the two lines right after the header row
data_2d = pd.read_csv(
    "../data/Post-Simulator Student Survey Round 2- Spring 2019_July 24, 2019_15.24.csv",
    skiprows=[1, 2],
)


In [18]:
#clean the 2d data, set its time column to 3, move email to the front and order the rows by email
data_2d = (
    clean_survey_data(data_2d)
    .assign(time=3)
    .set_index('email')
    .sort_values(by=['email'])
    .reset_index()
)
data_2d.to_csv("Spring2019_Exit_Post-Survey_Cleaned.csv", index=False)
data_2d.head(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  email_fix['email']=email_fix['email'].apply(lambda x:fix_email_add(x))


Unnamed: 0,email,sim_fbsk,sim_txt_fbsk,q39,sim_cmsk,sim_txt_cmsk,sim_txt_eth_behavior,sim_txt_eth_contribute,beh_fidgeting,beh_humming,...,app_confer_parent_rc,app_behavior_plan_rc,app_challenge_work_rc,app_spend_time_rc,app_space_regroup_rc,app_beh_manage_coach_rc,app_beh_manage_teach_rc,manage_app_negative,manage_app_positive,time
0,ab2dx@virginia.edu,7.0,I think that all of the students made great po...,,2.0,I was unable to get Ethan and Dev to stop dist...,"distracting, disrespectful, impulsive","lack of attention from adults in his life, som...",Pretty much,Very much,...,4.0,2.0,7.0,4.0,2.0,5.0,6.0,3.6,6.875,3


In [19]:
#stack the four cleaned datasets row-wise; each row keeps the index label it had in its own frame
#pd.concat replaces the chained DataFrame.append calls (append was removed in pandas 2.0);
#sort=False keeps the columns in order of first appearance, as append did
full = pd.concat([data_2a, data_2b, data_2c, data_2d], sort=False)
full

Unnamed: 0,email,sim_fbsk,sim_txt_fbsk,sim_cmsk,sim_txt_cmsk,sim_txt_eth_behavior,sim_txt_eth_contribute,beh_fidgeting,beh_humming,beh_excitable,...,app_beh_manage_teach_rc,manage_app_negative,manage_app_positive,time,q39,q34,q35_1,q36_1,q37,q38
0,ajm8gx@virginia.edu,10.0,'-I think that delving into the background of ...,10.0,'-In disciplining Ethan I was able to make sur...,Ethan was off-task often.\nHe interrupted othe...,He may have ADD or another IEP. He may have a ...,Very much,Very much,Very much,...,1.0,8.2,9.375,0.0,,,,,,
1,alh8pk@virginia.edu,5.0,They seemed to have a good grasp on what made ...,,I couldn't get to the discussion about class n...,"Ethan was very distracted, full of energy, and...","He may have a short attention span, possibly A...",Very much,Very much,Pretty much,...,1.0,5.0,8.375,0.0,,,,,,
2,alp7q@virginia.edu,5.0,'- I thought I did okay with going from questi...,2.0,I had a brief moment of success when Savannah ...,He was disruptive but occasionally responsive ...,I'd want to know if there was a reason why he ...,Very much,Very much,Just a little,...,1.0,6.4,8.125,0.0,,,,,,
3,art2vr@virginia.edu,6.0,I honestly was more concerned with making sure...,7.0,I think I did a good job on this. I asked the ...,Super distracting. Every time I thought he was...,I think the guidance counselor and parent/guar...,Just a little,Very much,Pretty much,...,1.0,5.2,8.500,0.0,,,,,,
4,bab7rr@virginia.edu,8.0,The students were on-task and seemed intereste...,4.0,The class provided one class expectation (to b...,"In this situation, Ethan was very off-task and...",A real student may act this way in class becau...,Pretty much,Very much,Very much,...,4.0,5.4,5.750,0.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,syc2rw@virginia.edu,7.0,'- asking where in the text they found their a...,7.0,'- telling Ethan to stop\n- telling Dev to sto...,'- distracted\n- fun-loving\n- not engaged,"'- home life\n- boredom, excitement about some...",Pretty much,Pretty much,Pretty much,...,3.0,4.6,6.250,3.0,,No,,,,
74,tdb5bd@virginia.edu,8.0,I think my feedback directed students back to ...,7.0,I was direct with students when they were bein...,Ethan's behavior was distracting and often beg...,I think his home life might contribute to his ...,Pretty much,Pretty much,Pretty much,...,4.0,4.6,7.250,3.0,,No,,,,
75,wmn6kc@virginia.edu,9.0,I asked students to provide text evidence and ...,8.0,I heard from multiple students about their ide...,"Some distraction, inattentiveness. Spoke out o...","Possibly LD. I would need access to IEP, 504, ...",Just a little,Pretty much,Pretty much,...,3.0,2.6,6.375,3.0,,No,,,,
76,wwb4ka@virginia.edu,8.0,'- We started to delve into the reasons why Li...,8.0,"'- Ethan was fairly on point, and not a major ...",Not to bad. He was fairly on target throughou...,Boredom.,Just a little,Pretty much,Just a little,...,4.0,1.8,5.375,3.0,,No,,,,


In [20]:
#replace the repeated per-frame index labels with a fresh 0..n-1 index, discarding the old labels
full = full.reset_index(drop=True)

In [21]:
#remove every row whose email is one of the listed addresses
exit_emails = [
    "bsd7cv@virginia.edu",
    "bh4fk@virginia.edu",
    "alh8pk@virginia.edu",
    "ahm4kv@virginia.edu",
]
full = full[~full['email'].isin(exit_emails)]

In [22]:
#move email to the front, order the rows by email and save the combined survey file
full = (
    full
    .set_index('email')
    .sort_values(by=['email'])
    .reset_index()
)
full.to_csv("Fall2018Spring2019_Post-Survey_Cleaned.csv", index=False)


In [24]:
#load the randomization table and normalise its column labels (lower-case, surrounding whitespace removed)
randomization = pd.read_stata("../data/SimTeacher_Randomization_Fall2018_Spring2019.dta")
randomization.columns = randomization.columns.str.lower().str.strip()
randomization
#randomization.loc[randomization.email=='es8fa@virginia.edu']

Unnamed: 0,name_full,name,sis_id,program,section,email,strata,rannum,temp,numofstudents,tr_n,ra,fall2018coachingra,fall2018racera,fall2018coachingnotes,spring2019coachingra,spring2019racera
0,"Alba,Elena M","Alba,Elena",ema4ex,secondary,new,ema4ex@virginia.edu,Sec New,0.730109,25,33,8,TW,Coaching,White,No notes,No Coaching,Black
1,"All,Hannah Adare","All,Hannah",haa8kt,elementary,Monday,haa8kt@virginia.edu,Elem Mon,0.230620,8,39,10,CB,No Coaching,Black,No coaching conversation,Coaching,White
2,"Appleton,Megan Christine","Appleton,Megan",mca5hu,elementary,Tuesday,mca5hu@virginia.edu,Elem Tues,0.213969,9,36,9,CB,No Coaching,Black,No coaching conversation,Coaching,White
3,"Atkeson,Kate Rebecca","Atkeson,Kate",kra2fn,elementary,Monday,kra2fn@virginia.edu,Elem Mon,0.594512,22,39,10,TB,Coaching,Black,Coaching conversation,No Coaching,Black
4,"Baber,Lauren Brooke","Baber,Lauren",lbb8th,elementary,Monday,lbb8th@virginia.edu,Elem Mon,0.323117,15,39,10,CW,No Coaching,White,No coaching conversation,No Coaching,White
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,"Winkler,Lily Ann","Winkler,Lily",law2mc,elementary,Monday,law2mc@virginia.edu,Elem Mon,0.908203,32,39,10,TW,Coaching,White,Coaching conversation,Coaching,Black
113,"Wright,Joseph Nathaniel","Wright,Joseph",jnw5tp,secondary,new,jnw5tp@virginia.edu,Sec New,0.122668,6,33,8,CB,No Coaching,Black,No notes,Coaching,White
114,"Xin,Christie","Xin,Christie",cx8vu,elementary,Tuesday,cx8vu@virginia.edu,Elem Tues,0.229896,10,36,9,CW,No Coaching,White,No coaching conversation,Coaching,Black
115,"Yancey,Katherine Blair","Yancey,Katherine",kby6hf,secondary,new,kby6hf@virginia.edu,Sec New,0.092158,4,33,8,CB,No Coaching,Black,No coaching conversation,Coaching,Black


In [25]:
#attach the randomization columns by email; the inner join keeps only emails present in both frames
full = full.merge(randomization, on='email', how='inner')
full

Unnamed: 0,email,sim_fbsk,sim_txt_fbsk,sim_cmsk,sim_txt_cmsk,sim_txt_eth_behavior,sim_txt_eth_contribute,beh_fidgeting,beh_humming,beh_excitable,...,rannum,temp,numofstudents,tr_n,ra,fall2018coachingra,fall2018racera,fall2018coachingnotes,spring2019coachingra,spring2019racera
0,ab2dx@virginia.edu,5.0,'-I think I helped them understand it a little...,4.0,'-I tried getting Ethan to tune into the lesso...,'-disruptive \n-distracting \n-random \n,'-I know sometimes students are yearning for a...,Very much,Very much,Very much,...,0.282820,11,39,10,CW,No Coaching,White,No coaching conversation,No Coaching,Black
1,ab2dx@virginia.edu,7.0,I think that all of the students made great po...,2.0,I was unable to get Ethan and Dev to stop dist...,"distracting, disrespectful, impulsive","lack of attention from adults in his life, som...",Pretty much,Very much,Pretty much,...,0.282820,11,39,10,CW,No Coaching,White,No coaching conversation,No Coaching,Black
2,ac4ah@virginia.edu,,,3.0,Classroom management was low because students ...,Distracting\nUnnecessay,External influences from home\nLack of structu...,Pretty much,Very much,Pretty much,...,0.016961,1,36,9,CB,No Coaching,Black,No coaching conversation,No Coaching,Black
3,ac4ah@virginia.edu,3.0,'-We did not get very far though the story\n-T...,2.0,'-Our conversation about classroom norms was n...,'-Distracting \n-Disruptive\n-Unproductive to ...,'-Ethan seemed to be seeking attention \n-He i...,Pretty much,Very much,Just a little,...,0.016961,1,36,9,CB,No Coaching,Black,No coaching conversation,No Coaching,Black
4,ac4ah@virginia.edu,5.0,Most students were able to talk\nNot many feed...,6.0,We were able to come up with 2 expectations\nI...,He was a distraction and clearly not engaged,I think the material was uninteresting to him\...,Pretty much,Pretty much,Not at all,...,0.016961,1,36,9,CB,No Coaching,Black,No coaching conversation,No Coaching,Black
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
282,yk5kb@virginia.edu,6.0,'-I asked students to explain their rationale ...,8.0,"'-Although there was an awkward silence, conve...",distracted\n,"possibly adhd or a learning disability, proble...",Very much,Very much,Very much,...,0.958017,33,33,8,TW,Coaching,White,Coaching conversation,No Coaching,White
283,yk5kb@virginia.edu,,,8.0,'- I redirected conversations that were not re...,'- Ethan was distracted and had trouble stayin...,"'- Ethan may just not like being in school, or...",Very much,Very much,Very much,...,0.958017,33,33,8,TW,Coaching,White,Coaching conversation,No Coaching,White
284,zrh3aq@virginia.edu,,,8.0,'-I solicited student ideas as to what classro...,Distracting (to other students and myself)\n,Need for attention\nTrouble focusing/ staying ...,Very much,Very much,Pretty much,...,0.450501,15,33,8,CW,No Coaching,White,No coaching conversation,Coaching,White
285,zrh3aq@virginia.edu,6.0,'-I asked students to give their responses to ...,,'-I couldn't get the students to focus long en...,"Ethan is disruptive, he is having trouble enga...",'-He might have ADHD\n-He might just be seekin...,Very much,Very much,Pretty much,...,0.450501,15,33,8,CW,No Coaching,White,No coaching conversation,Coaching,White


In [26]:
#load the CPP covariate data that is merged in next
cpp = pd.read_stata(
    "../data/2018_2019_CPP_data.dta"
)
cpp

Unnamed: 0,student,recipientlastname,recipientfirstname,ytrt_total,crtse_total,das_depression,das_anxiety,das_stress,ccs_gpa,partch,...,hsach_3,race_white,race_white_miss,hsses_miss,hsach_miss,partch_either,moedu_colab,faedu_colab,gender_female,gender_female_miss
0,1382103,Dezio,Stephanie,5.8,45.925926,0.428571,0.571429,1.714286,3.70,4.0,...,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
1,1498517,Buppert,Sydney,6.6,86.370369,0.000000,0.000000,0.857143,3.90,4.0,...,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0
2,2018652,Plein,Abigail,5.6,74.222221,0.142857,0.142857,1.142857,3.90,4.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
3,2064130,Heard,Zora,5.8,56.148148,0.714286,0.857143,1.142857,3.50,3.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0
4,2176293,Holowinsky,Anna,6.6,56.666668,1.285714,1.428571,1.428571,3.80,4.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,2772719,Degrush,Jennifer,7.0,64.740738,0.285714,0.571429,1.714286,3.84,4.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
114,2772863,Spaulding,Ellie,6.8,59.074074,0.285714,0.571429,1.285714,3.50,4.0,...,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0
115,2775384,Chay,Casey,5.8,66.296295,0.285714,0.285714,0.142857,3.63,4.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0
116,2534432,,,,,,,,,,...,,,1.0,1.0,1.0,,,,,1.0


In [27]:
#attach the CPP covariates by email; the inner join keeps only emails present in both frames
full = full.merge(
    cpp,
    how='inner',
    on='email',
)
full

Unnamed: 0,email,sim_fbsk,sim_txt_fbsk,sim_cmsk,sim_txt_cmsk,sim_txt_eth_behavior,sim_txt_eth_contribute,beh_fidgeting,beh_humming,beh_excitable,...,hsach_3,race_white,race_white_miss,hsses_miss,hsach_miss,partch_either,moedu_colab,faedu_colab,gender_female,gender_female_miss
0,ab2dx@virginia.edu,5.0,'-I think I helped them understand it a little...,4.0,'-I tried getting Ethan to tune into the lesso...,'-disruptive \n-distracting \n-random \n,'-I know sometimes students are yearning for a...,Very much,Very much,Very much,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,ab2dx@virginia.edu,7.0,I think that all of the students made great po...,2.0,I was unable to get Ethan and Dev to stop dist...,"distracting, disrespectful, impulsive","lack of attention from adults in his life, som...",Pretty much,Very much,Pretty much,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,ac4ah@virginia.edu,,,3.0,Classroom management was low because students ...,Distracting\nUnnecessay,External influences from home\nLack of structu...,Pretty much,Very much,Pretty much,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0
3,ac4ah@virginia.edu,3.0,'-We did not get very far though the story\n-T...,2.0,'-Our conversation about classroom norms was n...,'-Distracting \n-Disruptive\n-Unproductive to ...,'-Ethan seemed to be seeking attention \n-He i...,Pretty much,Very much,Just a little,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0
4,ac4ah@virginia.edu,5.0,Most students were able to talk\nNot many feed...,6.0,We were able to come up with 2 expectations\nI...,He was a distraction and clearly not engaged,I think the material was uninteresting to him\...,Pretty much,Pretty much,Not at all,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279,yk5kb@virginia.edu,6.0,'-I asked students to explain their rationale ...,8.0,"'-Although there was an awkward silence, conve...",distracted\n,"possibly adhd or a learning disability, proble...",Very much,Very much,Very much,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
280,yk5kb@virginia.edu,,,8.0,'- I redirected conversations that were not re...,'- Ethan was distracted and had trouble stayin...,"'- Ethan may just not like being in school, or...",Very much,Very much,Very much,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
281,zrh3aq@virginia.edu,,,8.0,'-I solicited student ideas as to what classro...,Distracting (to other students and myself)\n,Need for attention\nTrouble focusing/ staying ...,Very much,Very much,Pretty much,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0
282,zrh3aq@virginia.edu,6.0,'-I asked students to give their responses to ...,,'-I couldn't get the students to focus long en...,"Ethan is disruptive, he is having trouble enga...",'-He might have ADHD\n-He might just be seekin...,Very much,Very much,Pretty much,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0


In [42]:
#relabel the name_full column as name
full = full.rename({'name_full': 'name'}, axis='columns')

In [43]:
#drop the name_x / name_y columns and save the final merged file
full = full.drop(columns=['name_x', 'name_y'])
full.to_csv("SurveyOutcomes_Randomization_CPP_Merged.csv", index=False)