# Participant Tracker

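This notebook is the development area for the participant tracker code: it connects to the `teachsim` MySQL database, creates the `Participant_Tracker` table, and generates the SQL statements that record which survey and coding files each participant appears in.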

In [1]:
import os

import numpy as np
import pandas as pd

## Connect to Database

In [2]:
import mysql.connector

In [3]:
# Connection settings. Each value can be overridden with an environment
# variable so that real credentials never need to be saved in the notebook;
# the fallbacks are the values this notebook has used so far (the password
# fallback is only a placeholder).
endpoint = os.environ.get("TEACHSIM_DB_HOST", "seratestdatabase.c4cjk1vto1om.us-east-2.rds.amazonaws.com")
port = os.environ.get("TEACHSIM_DB_PORT", "3306")
usr = os.environ.get("TEACHSIM_DB_USER", "admin")
pswd = os.environ.get("TEACHSIM_DB_PASSWORD", "your_password_here")
region = os.environ.get("TEACHSIM_DB_REGION", "us-east-2b")
dbname = os.environ.get("TEACHSIM_DB_NAME", "teachsim")

In [4]:
# Open the database connection from the settings cell. The port is passed
# explicitly (as an int) so that changing `port` actually takes effect instead
# of the connector silently using its default.
cnx = mysql.connector.connect(user=usr, password=pswd, host=endpoint, port=int(port), database=dbname)

In [5]:
# Single cursor shared by every cell and helper function below.
# buffered=True asks the connector to fetch each result set in full right after execute().
cursor = cnx.cursor(buffered=True)

## Dictionary of CSV Names

In [7]:
# Maps each Participant_Tracker column to the CSV file name(s) that count
# towards it; getTrackerQueries looks a file name up here to find its column.
# NOTE(review): "2019_spring_ exit_postsim_survey.csv" has a space after
# "spring_". The same spelling is used later in this notebook — confirm it
# matches the file on disk.
# NOTE(review): the Precoach and Postcoach coding columns list the same file,
# so a lookup by file name cannot tell them apart — confirm this is intended.
names = {"Participant_Information_Survey": ["2018_2019_participant_measures.csv"],
        "Baseline_Survey": ["2018_summer_baseline_postsim_survey.csv", "2018_fall_baseline_postsim_survey.csv"],
        "Classroom_Norms_Post_Sim_Survey": ["2019_spring_precoach_br_postsim_survey.csv"],
        "Exit_Survey": ["2019_spring_ exit_postsim_survey.csv"],
        "Classroom_Norms_Coding_Baseline": ["2018_summer_baseline_br_perform.csv", "2018_fall_baseline_br_perform.csv"],
        "Classroom_Norms_Coding_Precoach": ["2019_spring_coach_br_perform.csv"],
        "Classroom_Norms_Coding_Postcoach": ["2019_spring_coach_br_perform.csv"],
        "Classroom_Norms_Coding_Exit": ["2019_spring_exit_br_perform.csv"]}

In [15]:
# Flatten the per-column file lists into one list holding every CSV name,
# in the order the columns appear in `names`.
all_csvs = [csv_file for csv_files in names.values() for csv_file in csv_files]
all_csvs

['2018_2019_participant_measures.csv',
 '2018_summer_baseline_postsim_survey.csv',
 '2018_fall_baseline_postsim_survey.csv',
 '2019_spring_precoach_br_postsim_survey.csv',
 '2019_spring_ exit_postsim_survey.csv',
 '2018_summer_baseline_br_perform.csv',
 '2018_fall_baseline_br_perform.csv',
 '2019_spring_coach_br_perform.csv',
 '2019_spring_coach_br_perform.csv ',
 '2019_spring_exit_br_perform.csv']

## Create Table in Database

In [39]:
# Start from a clean slate. IF EXISTS keeps this cell from raising when the
# table has not been created yet (e.g. the first run against a new database).
cursor.execute("DROP TABLE IF EXISTS Participant_Tracker;")

In [40]:
# Tracker columns: the participant key followed by one column per entry in `names`.
cols = ["Participant_ID", *names]
cols

['Participant_ID',
 'Participant_Information_Survey',
 'Baseline_Survey',
 'Classroom_Norms_Post_Sim_Survey',
 'Exit_Survey',
 'Classroom_Norms_Coding_Baseline',
 'Classroom_Norms_Coding_Precoach',
 'Classroom_Norms_Coding_Postcoach',
 'Classroom_Norms_Coding_Exit']

In [44]:
# Build the CREATE TABLE statement: every column is declared varchar(100)
# and Participant_ID is the primary key.
query = (
    "CREATE TABLE Participant_Tracker ("
    + "".join(name + " varchar(100), " for name in cols)
    + "PRIMARY KEY (Participant_ID) );"
)

In [45]:
# Display the generated CREATE TABLE statement for inspection before it is executed.
query

'CREATE TABLE Participant_Tracker (Participant_ID varchar(100), Participant_Information_Survey varchar(100), Baseline_Survey varchar(100), Classroom_Norms_Post_Sim_Survey varchar(100), Exit_Survey varchar(100), Classroom_Norms_Coding_Baseline varchar(100), Classroom_Norms_Coding_Precoach varchar(100), Classroom_Norms_Coding_Postcoach varchar(100), Classroom_Norms_Coding_Exit varchar(100), PRIMARY KEY (Participant_ID) );'

In [46]:
# Execute the CREATE TABLE statement currently held in `query`.
cursor.execute(query)

## View Table

In [8]:
# Remove every row from the tracker (the table itself is kept).
# NOTE(review): this is destructive and sits under "View Table", so it runs on
# "Run All" — confirm it belongs in this section.
cursor.execute("DELETE FROM Participant_Tracker;")

In [6]:
# Fetch every tracker row and render it as a DataFrame.
cursor.execute("SELECT * FROM Participant_Tracker;")
result = cursor.fetchall()
# The first element of each cursor.description entry is used as the column header.
colnames = [x[0] for x in cursor.description]
pd.DataFrame(result, columns=colnames)

Unnamed: 0,Participant_ID,Participant_Information_Survey,Baseline_Survey,Classroom_Norms_Post_Sim_Survey,Exit_Survey,Classroom_Norms_Coding_Baseline,Classroom_Norms_Coding_Precoach,Classroom_Norms_Coding_Postcoach,Classroom_Norms_Coding_Exit
0,1_1819_1_1,Completed,-,-,-,-,-,-,-
1,1_1819_1_10,Completed,-,-,-,-,-,-,-
2,1_1819_1_13,Completed,-,-,-,-,-,-,-
3,1_1819_1_14,Completed,-,-,-,-,-,-,-
4,1_1819_1_16,Completed,-,-,-,-,-,-,-
...,...,...,...,...,...,...,...,...,...
100,1_1819_4_114,Completed,-,-,-,-,-,-,-
101,1_1819_4_115,Completed,-,-,-,-,-,-,-
102,1_1819_4_116,Completed,-,-,-,-,-,-,-
103,1_1819_4_118,Completed,-,-,-,-,-,-,-


## Read in Data for Development

In [23]:
# Show the working directory: the CSV names used below are bare file names,
# so they are read relative to it.
import os
os.getcwd()

'/Users/jmachita03/Documents/GitHub/MSDS_SERA_capstone/RDS'

In [24]:
# CSV used as sample data while developing the functions below.
csvName = "2018_2019_participant_measures.csv"

In [25]:
# Load the sample CSV; getTrackerQueries reads its `id_participant` column.
data = pd.read_csv(csvName)

In [26]:
# Peek at the first two rows to check the load.
# NOTE(review): this frame includes name, email and sis_id columns — clear this
# cell's output before sharing or committing the notebook.
data.head(2)

Unnamed: 0,id_participant,id_section,id_site,id_year,btreat_cond,fb_treat_cond,cpp_year,ccs_cohort,ccs_stutype,ccs_major,...,fall2018coachingra,fall2018racera,fall2018coachingnotes,spring2019coachingra,spring2019racera,spring2019_ethan_original,spring2019_coaching_original,sis_id,email,name
0,1_1819_2_67,2,1,1819,0,0,2018-19,2018.0,1.0,10,...,No Coaching,White,No coaching conversation,No Coaching,Black,Black,No Coaching,ab2dx,ab2dx@virginia.edu,"Basile,Anna"
1,1_1819_3_86,3,1,1819,0,0,2018-19,2018.0,1.0,10,...,No Coaching,Black,No coaching conversation,No Coaching,Black,Black,No Coaching,ac4ah,ac4ah@virginia.edu,"Copley,Abigail"


## Functions

In [56]:
def getExistingIds(tableName, cursor):
    """Return every Participant_ID currently stored in ``tableName``.

    Parameters
    ----------
    tableName : name of the table to read; it is converted with ``str`` and
        placed directly into the SQL text.
    cursor : database cursor providing ``execute`` and ``fetchall``.

    Returns
    -------
    list
        The first field of each fetched row, in the order the rows came back.
    """
    cursor.execute(f"SELECT Participant_ID FROM {tableName!s}")
    return [row[0] for row in cursor.fetchall()]

In [65]:
def getColumnNames(tableName, cursor):
    """Return the column names of ``tableName`` in the connection's current database.

    The lookup is restricted to the current schema (``DATABASE()``) so that a
    table with the same name in another schema on the server cannot add
    duplicate entries, and the names come back in table-definition order.
    The table name is sent as a bound parameter rather than spliced into the
    SQL text.

    Parameters
    ----------
    tableName : name of the table whose columns are wanted.
    cursor : database cursor whose ``execute`` accepts ``%s`` placeholders
        with a parameter tuple (as mysql.connector cursors do).

    Returns
    -------
    list of str
        Column names; empty if the table is not found in the current database.
    """
    query = (
        "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS "
        "WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = %s "
        "ORDER BY ORDINAL_POSITION"
    )
    cursor.execute(query, (tableName,))
    return [row[0] for row in cursor.fetchall()]

In [101]:
def _sqlQuote(value):
    """Render ``value`` as a single-quoted SQL string literal.

    Backslashes and single quotes are escaped so that a value read from a CSV
    cannot end the literal early and change the statement.
    NOTE(review): assumes the server runs without NO_BACKSLASH_ESCAPES (the
    MySQL default) — confirm.
    """
    text = str(value).replace("\\", "\\\\").replace("'", "''")
    return "'" + text + "'"


def getTrackerQueries(csvNameDict, csvName, data, cursor):
    """Build the SQL statements that record a CSV's participants in Participant_Tracker.

    For each participant ID in ``data.id_participant``:
    * if the ID is already in the tracker, an UPDATE sets this CSV's column to
      'Completed';
    * otherwise a REPLACE INTO adds a row with this CSV's column set to
      'Completed' and every other tracker column set to '-'.

    Parameters
    ----------
    csvNameDict : dict mapping tracker column name -> collection of CSV file names.
    csvName : file name to look up in ``csvNameDict`` (exact match). If several
        columns list the same file, the last one in dict order is used.
    data : DataFrame loaded from the CSV. If it has no ``id_participant``
        column, no statements are produced.
    cursor : database cursor, passed on to getExistingIds and getColumnNames.

    Returns
    -------
    list of str
        One SQL statement per non-missing participant ID, in data order.
        Nothing is executed here.

    Raises
    ------
    ValueError
        If ``csvName`` is not listed in ``csvNameDict``, or if the tracker
        table lacks ``Participant_ID`` or the corresponding column.
    """
    # get the column name to which this csv corresponds
    corresponding_col = None
    for key, csvFiles in csvNameDict.items():
        if csvName in csvFiles:
            corresponding_col = key
    if corresponding_col is None:
        raise ValueError("csv name '" + str(csvName) + "' is not listed in the csv name dictionary")

    # participants that are already in the tracker (a set for fast membership tests)
    existing_parts = set(getExistingIds("Participant_Tracker", cursor))

    # values to appear in the tracker
    taken = "Completed"
    notTaken = "-"

    # all other columns in the tracker, de-duplicated with their order kept
    trackerCols = getColumnNames("Participant_Tracker", cursor)
    for required in ("Participant_ID", corresponding_col):
        if required not in trackerCols:
            raise ValueError("Participant_Tracker has no column named '" + required + "'")
    otherCols = [col for col in dict.fromkeys(trackerCols)
                 if col not in ("Participant_ID", corresponding_col)]

    # generate queries
    queries = []
    if 'id_participant' in data.columns:
        # These pieces are identical for every new participant, so build them once.
        insertCols = ", ".join(["Participant_ID", corresponding_col] + otherCols)
        fillerVals = "".join(", " + _sqlQuote(notTaken) for _ in otherCols)

        # Rows with a missing ID are skipped: there is no tracker row to attach them to.
        for participant in data.id_participant.dropna():
            participant = str(participant)
            if participant in existing_parts:
                query = ("UPDATE Participant_Tracker SET " + corresponding_col + " = " + _sqlQuote(taken)
                         + " WHERE Participant_ID = " + _sqlQuote(participant) + ";")
            else:
                query = ("REPLACE INTO Participant_Tracker (" + insertCols + ") VALUES ("
                         + _sqlQuote(participant) + ", " + _sqlQuote(taken) + fillerVals + ");")
            queries.append(query)

    return queries
    

## Update Table

In [58]:
# Participant-measures file (the same value csvName was given in the development section).
csvName = "2018_2019_participant_measures.csv"

In [120]:
# Exit-survey file. getTrackerQueries matches this name exactly against the
# lists in `names`, so the spelling (including the space) must stay identical.
csvName2 = "2019_spring_ exit_postsim_survey.csv"

In [121]:
# getTrackerQueries takes (csvNameDict, csvName, data, cursor); the previous
# call omitted `data` and raised a TypeError. Load the CSV being registered so
# the statements are built from the participants in that file.
exit_survey_data = pd.read_csv(csvName2)
queries = getTrackerQueries(names, csvName2, exit_survey_data, cursor)

In [122]:
# Display the generated statements for review before they are executed.
queries

["UPDATE Participant_Tracker SET Exit_Survey = 'Completed' WHERE Participant_ID = '1_1819_2_67';",
 "UPDATE Participant_Tracker SET Exit_Survey = 'Completed' WHERE Participant_ID = '1_1819_3_86';",
 "UPDATE Participant_Tracker SET Exit_Survey = 'Completed' WHERE Participant_ID = '1_1819_4_108';",
 "UPDATE Participant_Tracker SET Exit_Survey = 'Completed' WHERE Participant_ID = '1_1819_2_76';",
 "UPDATE Participant_Tracker SET Exit_Survey = 'Completed' WHERE Participant_ID = '1_1819_1_14';",
 "UPDATE Participant_Tracker SET Exit_Survey = 'Completed' WHERE Participant_ID = '1_1819_3_87';",
 "UPDATE Participant_Tracker SET Exit_Survey = 'Completed' WHERE Participant_ID = '1_1819_1_5';",
 "UPDATE Participant_Tracker SET Exit_Survey = 'Completed' WHERE Participant_ID = '1_1819_3_96';",
 "UPDATE Participant_Tracker SET Exit_Survey = 'Completed' WHERE Participant_ID = '1_1819_3_83';",
 "UPDATE Participant_Tracker SET Exit_Survey = 'Completed' WHERE Participant_ID = '1_1819_2_59';",
 "UPDATE P

In [123]:
# Execute each generated statement in order; cnx.commit() is called in the closing cell.
for query in queries:
    cursor.execute(query)

## Close Database Connection

In [7]:
# Persist the executed statements, then release the cursor before closing the
# connection so no database resource is left open.
cnx.commit()
cursor.close()
cnx.close()