In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Approach used by this notebook: for every treatment arm there is one linear
# model over the covariates below; the arm with the largest predicted outcome
# (argmax) is the one assigned to a sample.

# Input columns. These names are used to select columns from the summary
# table, so their order fixes the column order of the samples matrix.
# NOTE(review): presumably the coefficient vectors were fit in this same
# order -- confirm against the fitting code.
covariates = [
    "attendance_1",
    "rating_1",
    "talktime_tutor_pct_1",
    "spoken_token_tutor_pct_1",
    "chat_token_tutor_pct_1",
    "length_utterance_tutor_1",
    "length_utterance_student_1",
    "length_utterance_tutor_chat_1",
    "length_utterance_student_chat_1",
    "ratio_students_engaged_1",
    "normalized_num_student_names_used_1",
    "normalized_num_turns_1",
    "normalized_num_high_uptakes_1",
    "normalized_num_eliciting_1",
    "normalized_num_questions_students_1",
    "normalized_num_questions_tutor_1",
    "normalized_student_reasoning_1",
    "min_sat_score_series",
    "max_sat_score_series",
    "grade_for_session_1",
]

# Outcome columns pulled from the summary table alongside the covariates.
outcome_vars = [
    'grade_change',
    'uptake_change',
    'eliciting_change',
    'talktime_change',
]

In [None]:
# Treatment arms, in the order used when stacking per-arm coefficient vectors.
arms = ['control', 'tutor', 'tutor_student_social', 'tutor_student_personal']
# Outcome measures; these are the top-level keys of the weight dictionaries.
outcomes = ['uptake', 'eliciting', 'grade', 'talktime']

# Ridge coefficients: ridge_weights[outcome][arm] is a list with one
# coefficient per entry of `covariates` (20 values each).
# The `# lambda = ...` comment above each vector is the penalty value noted
# for that fit.
# Every outcome must expose exactly the keys listed in `arms`; the 'uptake'
# entry previously used 'tutor_student_goal', which made the later
# curr_dict['tutor_student_social'] lookup raise KeyError.
ridge_weights = {
    'uptake': {
        # lambda = 281.8383
        'control': [0.000591253619096056, 0.00349192837794334, 0.00163837401270737, 0.000392928131938582, 0.00628362692389463, 0.000928620270690342, 0.00334606667673785, 0.0050390109287224, -0.00104168059907718, 0.00094281330156002, 0.000943426700840534, -0.00164999347175013, -0.0058906961048601, 0.000976560146530648, -0.00269516410489355, 0.000601522927963152, 0.00461622098081581, -0.00626708680743313, -0.00626708680743312, -0.00321923636598734],
        # lambda = 199.5262
        'tutor': [-0.00170969946695357, -0.00252746557960866, -0.00261290580680996, -0.00425084688003946, 0.00582540547576182, 0.010402635478599, 0.0024410071647915, -0.000102781223324614, 0.00307394195266067, -0.00335634594489039, -0.00612237418025647, 0.000352860112035007, -0.00680374598663324, 0.00380220739638761, 0.00554134500158855, -0.00263285835828924, -0.000343687193700856, 0.0038328150486904, 0.0038328150486904, 0.00147191696942888],
        # lambda = 891.2509
        'tutor_student_social': [-0.000446768101084414, 0.0020118570054462, -0.00144036450425541, -0.000443210670608453, 0.00156181755022382, -0.000219164099267695, 0.000252534799227463, 0.000446521864583035, -0.000732148667425945, 0.000500785703389117, 0.000398857679316388, 0.000854482892295336, -0.00125907266064274, 7.51410077158536e-05, 0.000561173637811089, 0.000948326473374993, 0.000145889394924652, -0.000984036046587033, -0.000984036046587033, -0.00116475834065422],
        # lambda = 1000
        'tutor_student_personal': [0.00073948389114542, -0.00043180678059655, 0.000546090672066559, -0.000594404304766719, -0.000481732763627209, 0.00155616699713513, 0.00200310927406824, 0.00167152641034576, -0.000234293166183867, -0.000723505844603543, -0.000388157751829436, 0.000261182204026283, -0.00085367208823032, -0.000354432166823639, 0.000926632776922595, -0.00071552709285163, 0.00058230268268156, -0.000599912026701261, -0.000599912026701262, -0.000494551679303895]
    },
    'eliciting': {
        # lambda = 89.12509
        'control': [-0.00931744606569983, -0.00370506854741289, -0.00853207160299967, -0.00925301149923733, 0.0168390063741481, -0.000723862834382431, 0.0189668972784014, -2.96044451075853e-05, -0.00338740277647341, 0.00302851753054982, 0.00955330288912662, 0.00784746278484755, -0.0114055449829301, -0.0342753672414445, 0.00393543916508945, -0.0100471111973935, -0.00797720838373963, -0.0150051240044933, -0.0150051240044933, 0.0079398120074066],
        # lambda = 44.66836
        'tutor': [-0.00846021766481495, 0.0136634258000128, 0.00144614525889279, 0.00338192614971391, 0.0120821059401313, -0.0132520361617514, 0.0107850987015585, 0.00812443693656711, 0.0021562084569256, -0.0178940446560325, -0.00993475531864005, -0.00509565104454726, -0.00457051344952615, -0.0638779113237588, -0.000720293875973355, 0.0119443175834169, -0.01880732665642, 0.00319961566440838, 0.00319961566440839, -0.00311944401749419],
        # lambda = 707.9458
        'tutor_student_social': [-0.000128556238466294, -0.000106658186529845, 0.00109507615270095, -0.000725336265882493, -0.000806689646386603, -0.000961416951378395, -0.0009436277865964, 0.000520779795648428, 0.00425403890480968, 0.00133334064914369, 0.00195364205614866, -0.000536246393162311, -0.0017997493178585, -0.00304377820213646, -0.00167127412359991, -0.000531650668110165, -0.00432678691773552, -0.000580046965216434, -0.000580046965216434, 0.000891520150615213],
        # lambda = 1000
        'tutor_student_personal': [0.00166836509383662, 0.000518056952227966, 0.000543848475369513, 0.000508836233869522, -0.00170504364322295, 0.000325287236760387, 0.000602179720265705, -0.000157918384207825, -0.000332472852034852, -0.000383463281750147, 0.000117973040221462, -0.000698577258948554, 0.000893612667214244, 0.000261049642950966, -0.00146803285318683, -0.00038024642635194, 0.00056731226191765, -0.000957582344211074, -0.000957582344211073, 0.00052300215360484]
    },
    'grade': {
        # lambda = 35.48134
        'control': [0.010482925677013, -0.0160881587904483, -0.00633199275454413, 0.00376742875853031, -0.00401644033432086, 0.00669646288927839, 0.00534486698887683, 0.00141754278897048, -0.0137683852255327, 0.00598740342267694, 0.00288172304294724, 0.00222740490833344, -0.00949717332320525, 0.0098239997312674, 0.0111876155165498, 0.000934385170899794, 0.0119722979380642, 0.0106159036037869, 0.0106159036037868, -0.0294695383362359],
        # lambda = 125.8925
        'tutor': [-0.00912677519141586, 0.00456047296490467, -0.00409324015615758, 0.000308597306301472, 0.00186512196230382, 0.0015592847865484, -1.91270970125623e-05, 0.0105352000257309, 0.00176304687110587, 0.000380845283847018, 0.000409368911481795, -0.00432633924457445, 0.00794607842061777, 0.00539600759257231, 0.00142821660371373, -0.00331467200300078, 0.00109404396097272, 0.00828588614626997, 0.00828588614626997, -0.0110274137803355],
        # lambda = 70.79458
        'tutor_student_social': [-0.00595503054167041, -0.00297076946824654, 0.00202146933592689, -0.00143140248774933, 0.00207924404614357, -0.000469092229094445, -0.000724296134524327, 0.000718516548881277, -0.00560540118545421, -0.00368687889159408, -0.0029886252707396, -0.000750148791032511, -0.00123947297716583, 0.00162710417756307, 0.00727303205021893, 0.0030012784119729, -0.00461834642199133, 0.00431228716727497, 0.00431228716727497, -0.0203962628212368],
        # lambda = 1000
        'tutor_student_personal': [-0.000412639871006166, -0.000325803856016728, -0.000149834220351544, -0.000128041716626663, 0.000872297279008715, -0.000378758233050001, -0.00014547271628143, -0.000451876271380449, 8.38232191250682e-05, 0.000651251696901686, 0.000188757608218802, 0.000444901757729534, -0.000445618405873889, -0.000587189660497042, 0.00014010697275611, 6.46505320437356e-05, 5.83197584347947e-05, 0.000654997372428769, 0.000654997372428769, 0.000347221009533172]
    },
    'talktime': {
        # lambda = 562.3413
        'control': [0.00123466373467044, -0.0033392942181512, 0.00385641949021032, 0.00179816665615321, -0.00071700528707579, 0.000865363184219157, -0.0024333286432967, 0.000831969224378554, 0.00257200380113237, -0.000932683332237004, -0.000403565784233759, 0.00186264848119108, 0.00118630418594212, 0.000854699935068401, 0.00129009293449602, 0.00282864779080449, 0.00106294936270399, 0.00237896336174364, 0.00237896336174364, 0.0037584534482249],
        # lambda = 10
        'tutor': [-0.00608612758715607, 0.00666041674098689, 0.0747330516146351, -0.0262462682796522, -0.00608340282604194, 0.00772745414542366, -0.0206458667302977, -0.00215428406159538, -0.0159490301549525, 0.00616140205237427, -0.00588188647783017, 0.0346359628357313, 0.0295181490260008, -0.0122630697975776, -0.00290316349119735, -0.0302186189428479, 0.0151538410830962, -0.00629113708817596, -0.00629113708817597, 0.00689634465395759],
        # lambda = 199.5262
        'tutor_student_social': [-0.00630217274403709, 0.00227474394590554, 0.0027797292677491, 0.00516113505289943, -0.000157084800096524, -0.000971160518680697, -0.00659763900673242, 0.00238812341905026, 0.00155699474646774, -0.000747467055458223, 0.00541965386483876, 0.00163053211285706, -0.00749716641928229, -0.00712208836431146, -0.00447395212627961, -0.00041415689786751, -0.00426627196704427, 0.00438831583014932, 0.00438831583014932, 0.00456715576197616],
        # lambda = 199.5262
        'tutor_student_personal': [0.00147087633248089, -0.00174534873890604, 0.00995951769873167, 0.00621348355411867, -0.00586074241572217, -0.00274855613296423, -0.00544389843534638, -0.00771426666793536, -0.00339000341255627, 0.00230449460453529, -0.000464008091102991, -0.00296924358861521, -0.00424671088543255, 0.00454760825076396, -0.00723564771718867, 0.00194973647340399, 0.00102807560702765, -0.00237304939085452, -0.00237304939085452, -0.00228062823709509]
    }, # TODO: add an aggregated outcome measure??
}

# Lasso coefficients, laid out like the ridge dictionary:
# lasso_weights[outcome][arm] -> list of coefficients.
# Only the 'tutor' arm is filled in, and the very same vector is used for
# every outcome; the remaining arms are empty lists.
# NOTE(review): the 'tutor' vector has 23 entries while `covariates` has 20,
# and two entries are ~ +/-2.19e11 -- this looks like placeholder data that
# still has to be replaced; confirm before selecting these weights.
lasso_weights = {
    outcome_name: {
        'control': [],
        # A fresh list literal is evaluated on every iteration, so each
        # outcome owns its own list objects.
        'tutor': [
            0.0057869174182523415,
            -0.04939015234748881,
            0.086674115795949,
            -0.23038033604920116,
            0.08305298875740905,
            7.194821794237663e-05,
            0.00042790333804937494,
            -0.0016273907239995056,
            0.0011625396618618778,
            0.007608148533284885,
            -0.005157032664444589,
            -0.006158000152285133,
            -0.5378618319169195,
            0.09970945325939601,
            0.07182299942295509,
            0.18544199470363004,
            -0.34138781582583033,
            0.0022019252435830357,
            0.011199830742724497,
            0.0006215122480511331,
            219397820797.08447,
            -219397820797.0844,
            -0.03790867036994146,
        ],
        'tutor_student_social': [],
        'tutor_student_personal': [],
    }
    for outcome_name in ('uptake', 'eliciting', 'grade', 'talktime')
}  # TODO: add an aggregated outcome measure??
    


In [None]:
# Read the summary table from disk, then take two column subsets of it:
# the model inputs (columns named in `covariates`) and the observed
# outcomes (columns named in `outcome_vars`).
summary_df = pd.read_csv('summary_data_test.csv')
samples_df = summary_df.loc[:, covariates]
outcome_df = summary_df.loc[:, outcome_vars]

In [None]:
# 1. CREATE SAMPLES MATRIX (num samples x num covariates)
# Convert the covariate frame to a 2-D array in one call instead of
# iterating row by row: same values, but the result stays 2-D even when the
# frame has no rows, and no loop temporaries are left in the namespace.
samples_matrix = samples_df.to_numpy()

# Kept for compatibility with any later cell that expects the per-row list.
samples = list(samples_matrix)

print(samples_matrix.shape)

In [None]:
# 2. CREATE WEIGHTS MATRIX FOR EACH OUTCOME VAR (num covariates x num arms) 20 x 4

weights = ridge_weights # <--- CHOOSE WHICH ONE YOU'RE DOING!!!
# NOTE(review): lasso_weights currently has empty vectors for three arms and
# a 23-entry 'tutor' vector, so selecting it will fail the length check below.


def _arm_weight_matrix(outcome_weights, arm_order, n_covariates):
    """Stack per-arm coefficient vectors into a (covariates x arms) matrix.

    Parameters
    ----------
    outcome_weights : dict
        Maps arm name -> sequence of coefficients for one outcome.
    arm_order : sequence of str
        Arm names; column j of the result holds the coefficients of
        arm_order[j].
    n_covariates : int
        Expected length of every coefficient vector.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_covariates, len(arm_order)).

    Raises
    ------
    KeyError
        If an arm in `arm_order` is missing from `outcome_weights`.
    ValueError
        If a coefficient vector does not have `n_covariates` entries.
    """
    per_arm_rows = []
    for arm in arm_order:
        coefficients = outcome_weights[arm]
        if len(coefficients) != n_covariates:
            raise ValueError(
                f"arm {arm!r} has {len(coefficients)} coefficients, "
                f"expected {n_covariates}"
            )
        per_arm_rows.append(coefficients)
    # Rows are arms at this point; transpose so that
    # (samples x covariates) . (covariates x arms) is a valid product.
    return np.array(per_arm_rows, dtype=float).T


# uptake
uptake_weights = _arm_weight_matrix(weights['uptake'], arms, len(covariates))

# eliciting
eliciting_weights = _arm_weight_matrix(weights['eliciting'], arms, len(covariates))

# grade
grade_weights = _arm_weight_matrix(weights['grade'], arms, len(covariates))

# talktime
talktime_weights = _arm_weight_matrix(weights['talktime'], arms, len(covariates))
                        

In [None]:
# 3. GET SCORES VAR (num samples x num arms)
# One matrix product per outcome: the samples matrix times that outcome's
# weight matrix. The inner dimensions of the two operands must agree.

uptake_scores = samples_matrix.dot(uptake_weights)
eliciting_scores = samples_matrix.dot(eliciting_weights)
grade_scores = samples_matrix.dot(grade_weights)
talktime_scores = samples_matrix.dot(talktime_weights)

In [None]:
# 4. GET PREDICTED POLICY (num samples x 1)
# For every row of a scores matrix, keep the column index of its largest
# entry. The result is an integer index per sample, not an arm name.

uptake_policy = uptake_scores.argmax(axis=1)
eliciting_policy = eliciting_scores.argmax(axis=1)
grade_policy = grade_scores.argmax(axis=1)
talktime_policy = talktime_scores.argmax(axis=1)

In [None]:
# 5. ADD PREDICTED POLICY AS COLUMNS IN SUMMARY_DF
# Each policy array is written into summary_df under its own column name;
# the frame is modified in place and columns are added in the order below.

policy_columns = {
    'uptake_policy': uptake_policy,
    'eliciting_policy': eliciting_policy,
    'grade_policy': grade_policy,
    'talktime_policy': talktime_policy,
}
for policy_column, policy_values in policy_columns.items():
    summary_df[policy_column] = policy_values

In [None]:
# 6. EXPORT SUMMARY_DF TO CSV (just so we have it)
# Writes the frame, including the policy columns added above, to a file in
# the working directory. The file name is fixed and mentions "ridge"
# regardless of which weight set was selected earlier.

summary_df.to_csv(path_or_buf='summary_data_with_policy_ridge.csv')

## EVALUATION