In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [42]:
covariates = ["attendance_1", "rating_1", "talktime_tutor_pct_1",
    "spoken_token_tutor_pct_1", "chat_token_tutor_pct_1", "length_utterance_tutor_1",
    "length_utterance_student_1", "length_utterance_tutor_chat_1", "length_utterance_student_chat_1",
    "ratio_students_engaged_1", "normalized_num_turns_1",
    "normalized_num_high_uptakes_1", "normalized_num_eliciting_1", "normalized_num_questions_students_1",
    "normalized_num_questions_tutor_1", "normalized_student_reasoning_1", "min_sat_score_series",
    "max_sat_score_series", "grade_for_session_1"]

outcome_vars = ['grade_change', 'uptake_change', 'eliciting_change', 'talktime_change', 'aggregate_change']

arms = ['control', 'tutor', 'tutor_student_personal', 'tutor_student_social']

outcomes = ['uptake', 'eliciting', 'grade', 'talktime']

mapping = {0: 'Control', 1: 'Tutor Feedback', 2: 'Tutor Feedback + Goal-Oriented Learner Feedback', 3: 'Tutor Feedback + Socially-Oriented Learner Feedback'}

In [43]:
agg_data = pd.read_csv('aggregated_data.csv')
preds_data = pd.read_csv('aggregated_data_w_preds.csv')

print(len(agg_data.index))
print(len(preds_data.index))

1120
1120


In [44]:
categories = {
    'uptake': ['uptake_0', 'uptake_1', 'uptake_2', 'uptake_3'],
    'eliciting': ['eliciting_0', 'eliciting_1', 'eliciting_2', 'eliciting_3'],
    'grade': ['grade_0', 'grade_1', 'grade_2', 'grade_3'],
    'talktime': ['talktime_0', 'talktime_1', 'talktime_2', 'talktime_3'],
    'agg': ['agg_0', 'agg_1', 'agg_2', 'agg_3']
}

for category, columns in categories.items():
    policy_column = f'{category}_policy'
    preds_data[policy_column] = preds_data[columns].idxmax(axis=1).apply(lambda x: mapping[int(x.split('_')[-1])])

preds_data.head()

Unnamed: 0,uptake_0,uptake_1,uptake_2,uptake_3,eliciting_0,eliciting_1,eliciting_2,eliciting_3,grade_0,grade_1,...,talktime_3,agg_0,agg_1,agg_2,agg_3,uptake_policy,eliciting_policy,grade_policy,talktime_policy,agg_policy
0,-0.090199,-0.040234,0.121668,-0.035727,-0.042895,-0.055054,-0.482427,-0.185747,0.042608,0.037822,...,-0.026528,-0.133364,0.008036,-0.122175,-0.003341,Tutor Feedback + Goal-Oriented Learner Feedback,Control,Tutor Feedback + Goal-Oriented Learner Feedback,Control,Tutor Feedback
1,-0.019701,-0.038718,0.072051,-0.112848,-0.047536,-0.056324,-0.38088,-0.235727,0.042608,0.057252,...,-0.065176,-0.133364,0.018289,-0.006384,0.005943,Tutor Feedback + Goal-Oriented Learner Feedback,Control,Tutor Feedback + Socially-Oriented Learner Fee...,Tutor Feedback + Goal-Oriented Learner Feedback,Tutor Feedback
2,-0.078632,-0.040132,0.084384,-0.054734,-0.041912,-0.055884,-0.531814,-0.185754,0.042608,0.038167,...,-0.009154,-0.133364,0.012352,-0.119271,-0.004597,Tutor Feedback + Goal-Oriented Learner Feedback,Control,Tutor Feedback + Socially-Oriented Learner Fee...,Control,Tutor Feedback
3,-0.037082,-0.046364,-0.162677,-0.126876,-0.054275,-0.033682,0.836897,0.098804,0.042608,0.038715,...,-0.014442,-0.133364,0.009075,0.106447,0.011937,Control,Tutor Feedback + Goal-Oriented Learner Feedback,Tutor Feedback + Socially-Oriented Learner Fee...,Tutor Feedback + Socially-Oriented Learner Fee...,Tutor Feedback + Goal-Oriented Learner Feedback
4,-0.043184,-0.035886,0.081479,-0.049349,-0.04651,-0.05296,-0.522948,-0.244993,0.042608,0.054801,...,-0.049138,-0.133364,0.016424,-0.082413,0.00446,Tutor Feedback + Goal-Oriented Learner Feedback,Control,Tutor Feedback + Socially-Oriented Learner Fee...,Tutor Feedback + Goal-Oriented Learner Feedback,Tutor Feedback


In [45]:
policy_columns = [f'{category}_policy' for category in categories.keys()]
df = pd.concat([agg_data, preds_data[policy_columns]], axis=1)

df.head()

Unnamed: 0,attendance_1,rating_1,talktime_tutor_pct_1,spoken_token_tutor_pct_1,chat_token_tutor_pct_1,length_utterance_tutor_1,length_utterance_student_1,length_utterance_tutor_chat_1,length_utterance_student_chat_1,ratio_students_engaged_1,...,arm_name,eliciting_change,talktime_change,grade_change,uptake_change,uptake_policy,eliciting_policy,grade_policy,talktime_policy,agg_policy
0,3,2.666667,0.766844,0.662959,1.0,343.833333,126.208333,73.0,0.0,1.0,...,Control,-0.121876,-0.192079,0.283333,-0.099848,Tutor Feedback + Goal-Oriented Learner Feedback,Control,Tutor Feedback + Goal-Oriented Learner Feedback,Control,Tutor Feedback
1,4,2.0,0.935812,0.820209,0.0,383.8,67.452381,0.0,0.0,1.0,...,Tutor Feedback,-0.058013,0.727236,-0.15,-0.170414,Tutor Feedback + Goal-Oriented Learner Feedback,Control,Tutor Feedback + Socially-Oriented Learner Fee...,Tutor Feedback + Goal-Oriented Learner Feedback,Tutor Feedback
2,3,2.666667,0.832812,0.739778,0.592593,314.771084,112.814815,46.0,10.833333,1.0,...,Control,-0.20454,-0.068737,0.091667,-0.129423,Tutor Feedback + Goal-Oriented Learner Feedback,Control,Tutor Feedback + Socially-Oriented Learner Fee...,Control,Tutor Feedback
3,4,2.25,0.940294,0.865502,0.524823,800.5,139.652174,66.333333,49.4,1.0,...,Control,0.105815,0.158222,0.1375,0.018174,Control,Tutor Feedback + Goal-Oriented Learner Feedback,Tutor Feedback + Socially-Oriented Learner Fee...,Tutor Feedback + Socially-Oriented Learner Fee...,Tutor Feedback + Goal-Oriented Learner Feedback
4,2,2.0,0.859754,0.78968,1.0,344.470588,64.136364,37.0,0.0,1.0,...,Tutor Feedback,-0.0118,-0.066613,0.075,-0.139562,Tutor Feedback + Goal-Oriented Learner Feedback,Control,Tutor Feedback + Socially-Oriented Learner Fee...,Tutor Feedback + Goal-Oriented Learner Feedback,Tutor Feedback


In [46]:
policy_distribution = df['agg_policy'].value_counts()

# Print the distribution
print(policy_distribution)

# Optional: Get the relative frequency distribution
policy_distribution_normalized = df['talktime_policy'].value_counts(normalize=True)

# Print the normalized distribution
print(policy_distribution_normalized)

agg_policy
Tutor Feedback + Goal-Oriented Learner Feedback        566
Tutor Feedback                                         410
Tutor Feedback + Socially-Oriented Learner Feedback    137
Control                                                  7
Name: count, dtype: int64
talktime_policy
Tutor Feedback + Goal-Oriented Learner Feedback        0.614286
Tutor Feedback + Socially-Oriented Learner Feedback    0.188393
Control                                                0.129464
Tutor Feedback                                         0.067857
Name: proportion, dtype: float64


In [None]:
df = pd.read_csv('summary_data_with_policy.csv')
# samples_df = summary_df[covariates]
# outcome_df = summary_df[outcome_vars]

In [47]:
# REGRESSION EVALUATION STARTS HERE

def get_avg_scores(df, policy_col, outcome_var_col):
    matching_df = df[df['arm_name'] == df[policy_col]]
    aligned_mean = matching_df[outcome_var_col].mean()

    not_matching_df = df[df['arm_name'] != df[policy_col]]
    not_aligned_mean = not_matching_df[outcome_var_col].mean()

    return aligned_mean, not_aligned_mean

In [14]:
# get policy performance for all outcomes

uptake_al_score, uptake_not_al_score = get_avg_scores(df, 'uptake_policy', 'uptake_change')
print("Uptake aligned score: ", uptake_al_score)
print("Uptake not aligned score: ", uptake_not_al_score)

eliciting_al_score, eliciting_not_al_score = get_avg_scores(df, 'eliciting_policy', 'eliciting_change')
print("Eliciting aligned score: ", eliciting_al_score)
print("Eliciting not aligned score: ", eliciting_not_al_score)

grade_al_score, grade_not_al_score = get_avg_scores(df, 'grade_policy', 'grade_change')
print("Grade aligned score: ", grade_al_score)
print("Grade not aligned score: ", grade_not_al_score)

talktime_al_score, talktime_not_al_score = get_avg_scores(df, 'talktime_policy', 'talktime_change')
print("Talktime aligned score: ", talktime_al_score)
print("Talktime not aligned score: ", talktime_not_al_score)

agg_al_score, agg_not_al_score = get_avg_scores(df, 'agg_policy', 'agg_change')
print("Aggregate aligned score: ", agg_al_score)
print("Aggregate not aligned score: ", agg_not_al_score)

Uptake aligned score:  -0.03475389346327193
Uptake not aligned score:  -0.01718934842130444
Eliciting aligned score:  -0.023170289439467823
Eliciting not aligned score:  -0.049709768388427646
Grade aligned score:  0.07886848403456999
Grade not aligned score:  0.06986839194912313
Talktime aligned score:  -0.016533615463732207
Talktime not aligned score:  -0.023373587455522035


## CALCULATE STATISTICAL SIGNIFICANCE

In [41]:
from scipy.stats import ttest_ind

# Separate the data into control and feedback groups
outcome = 'aggregate_change'
arm = 'Tutor Feedback + Socially-Oriented Learner Feedback'
# 'Tutor Feedback + Goal-Oriented Learner Feedback'
# 'Tutor Feedback + Socially-Oriented Learner Feedback'

df['aggregate_change'] = df['uptake_change'] + df['eliciting_change'] + df['talktime_change'] + df['grade_change']

control_group = df[df['arm_name'] == 'Control'][outcome]
feedback_group = df[df['arm_name'] == arm][outcome]

# Perform the independent t-test
t_stat, p_value = ttest_ind(control_group, feedback_group)

print(f"Results for {outcome} in {arm}")
# Print the results
print(f'T-statistic: {t_stat}')
print(f'P-value: {p_value}')

# Interpret the results
alpha = 0.05
if p_value < alpha:
    print("We reject the null hypothesis. There is a significant difference between the groups.")
else:
    print("We fail to reject the null hypothesis. There is no significant difference between the groups.")

Results for aggregate_change in Tutor Feedback + Socially-Oriented Learner Feedback
T-statistic: -0.22736363967896503
P-value: 0.8202242691745084
We fail to reject the null hypothesis. There is no significant difference between the groups.


In [38]:
march_df = pd.read_csv('../final_project/march_summary_data.csv')
may_df = pd.read_csv('../final_project/may_summary_data.csv')

print(len(march_df.index))
print(len(may_df.index))

policy_distribution = may_df['arm_name'].value_counts()

# Print the distribution
print(policy_distribution)

# # Optional: Get the relative frequency distribution
# policy_distribution_normalized = df['talktime_policy'].value_counts(normalize=True)

# # Print the normalized distribution
# print(policy_distribution_normalized)

629
491
arm_name
Tutor Feedback                                         164
Control                                                157
Tutor Feedback + Socially-Oriented Learner Feedback     87
Tutor Feedback + Goal-Oriented Learner Feedback         83
Name: count, dtype: int64
