In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the merged survey export into a DataFrame. The path is relative, so
# 'Merged_survey.csv' must be present in the kernel's working directory.
merged_survey = pd.read_csv('Merged_survey.csv')

In [None]:
# Remove the respondent e-mail column before any analysis or display.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: re-running it after the column is already gone is a no-op
# rather than a KeyError.
merged_survey = merged_survey.drop(columns=['RecipientEmail'], errors='ignore')

In [None]:
# Peek at the first rows of the Treatment column to see how the assignment
# flag is encoded before it is cast.
merged_survey['Treatment'].head()

Unnamed: 0,Treatment
0,0.0
1,1.0
2,1.0
3,0.0
4,0.0


In [None]:
# Store the treatment flag as an integer column instead of the float
# representation it was loaded with.
treatment_as_int = merged_survey['Treatment'].astype(int)
merged_survey['Treatment'] = treatment_as_int

In [None]:
# Print the column listing with non-null counts and dtypes for the frame as it
# stands after the preceding cells.
merged_survey.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35 entries, 0 to 34
Data columns (total 22 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   pre_class_time_spent                        35 non-null     int64  
 1   pre_class_DistributionChannel               35 non-null     object 
 2   CVQ1_gender_bool                            35 non-null     float64
 3   CVQ2_read_stubhub_case_bool                 35 non-null     float64
 4   CVQ3_knowledge_causality_val                35 non-null     float64
 5   CVQ4_likeliness_participate_val             35 non-null     float64
 6   RQ1_understand_material_val                 35 non-null     float64
 7   RQ2_remember_tomorrow_val                   35 non-null     float64
 8   RQ3_active_involvement_val                  35 non-null     float64
 9   Score                                       35 non-null     int64  
 10  Treatment       

In [None]:
import numpy as np

def cohens_d(group1, group2):
    """Return Cohen's d (pooled-SD standardized mean difference) of two samples.

    The sign follows ``mean(group1) - mean(group2)``.

    Parameters
    ----------
    group1, group2 : array-like of numbers
        Observations for each group (list, NumPy array or pandas Series).
        NaN entries are discarded before anything is computed, so the means,
        the variances and the degrees of freedom all refer to the same
        observations.

    Returns
    -------
    float
        The effect size. NaN when either group has fewer than two valid
        observations, because a sample variance is undefined there. If the
        pooled standard deviation is zero the plain floating-point result of
        the division (inf or NaN) is returned.
    """
    # Work on flat float arrays so every input type behaves the same way.
    sample1 = np.asarray(group1, dtype=float).ravel()
    sample2 = np.asarray(group2, dtype=float).ravel()

    # Drop missing values up front; otherwise NaN-skipping reducers and len()
    # would disagree about the sample size used in the pooled variance.
    sample1 = sample1[~np.isnan(sample1)]
    sample2 = sample2[~np.isnan(sample2)]

    n1, n2 = sample1.size, sample2.size
    if n1 < 2 or n2 < 2:
        return np.nan

    # Pooled variance: unbiased group variances weighted by their degrees of freedom.
    pooled_var = (
        (n1 - 1) * sample1.var(ddof=1) + (n2 - 1) * sample2.var(ddof=1)
    ) / (n1 + n2 - 2)
    pooled_std = np.sqrt(pooled_var)

    # A zero pooled SD yields inf/NaN; silence the warning, keep the value.
    with np.errstate(divide='ignore', invalid='ignore'):
        return (sample1.mean() - sample2.mean()) / pooled_std

# Outcome measures to compare between the two arms
response_vars = [
    'RQ1_understand_material_val',
    'post_class_class_participation_val',
    'post_class_recall_val',
]

# Row masks for each arm, built once and reused for every outcome
is_control = merged_survey['Treatment'] == 0
is_treated = merged_survey['Treatment'] == 1

# Effect size per outcome, signed as treatment minus control
cohens_d_results = {}
for response_var in response_vars:
    control_group = merged_survey.loc[is_control, response_var]
    treatment_group = merged_survey.loc[is_treated, response_var]
    cohens_d_results[response_var] = cohens_d(treatment_group, control_group)

cohens_d_results

{'RQ1_understand_material_val': 0.24025492113602034,
 'post_class_class_participation_val': 0.1283647525315742,
 'post_class_recall_val': -0.10772428782883381}

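- RQ1_understand_material_val: 0.2403 (small effect)
- post_class_class_participation_val: 0.1284 (negligible effect; below Cohen's conventional 0.2 threshold for "small")
- post_class_recall_val: -0.1077 (negligible effect; below Cohen's conventional 0.2 threshold for "small")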

In [None]:
import pingouin as pg
# Achieved power of a two-sided, two-sample t-test (alpha = 0.05) for each
# observed effect size.
#
# pg.power_ttest reads `n` as the size of EACH group and assumes balanced arms,
# so passing the total number of rows overstates the power. The arms here need
# not be equal, so use pg.power_ttest2n with the actual size of each arm.
n_treatment = int((merged_survey['Treatment'] == 1).sum())
n_control = int((merged_survey['Treatment'] == 0).sum())


def achieved_power(effect_size):
    """Return the two-sided power at alpha=0.05 for the observed arm sizes."""
    return pg.power_ttest2n(
        nx=n_treatment,
        ny=n_control,
        d=effect_size,
        alpha=0.05,
        alternative='two-sided',
    )


rq1_power = achieved_power(cohens_d_results['RQ1_understand_material_val'])
participation_power = achieved_power(cohens_d_results['post_class_class_participation_val'])
recall_power = achieved_power(cohens_d_results['post_class_recall_val'])
print(f"RQ1 Power: {rq1_power}")
print(f"Participation Power: {participation_power}")
print(f"Recall Power: {recall_power}")

RQ1 Power: 0.16784340907453155
Participation Power: 0.08268141243262343
Recall Power: 0.07290587685255286


In [None]:
from scipy import stats

import statsmodels.api as sm
import statsmodels.stats.api as sms
# Randomization check: one-sample z-test of the share of rows with
# Treatment == 1 against a null proportion of 0.5.
count = merged_survey['Treatment'].eq(1).sum()
nobs = merged_survey.shape[0]
stat, pval = sms.proportions_ztest(count, nobs, value=0.5)
print(f"Z-statistic: {stat:.4f}, P-value: {pval:.4f}")

Z-statistic: -1.5742, P-value: 0.1154


In [None]:
!pip install pingouin

Collecting pingouin
  Downloading pingouin-0.5.5-py3-none-any.whl.metadata (19 kB)
Collecting pandas-flavor (from pingouin)
  Downloading pandas_flavor-0.6.0-py3-none-any.whl.metadata (6.3 kB)
Downloading pingouin-0.5.5-py3-none-any.whl (204 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m204.4/204.4 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pandas_flavor-0.6.0-py3-none-any.whl (7.2 kB)
Installing collected packages: pandas-flavor, pingouin
Successfully installed pandas-flavor-0.6.0 pingouin-0.5.5


In [None]:
!pip install stargazer

Collecting stargazer
  Downloading stargazer-0.0.7-py3-none-any.whl.metadata (6.3 kB)
Downloading stargazer-0.0.7-py3-none-any.whl (15 kB)
Installing collected packages: stargazer
Successfully installed stargazer-0.0.7


In [None]:
import statsmodels.formula.api as smf
from stargazer.stargazer import Stargazer

# Balance check: regress each pre-treatment covariate on the Treatment
# indicator using HC1 heteroskedasticity-robust standard errors. The formulas
# are generated from one list so every model is specified identically (the
# hand-written CVQ4 formula carried a stray tab character).
balance_covariates = [
    'CVQ1_gender_bool',
    'CVQ2_read_stubhub_case_bool',
    'CVQ3_knowledge_causality_val',
    'CVQ4_likeliness_participate_val',
]
CV1, CV2, CV3, CV4 = (
    smf.ols(f"{covariate} ~ Treatment", data=merged_survey).fit(cov_type='HC1')
    for covariate in balance_covariates
)

# Side-by-side regression table, one column per covariate
reg_cv = Stargazer([CV1, CV2, CV3, CV4])
reg_cv

0,1,2,3,4
,,,,
,,,,
,(1),(2),(3),(4)
,,,,
Intercept,0.591***,0.773***,61.636***,68.955***
,(0.108),(0.092),(3.589),(5.279)
Treatment,-0.052,0.150,0.748,2.507
,(0.179),(0.119),(5.078),(7.772)
Observations,35,35,35,35
R2,0.003,0.037,0.001,0.003
