In [18]:
import numpy as np
import pandas as pd 
from scipy.stats import ttest_ind

In [19]:
# read in data 
data = pd.read_csv("/Users/victoriaguo/Desktop/DS 4002/project 1/final_data.csv")

In [20]:
# split data into two groups: positive and negative
positive = data[data['sentiment'] == 'Positive']['helpful_rate']
negative = data[data['sentiment'] == 'Negative']['helpful_rate']

we want to make sure our assumptions for t tests are met: normality & constant variance

In [21]:
# test for normality: kolmogorov-smirnov test
from scipy.stats import kstest

statistic, p_value = kstest(data['helpful_rate'], 'norm')

print("Kolmogorov-Smirnov Test:")
print("Statistic:", statistic)
print("P-value:", p_value)

Kolmogorov-Smirnov Test:
Statistic: 0.5
P-value: 0.0


Even though the test for normality is not met, our dataset contains 9000 observations, which means our sample size is large enough to proceed with a t-test.

In [22]:
# test for constant variance: bartlett's 
from scipy.stats import bartlett

statistic, p_value = bartlett(positive, negative)
print("Bartlett's Test:")
print("Statistic:", statistic)
print("P-value:", p_value)

Bartlett's Test:
Statistic: 7.18957112739276
P-value: 0.007332850497683136


Even though the test for constant variance is not met, our dataset contains 9000 observations, which means our sample size is large enough to proceed with a t-test.

In [23]:
# perform independent t tests
t_statistic, p_value = ttest_ind(positive, negative)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -2.3533497496693596
P-Value: 0.018625418802965643


Conclusion: reject the null hypothesis. There is a significant difference between helpfulness ratings between negative and positive reviews.

In [24]:
# create new dataframe with sentiment as columns

new = pd.DataFrame()

new['Positive'] = data.loc[data['sentiment'] == 'Positive', 'helpful_rate'].reset_index(drop=True)

new['Negative'] = data.loc[data['sentiment'] == 'Negative', 'helpful_rate'].reset_index(drop=True)


In [28]:
# remove 0 values
new = new[new['Positive'] != 0.00]
new = new[new['Negative'] != 0.00]

new.head()

Unnamed: 0,Positive,Negative
1,0.93,1.0
9,1.0,1.0
15,1.0,0.86
60,1.0,1.0
68,1.0,1.0


In [30]:
# because the assumptions above were not met, we performed a non-parametric test to validate the conclusions (no assumptions about data necessary)
from scipy.stats import wilcoxon

# Assuming 'data' is your paired data
# Perform Wilcoxon signed-rank test
statistic, p_value = wilcoxon(new)
print("Wilcoxon Signed-Rank Test:")
print("Statistic:", statistic)
print("P-value:", p_value)


Wilcoxon Signed-Rank Test:
Statistic: [ 0. nan]
P-value: [ 0. nan]


p-value is virtually 0 so we can reject null. We have sufficient evidence to conclude that the helpful rate between positive and negative reviews are different. 