In [1]:
import pandas as pd
import plotly.express as px
from scipy.stats import pearsonr, chi2_contingency



In [4]:
google_trends = pd.read_csv('multiTimeline.csv', skiprows=2)
stock_price = pd.read_csv('SMR.csv')

merged_data = google_trends.merge(stock_price, left_on='Day', right_on='Date')
# scale volume down by 1M
merged_data['Volume'] = merged_data['Volume']/100000

fig = px.line(merged_data, x='Day',y=['NuScale Power: (United States)', 'Volume']).show()

In [5]:
correlation_coefficient, p_value = pearsonr(merged_data['NuScale Power: (United States)'], merged_data['Volume'])

print("Pearson correlation coefficient:", correlation_coefficient)
print("P-value:", p_value)


Pearson correlation coefficient: 0.3895863898237925
P-value: 6.513932805041464e-06


In [42]:
# Create a contingency table using pandas crosstab function
contingency_table = pd.crosstab(merged_data['NuScale Power: (United States)'], merged_data['Volume'])

# Perform chi-square test
chi2_stat, p_val, dof, expected = chi2_contingency(contingency_table)

print("Chi-square statistic:", chi2_stat)
print("P-value:", p_val)
print("Degrees of freedom:", dof)
print("Expected frequencies table:")
print(expected)

Chi-square statistic: 2772.0000000000005
P-value: 0.38024246099414094
Degrees of freedom: 2750
Expected frequencies table:
[[0.72222222 0.72222222 0.72222222 ... 0.72222222 0.72222222 0.72222222]
 [0.00793651 0.00793651 0.00793651 ... 0.00793651 0.00793651 0.00793651]
 [0.02380952 0.02380952 0.02380952 ... 0.02380952 0.02380952 0.02380952]
 ...
 [0.00793651 0.00793651 0.00793651 ... 0.00793651 0.00793651 0.00793651]
 [0.00793651 0.00793651 0.00793651 ... 0.00793651 0.00793651 0.00793651]
 [0.00793651 0.00793651 0.00793651 ... 0.00793651 0.00793651 0.00793651]]


In [45]:
# Remove where Google is zero
merged_data = merged_data[merged_data['NuScale Power: (United States)'] != 0]
correlation_coefficient, p_value = pearsonr(merged_data['NuScale Power: (United States)'], merged_data['Volume'])

print("Pearson correlation coefficient:", correlation_coefficient)
print("P-value:", p_value)


Pearson correlation coefficient: 0.17953045426608488
P-value: 0.30209182825771086
