In [None]:
!pip install statsmodels

import pandas as pd
import folium, json
import numpy as np 
import statsmodels.api as sm 
from scipy.stats import pearsonr, spearmanr 
from statsmodels.stats.multitest import multipletests

In [None]:
# Paths and raw inputs for Sweden ("se").
geo_json_path = "../data/raw/shapefiles/se.geojson"

with open("../data/raw/metadata/se_metadata.json", "r", encoding="utf-8") as f:
    country_metadata = json.load(f)

# The corona file is tab-separated.
corona_df = pd.read_csv("../data/raw/corona/se_corona.csv", sep="\t")

# Lookup from the region code used in the corona data to the ISO 3166-2 code,
# built from the per-region entries of the metadata file.
region_map = {
    entry["covid_region_code"]: entry["iso3166-2_code"]
    for entry in country_metadata["country_metadata"]
}

# Region codes without a metadata entry map to NaN.
corona_df["iso3166-2"] = corona_df["region_code"].map(region_map)

corona_df

In [None]:
weather_df = pd.read_csv("../data/raw/weather/weather.csv")

# Shift the temperature column by -273.15 (presumably Kelvin -> Celsius; confirm
# against the data source). The CSV is re-read at the top of this cell, so
# re-running the whole cell is safe; this line must not be executed again on
# its own, or the offset would be applied twice.
weather_df["TemperatureAboveGround"] = weather_df["TemperatureAboveGround"].sub(273.15)

# Keep only rows whose ISO 3166-2 code starts with "SE".
starts_with_se = weather_df["iso3166-2"].str.startswith("SE")
weather_df = weather_df.loc[starts_with_se]

weather_df






In [None]:
# Lookup from ISO 3166-2 region code to that region's population, taken from
# the per-region entries of the metadata file.
population_map = {
    entry["iso3166-2_code"]: entry["population"]
    for entry in country_metadata["country_metadata"]
}

population_map

In [None]:
# Join corona and weather observations on (date, region). pandas' default
# join type is "inner", so only pairs present in both frames are kept.
merged_df = pd.merge(corona_df, weather_df, on=["date", "iso3166-2"])

merged_df





In [None]:
# Compare the (rows, columns) shape of the merged frame with both inputs.
merged_size = merged_df.shape
corona_size = corona_df.shape
weather_size = weather_df.shape

for frame_shape in (merged_size, corona_size, weather_size):
    print(frame_shape)

# Rows are lost in the merge, and that is acceptable: a (date, region) pair
# that has values in only one of the two frames cannot be compared, so there
# is no point in keeping half-filled rows.

In [None]:
# Pearson correlation between daily confirmed additions and each weather variable.

Xs = [
    "RelativeHumiditySurface",
    "SolarRadiation",
    "Surfacepressure",
    "TemperatureAboveGround",
    "Totalprecipitation",
    "UVIndex",
    "WindSpeed",
]

# Bonferroni correction: alpha of 0.005 divided by the number of variables tested.
significance_threshold = 0.005 / len(Xs)

confirmed = merged_df["confirmed_addition"]
for var in Xs:
    corr, pvalue = pearsonr(confirmed, merged_df[var])
    # Printed per variable: name, coefficient, p-value, significant under the threshold?
    print(f"{var}\n{corr:.3f}\t{pvalue}\t{pvalue < significance_threshold}\n")



In [None]:
# Spearman rank correlation between daily confirmed additions and each weather variable.

Xs = [
    "RelativeHumiditySurface",
    "SolarRadiation",
    "Surfacepressure",
    "TemperatureAboveGround",
    "Totalprecipitation",
    "UVIndex",
    "WindSpeed",
]

# Bonferroni correction: alpha of 0.005 divided by the number of variables tested.
significance_threshold = 0.005 / len(Xs)

confirmed = merged_df["confirmed_addition"]
for var in Xs:
    corr, pvalue = spearmanr(confirmed, merged_df[var])
    # Printed per variable: name, coefficient, p-value, significant under the threshold?
    print(f"{var}\n{corr:.3f}\t{pvalue}\t{pvalue < significance_threshold}\n")


In [None]:
# Pearson correlation on log-transformed case counts: only confirmed_addition
# is transformed, as log(confirmed_addition + 1); the weather variables are
# used untransformed.

Xs = [
    "RelativeHumiditySurface",
    "SolarRadiation",
    "Surfacepressure",
    "TemperatureAboveGround",
    "Totalprecipitation",
    "UVIndex",
    "WindSpeed",
]

# Bonferroni correction: alpha of 0.005 divided by the number of variables tested.
significance_threshold = 0.005 / len(Xs)

log_confirmed = np.log(merged_df["confirmed_addition"] + 1)
for var in Xs:
    corr, pvalue = pearsonr(log_confirmed, merged_df[var])
    # Printed per variable: name, coefficient, p-value, significant under the threshold?
    print(f"{var}\n{corr:.3f}\t{pvalue}\t{pvalue < significance_threshold}\n")

In [None]:
# Holm-Bonferroni correction (statsmodels.stats.multitest.multipletests),
# applied jointly to all len(tests) * len(Xs) correlation tests.
tests = ("linear", "spearman", "log")

confirmed = merged_df["confirmed_addition"]
log_confirmed = np.log(confirmed + 1)

# One (correlation function, response series) pair per entry of `tests`,
# in the same order as `tests`.
test_setups = (
    (pearsonr, confirmed),       # "linear"
    (spearmanr, confirmed),      # "spearman"
    (pearsonr, log_confirmed),   # "log"
)

# p-values are stored test-major: every variable of Xs for the first test,
# then every variable for the second test, and so on. The entry for
# (tests[i], Xs[j]) therefore sits at index i * len(Xs) + j.
pvalues = []
for corr_func, response in test_setups:
    for var in Xs:
        corr, pvalue = corr_func(response, merged_df[var])
        pvalues.append(pvalue)

# NOTE(review): alpha is 0.05 here, whereas the per-test Bonferroni cells use
# 0.005 divided by the number of variables. Confirm which family-wise alpha is
# intended before comparing the two corrections.
# Only the first two return values (reject flags, corrected p-values) are
# needed; slicing avoids assigning to `_`, which IPython uses for the last output.
significant, pholmcorrected = multipletests(pvalues, alpha=0.05, method="holm")[:2]

for i, test in enumerate(tests):
    for j, var in enumerate(Xs):
        # Index into the flat, test-major result array (not i + j, which would
        # read overlapping entries from the first few positions only).
        print(f"{test}\t{var}\t{significant[i * len(Xs) + j]}")

The hypotheses for which the Bonferroni and Holm-Bonferroni corrections disagree are as follows:

in Pearson:
* all match

in Spearman:
* all match

in Loglog:
* Surfacepressure
* Totalprecipitation


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=16528e8b-211f-4536-8a49-afddba94916d' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>