In [None]:
# importing 


import pandas as pd
import folium, json
import numpy as np 
import statsmodels.api as sm 
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr 
from statsmodels.stats.multitest import multipletests



In [None]:
# Load the raw inputs: weather observations, Swedish case counts and region metadata.

# Weather variables per day per region.
weather_df = pd.read_csv("../data/raw/weather/weather.csv")

# Shift the temperature column by -273.15 (presumably Kelvin -> Celsius; confirm
# against the data source). The offset must be applied only once per loaded frame;
# because the CSV is re-read just above, re-running this cell stays safe.
weather_df["TemperatureAboveGround"] = weather_df["TemperatureAboveGround"].sub(273.15)

# Keep only the rows whose ISO 3166-2 code starts with "SE".
sweden_weather_df = weather_df.loc[weather_df["iso3166-2"].str.startswith("SE")]

# Number of cases per day per region (tab-separated file); the daily count
# column is renamed to "cases".
corona_df = pd.read_csv("../data/raw/corona/se_corona.csv", sep="\t").rename(
    columns={"confirmed_addition": "cases"}
)

# Population and region code per region.
with open("../data/raw/metadata/se_metadata.json", "r", encoding="utf-8") as f:
    country_metadata = json.load(f)



In [None]:
# checking for NA values
# Only the last expression of a cell is displayed, so both checks are gathered
# into one dict; otherwise the weather result would be computed and discarded.
{
    "sweden_weather_df": bool(sweden_weather_df.isna().any().any()),
    "corona_df": bool(corona_df.isna().any().any()),
}

In [None]:
# Stringency Index
# Builds a (date, stringency_index) table for Sweden, enriches the case counts
# with ISO region codes / population / cases per capita, and joins cases,
# weather and stringency into `full_df`.

str_df = pd.read_csv("../data/raw/corona/stringency_index.csv")
str_df = str_df[str_df["country_name"] == "Sweden"]
if len(str_df) != 1:
    raise ValueError(
        f"expected exactly one stringency row for Sweden, found {len(str_df)}"
    )

# The first three columns are skipped (presumably identifier columns rather than
# dates -- confirm against the CSV header). The single Sweden row is taken by
# position, so the result does not depend on the row label the file happens to
# give Sweden.
n_leading_columns = 3
str_df = (
    str_df.iloc[0, n_leading_columns:]
    .dropna()  # dropping NA values
    .rename_axis("date")
    .reset_index(name="stringency_index")
)

# changing string object to datetime, then to plain date objects for the merge
str_df["date"] = pd.to_datetime(str_df["date"], format="%d%b%Y").dt.date


region_records = country_metadata["country_metadata"]

# covid region code -> ISO 3166-2 code
region_map = {
    record["covid_region_code"]: record["iso3166-2_code"] for record in region_records
}
# ISO 3166-2 code -> population
population_map = {
    record["iso3166-2_code"]: record["population"] for record in region_records
}

corona_df["region"] = corona_df["region_code"].map(region_map)
corona_df["population"] = corona_df["region"].map(population_map)  # adding another column
corona_df["cases_per_capita"] = corona_df["cases"] / corona_df["population"]  # adding our factor - cases per capita


# Rows are matched on (date, region code). "date" has the same name on both
# sides, so it appears once in the result; the region key is named differently
# on each side, so both key columns survive and the redundant "iso3166-2"
# column is dropped afterwards.
merged_df = corona_df.merge(
    sweden_weather_df,
    left_on=["date", "region"],
    right_on=["date", "iso3166-2"],
)

merged_df = merged_df.drop("iso3166-2", axis=1)
merged_df = merged_df.rename(columns={"region_code": "region_name", "region": "region_code"})

# converting string type to datetime to match in merge with SI
merged_df["date"] = pd.to_datetime(merged_df["date"], format="%Y-%m-%d").dt.date


# There is one stringency value per date. Merging on "date" attaches it to every
# region's row for that date, so the table needs no per-region replication.
str_df_regions = str_df.reset_index(drop=True)

full_df = merged_df.merge(str_df_regions, on="date")

full_df

In [None]:
# defining dataframe with relevant information for regression
# Drops the columns that are not used by any regression below.
weather_variable_subplot_regression = full_df.drop(columns=["region_name", "population"])

# Make the stringency index numeric. It is cast to float rather than int,
# because an integer cast would silently truncate any fractional part of the
# index before it enters the regressions.
weather_variable_subplot_regression["stringency_index"] = (
    weather_variable_subplot_regression["stringency_index"].astype(float)
)

In [None]:
# Definition of Xs: the regressors shared by the models below.
# add_constant supplies the "const" column that is listed last in Xs.
weather_variable_subplot_regression = sm.add_constant(weather_variable_subplot_regression)
Xs = [
    "RelativeHumiditySurface",
    "SolarRadiation",
    "Surfacepressure",
    "TemperatureAboveGround",
    "Totalprecipitation",
    "UVIndex",
    "WindSpeed",
    "stringency_index",
    "const",
]

In [None]:
# Multivariate regression: raw case counts on the weather variables and the
# stringency index (linear response).

est = sm.OLS(
    endog=weather_variable_subplot_regression["cases"],
    exog=weather_variable_subplot_regression[Xs],
    hasconst=True,
).fit()

print(est.summary())

In [None]:
# Multivariate regression: log(cases + 1) on the weather variables and the
# stringency index.

est = sm.OLS(
    endog=np.log(weather_variable_subplot_regression["cases"] + 1),
    exog=weather_variable_subplot_regression[Xs],
    hasconst=True,
).fit()
print(est.summary())

In [None]:
# Multivariate regression: log(cases per capita + 1) on the weather variables
# and the stringency index.

est = sm.OLS(
    endog=np.log(weather_variable_subplot_regression["cases_per_capita"] + 1),
    exog=weather_variable_subplot_regression[Xs],
    hasconst=True,
).fit()
print(est.summary())

In [None]:
# Multivariate regression on the regions only: log(cases per capita + 1)
# explained by a constant and one 0/1 dummy column per region.
# This cell also builds Xs4 (weather variables + region dummies); the model
# fitted here uses `regions` only.

# This region gets no dummy column, so it is the baseline captured by the
# constant; a dummy for every region plus a constant would be collinear.
reference_region = "SE-K"

# Sorted so that the dummy columns (and the rows of the summary table) come out
# in the same order on every run; iterating a bare set of strings does not
# guarantee a stable order between kernel sessions.
region_codes = sorted(
    set(weather_variable_subplot_regression["region_code"]) - {reference_region}
)

for region in region_codes:
    weather_variable_subplot_regression[region] = (
        weather_variable_subplot_regression["region_code"] == region
    ).astype(int)

regions = ["const"] + region_codes
Xs4 = [
    "RelativeHumiditySurface",
    "SolarRadiation",
    "Surfacepressure",
    "TemperatureAboveGround",
    "Totalprecipitation",
    "UVIndex",
    "WindSpeed",
] + region_codes

# Make sure the intercept column exists regardless of which cells ran before.
weather_variable_subplot_regression["const"] = 1.0

est = sm.OLS(
    np.log(weather_variable_subplot_regression["cases_per_capita"] + 1),
    weather_variable_subplot_regression[regions],
    hasconst=True,
).fit()

print(est.summary())

In [None]:
# multivariate regression per capita, regions and weather, log
# The regressors are Xs4 (weather variables + region dummies) plus the constant.
# Passing Xs here would leave out the region dummies and simply repeat the
# weather + stringency-index regression fitted earlier.
# Requires the cell that builds Xs4 and the region dummy columns to have run.

est = sm.OLS(
    np.log(weather_variable_subplot_regression["cases_per_capita"] + 1),
    weather_variable_subplot_regression[Xs4 + ["const"]],
    hasconst=True,
).fit()

print(est.summary())

In [None]:
# multivariate regression per capita, regions and weather, log; clustered standard errors: NO

#est = sm.OLS(np.log(weather_variable_subplot_regression["cases_per_capita"] + 1), weather_variable_subplot_regression[Xs], hasconst = True).fit(cov_type = "cluster", cov_kwds = {"groups": weather_variable_subplot_regression["region_code"]}, use_t =True)

#print(est.summary())


In [None]:
# Multivariate regression with all weather variables, NO stringency index,
# linear response (raw case counts).

weather_variable_subplot_regression = sm.add_constant(weather_variable_subplot_regression)
Xs3 = [
    "RelativeHumiditySurface",
    "SolarRadiation",
    "Surfacepressure",
    "TemperatureAboveGround",
    "Totalprecipitation",
    "UVIndex",
    "WindSpeed",
    "const",
]

est = sm.OLS(
    endog=weather_variable_subplot_regression["cases"],
    exog=weather_variable_subplot_regression[Xs3],
    hasconst=True,
).fit()

print(est.summary())

In [None]:
# Multivariate regression with all weather variables, NO stringency index,
# response log(cases + 1).

weather_variable_subplot_regression = sm.add_constant(weather_variable_subplot_regression)
Xs3 = [
    "RelativeHumiditySurface",
    "SolarRadiation",
    "Surfacepressure",
    "TemperatureAboveGround",
    "Totalprecipitation",
    "UVIndex",
    "WindSpeed",
    "const",
]

est = sm.OLS(
    endog=np.log(weather_variable_subplot_regression["cases"] + 1),
    exog=weather_variable_subplot_regression[Xs3],
    hasconst=True,
).fit()

print(est.summary())

In [None]:
# Multivariate regression with only three weather variables (humidity,
# temperature, wind speed), response log(cases per capita + 1).

weather_variable_subplot_regression = sm.add_constant(weather_variable_subplot_regression)
Xs2 = [
    "RelativeHumiditySurface",
    "TemperatureAboveGround",
    "WindSpeed",
    "const",
]

est = sm.OLS(
    endog=np.log(weather_variable_subplot_regression["cases_per_capita"] + 1),
    exog=weather_variable_subplot_regression[Xs2],
    hasconst=True,
).fit()

print(est.summary())
