In [None]:
!pip install statsmodels

import pandas as pd
import json
import numpy as np 
import statsmodels.api as sm 
from scipy.stats import pearsonr, spearmanr 

![Picture title](image-20220215-122736.png)

In [None]:
# --- Raw inputs ------------------------------------------------------------
geo_json_path = "../data/raw/shapefiles/se.geojson"  # not read in this cell
corona_df = pd.read_csv("../data/raw/corona/se_corona.csv", sep="\t")
with open("../data/raw/metadata/se_metadata.json", "r", encoding="utf-8") as f:
    country_metadata = json.load(f)

# --- Lookup tables from the per-region metadata records ---------------------
region_records = country_metadata["country_metadata"]
# COVID dataset region code -> ISO 3166-2 code
region_map = {rec["covid_region_code"]: rec["iso3166-2_code"] for rec in region_records}
# ISO 3166-2 code -> population
population_map = {rec["iso3166-2_code"]: rec["population"] for rec in region_records}

# Both columns hold the same ISO 3166-2 code: "iso3166-2" is the key used to
# join the weather data, "region" is the key used for the per-region totals.
corona_df["iso3166-2"] = corona_df["region_code"].map(region_map)
corona_df["region"] = corona_df["iso3166-2"]

# --- Per-region totals and cases per capita ---------------------------------
corona_df_by_region = (
    corona_df.groupby(by="region")["confirmed_addition"].sum().reset_index()
)
corona_df_by_region["population"] = corona_df_by_region["region"].map(population_map)
corona_df_by_region["per_capita"] = (
    corona_df_by_region["confirmed_addition"] / corona_df_by_region["population"]
)

# --- Weather ----------------------------------------------------------------
weather_df = pd.read_csv("../data/raw/weather/weather.csv")
# Offset by 273.15 (presumably Kelvin -> degrees Celsius; confirm the source unit).
# This must be applied exactly once per load; it is safe here because the frame
# is re-read from disk just above every time the cell runs.
weather_df["TemperatureAboveGround"] = weather_df["TemperatureAboveGround"] - 273.15
# na=False: rows without a region code are dropped instead of producing a NaN
# mask, which pandas refuses to index with.
weather_df = weather_df[weather_df["iso3166-2"].str.startswith("SE", na=False)]

# --- Modelling frame: one row per (date, region) present in both sources ----
df = corona_df.merge(weather_df, on=["date", "iso3166-2"])

# Attach the per-region figures by lookup rather than by merging
# corona_df_by_region: that table also has a "confirmed_addition" column, so a
# merge would rename the daily counts to confirmed_addition_x / _y.
region_stats = corona_df_by_region.set_index("region")
df["population"] = df["iso3166-2"].map(region_stats["population"])
df["per_capita"] = df["iso3166-2"].map(region_stats["per_capita"])

df.head()


In [None]:
# Weather covariates used as regressors, followed by the intercept column.
Xs = [
    "RelativeHumiditySurface",
    "SolarRadiation",
    "Surfacepressure",
    "TemperatureAboveGround",
    "Totalprecipitation",
    "UVIndex",
    "WindSpeed",
    "const",
]

# Add the intercept column explicitly. sm.add_constant(df) defaults to
# has_constant="skip": if df already holds any constant-valued column it adds
# nothing, and selecting "const" later fails. The guard keeps the cell
# idempotent; inserting at position 0 matches add_constant's prepend behaviour.
if "const" not in df.columns:
    df.insert(0, "const", 1.0)

In [None]:
# Show the column names that the model cells select from df as regressors.
print(Xs)

In [None]:
# Baseline model: daily confirmed additions regressed on the columns listed in
# Xs. hasconst=True tells statsmodels the design already contains an intercept.
est = sm.OLS(
    endog=df["confirmed_addition"],
    exog=df[Xs],
    hasconst=True,
).fit()
print(est.summary())

In [None]:
# Same regressors as the raw-count model, but the response is log(1 + count)
# so that zero-count days stay finite.
# NOTE(review): a negative confirmed_addition (e.g. a data correction) would
# make the log undefined -- confirm the series has none.
est = sm.OLS(
    endog=np.log(df["confirmed_addition"] + 1),
    exog=df[Xs],
    hasconst=True,
).fit()
print(est.summary())

In [None]:
# Region-only model: log(1 + cases per capita) regressed on region dummies.
REFERENCE_REGION = "SE-K"  # baseline region; it gets no dummy so the design stays full rank

regions = ["const"]

# sorted() fixes the column order: iterating a set of strings is hash-dependent
# and can change between kernel restarts, reshuffling the summary table.
for region in sorted(df["iso3166-2"].dropna().unique()):
    if region == REFERENCE_REGION:
        continue
    df[region] = (df["iso3166-2"] == region).astype(int)
    regions.append(region)
    if region not in Xs:  # re-running this cell must not duplicate regressors in Xs
        Xs.append(region)

# Look the per-capita figure up from the per-region table instead of relying on
# a "per_capita" column in df; the lookup key is the same ISO 3166-2 code that
# corona_df_by_region is grouped on.
per_capita = (
    df["iso3166-2"]
    .map(corona_df_by_region.set_index("region")["per_capita"])
    .rename("per_capita")
)

est = sm.OLS(np.log(per_capita + 1), df[regions], hasconst=True).fit()

print(est.summary())