# Reading the Climate variables

In the following climate variables are computed. This includes monthly average temperature, heat days per month, etc.

In [69]:
import pandas as pd
import numpy as np
from weather.src.helper import read_nuts_weather_data
from linearmodels.panel import PanelOLS

## Reading the weather data

One thing that needs to be addressed is that the soep data uses different geographic naming convetions than meteostat.
In the following is the conversion chart:
| Meteostat | SOEP | Name |
|-----|----|------------------------|
| DE6 | 2  | Hamburg                |
| DEF | 1  | Schleswig-Holstein     |
| DE9 | 3  | Niedersachsen          |
| DE5 | 4  | Bremen                 |
| DEA | 5  | Nordrhein-Westfalen    |
| DE7 | 6  | Hessen                 |
| DEB | 7  | Rheinland-Pfalz        |
| DE1 | 8  | Baden-Wuerttemberg     |
| DE2 | 9  | Bayern                 |
| DEC | 10 | Saarland               |
| DE3 | 11 | Berlin                 |
| DE4 | 12 | Brandenburg            |
| DE8 | 13 | Mecklenburg-Vorpommern |
| DED | 14 | Sachsen                |
| DEE | 15 | Sachsen-Anhalt         |
| DEG | 16 | Thuringen              |

In [70]:
# reading the weather data for NUTS 1 area codes
weather = read_nuts_weather_data('./weather/prod/weatherdata/nuts1', bar=False)

# rename variables
chart = {
    "DE6" : 2, "DEF" : 1, "DE9" : 3, "DE5" : 4, "DEA" : 5, "DE7" : 6, "DEB" : 7, "DE1" : 8, "DE2" : 9, 
    "DEC" : 10, "DE3" : 11, "DE4" : 12, "DE8" : 13, "DED" : 14, "DEE" : 15, "DEG" : 16
}
weather["NUTS_CODE"].replace(chart, inplace=True)

# drop unusefull columns and set new index
weather.drop(["wdir", "wpgt", "pres", "elevation"], axis=1, inplace=True)
weather.set_index("time", inplace=True)
weather

Unnamed: 0_level_0,tavg,tmin,tmax,prcp,snow,wspd,tsun,NUTS_CODE
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1985-01-01,-1.455556,-5.926667,-0.368889,7.133333,40.000000,19.326667,5.777778,8
1985-01-02,-3.862222,-5.468889,-0.400000,5.442222,113.333333,14.513333,0.222222,8
1985-01-03,-6.426667,-8.971111,-4.477778,4.691111,170.666667,11.366667,44.222222,8
1985-01-04,-11.495556,-14.937778,-5.075556,1.668889,229.555556,12.793333,202.888889,8
1985-01-05,-13.100000,-17.924444,-10.142222,0.720000,239.555556,5.526667,102.666667,8
...,...,...,...,...,...,...,...,...
2022-12-04,-2.333333,-3.041667,-1.541667,,,13.341667,,16
2022-12-05,-1.616667,-2.258333,-0.758333,,,11.208333,,16
2022-12-06,-1.058333,-1.933333,0.133333,,,10.708333,,16
2022-12-07,-0.283333,-1.450000,1.225000,,,11.716667,,16


# Computing the Climate Variables

In the following the weather data is read and used to compute meaningfull variables.
The variables that are computet are the `mean` and the `max` temperature of that month.

In [71]:

heatdays = lambda x: len(x[x > 35])
frostdays = lambda x: len(x[x > -5])

weather:pd.DataFrame
weather = weather.groupby('NUTS_CODE').rolling('30D')\
         .agg({'tavg':np.mean, 'tmax':heatdays, 'tmin':frostdays, 'prcp':np.mean, 'snow':np.mean, 'tsun':np.mean})
weather

Unnamed: 0_level_0,Unnamed: 1_level_0,tavg,tmax,tmin,prcp,snow,tsun
NUTS_CODE,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1985-01-01,-0.771429,0.0,1.0,2.092857,34.357143,0.000000
1,1985-01-02,-1.595714,0.0,2.0,1.310714,40.071429,33.300000
1,1985-01-03,-3.014921,0.0,2.0,1.350000,43.833333,106.800000
1,1985-01-04,-3.844524,0.0,2.0,1.071429,51.071429,124.650000
1,1985-01-05,-4.815619,0.0,2.0,0.920220,54.700000,162.360000
...,...,...,...,...,...,...,...
16,2022-12-04,4.121673,0.0,28.0,1.159646,4.420745,173.156250
16,2022-12-05,3.853038,0.0,28.0,1.198273,4.612951,172.184783
16,2022-12-06,3.611094,0.0,28.0,1.242847,4.822631,162.511364
16,2022-12-07,3.286395,0.0,28.0,1.291358,5.052280,165.351190


## Read soep data

In [72]:
soep = pd.read_csv('./prod/soeplong.csv')
soep['time'] = pd.to_datetime(soep['time'])
soep.rename(columns={'bula_h':'NUTS_CODE'}, inplace=True)
soep.set_index(["NUTS_CODE", "time"], inplace=True)

soep = soep.join(weather)
soep = soep.reset_index().set_index(['pid', 'time'])
soep.to_csv('./prod/final.csv')