In [1]:
import pandas as pd
import numpy as np
import os
import CRPS.CRPS as pscore
import copy
from joblib import dump, load
from scipy.stats import nbinom, poisson
from time import sleep
from tqdm import tqdm
import warnings

# create the feature- and actuals-data list
# set the feature and actuals year lists
feature_years = ['2017','2018','2019','2020']
actual_years = ['2018','2019','2020','2021']

actuals_df_list = []
features_df_list = []

# path to the current directory
current_dir = os.getcwd()

for i in range(len(feature_years)):
    # relative paths to the parquet files
    relative_path_features = os.path.join('..', 'data', 'cm_features_to_oct' + feature_years[i] + '.parquet')
    relative_path_actuals = os.path.join('..', 'data', 'cm_actuals_' + actual_years[i] + '.parquet')

    path_features = os.path.join(current_dir, relative_path_features)
    path_actuals = os.path.join(current_dir, relative_path_actuals)

    # append datasets to the lists
    actuals_df_list.append({'year':actual_years[i], 'data':pd.read_parquet(path_actuals, engine='pyarrow')})
    features_df_list.append({'year':feature_years[i], 'data':pd.read_parquet(path_features, engine='pyarrow')})

# concat the feature datasets, so that every data contains the observations starting with january 1990
for i in range(1,len(features_df_list)):
    features_df_list[i]['data'] = pd.concat([features_df_list[i-1]['data'], features_df_list[i]['data']])

country_list = sorted(features_df_list[3]['data'].index.get_level_values('country_id').unique().tolist())

# country group list of all four datasets
country_feature_group_list = []
country_actual_group_list = []
# fill list 
for i in range(len(features_df_list)):
    country_feature_group_list.append(features_df_list[i]['data'].groupby('country_id'))
    country_actual_group_list.append(actuals_df_list[i]['data'].groupby('country_id'))

In [2]:
# check for nan's
is_na_series = features_df_list[0]['data'].isna().sum()

for i in range(len(is_na_series)):
    if is_na_series[i] > 0 :
        print(str(is_na_series.index[i]) + ': ' + str(is_na_series[i]))

## First Neural Net 
The first NN is only to predict the month s=3 due to simplicity. It only has one input node, i.e. the last w observed fatalities ('ged_sb'). It has one hidden layer and two output nodes. One for the parameter n of the negative binomial distribution and one for the parameter p. 

In [4]:
features = country_feature_group_list[3].get_group(246)
features

Unnamed: 0_level_0,Unnamed: 1_level_0,gleditsch_ward,ged_sb,ged_ns,ged_os,acled_sb,acled_sb_count,acled_os,ged_sb_tsum_24,wdi_sp_pop_totl,ged_sb_tlag_1,...,ind_efficiency_t48,irr_agr_efficiency_t48,services_efficiency_t48,general_efficiency_t48,water_stress_t48,renewable_internal_pcap_t48,renewable_pcap_t48,splag_1_decay_ged_sb_5,splag_1_decay_ged_os_5,splag_1_decay_ged_ns_5
month_id,country_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
379,246,626,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10243050.0,0.0,...,10.067134,0.012448,7.508171,5.649194,4.226076,2570.783305,4894.375907,0.000000,0.000000,0.000000
380,246,626,60.0,640.0,0.0,380.0,10.0,1836.0,60.0,10243050.0,0.0,...,10.067134,0.012448,7.508171,5.649194,4.226076,2570.783305,4894.375907,3.961023,4.292905,4.457506
381,246,626,6.0,46.0,0.0,0.0,0.0,1.0,66.0,10243050.0,60.0,...,10.067134,0.012448,7.508171,5.649194,4.226076,2570.783305,4894.375907,3.904386,4.255288,4.386735
382,246,626,84.0,6.0,0.0,60.0,2.0,35.0,150.0,10243050.0,6.0,...,10.067134,0.012448,7.508171,5.649194,4.226076,2570.783305,4894.375907,3.877829,4.134149,4.344849
383,246,626,26.0,0.0,0.0,18.0,2.0,7.0,176.0,10243050.0,84.0,...,10.067134,0.012448,7.508171,5.649194,4.226076,2570.783305,4894.375907,3.824371,4.255625,4.277286
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
486,246,626,3.0,74.0,6.0,116.0,13.0,79.0,224.0,10447666.0,13.0,...,23.998790,0.041360,35.247547,18.559925,4.226076,2400.180641,4569.574682,4.678154,5.020182,4.985520
487,246,626,9.0,127.0,5.0,10.0,9.0,165.0,222.0,10447666.0,3.0,...,23.998790,0.041360,35.247547,18.559925,4.226076,2400.180641,4569.574682,4.925024,4.934203,5.179843
488,246,626,24.0,0.0,4.0,176.0,11.0,31.0,236.0,10447666.0,9.0,...,23.998790,0.041360,35.247547,18.559925,4.226076,2400.180641,4569.574682,4.869412,5.015899,5.117787
489,246,626,1.0,12.0,0.0,6.0,3.0,43.0,212.0,10447666.0,24.0,...,23.998790,0.041360,35.247547,18.559925,4.226076,2400.180641,4569.574682,4.842253,4.958510,5.057498


#### basic tensor operations

In [3]:
import torch
# in PyTorch there are no lists/arrays etc: everything is a tensor
y = torch.empty(2,2) # 2 dimensional tensor (matrix)
x = torch.rand(2,2, dtype = torch.float)

y.add_(x) # _ functions do an inplace operation -> modifies the variable that it is applied on
print(y[:,0]) # slicing same as in numpy/pandas
y[1,1].item() # returns the actual item itself
y.size()

x = torch.tensor(features['ged_sb'].tail(12).values)
x

tensor([0.8444, 0.3973])


NameError: name 'features' is not defined

#### gradient calculation with autograd

In [27]:
x = torch.randn(3, requires_grad=True) # req grad has to be specified if we want to calc the gradient
# if this is made, pytorch creates an operational graph (see picture below) e.g.
y = x + 2
print(y) # y has an addbackward attribute for backpropagation 

z = y*y*2
#z = z.mean()
print(z)

v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
z.backward(v) # dz/dx (vector jacobian product; chain rule)
print(x.grad) # the gradients


## preventing gradient history
x.requires_frad_(False) # first option
x.detach() # second option
with torch.no_grad(): # third option
    y = x + 2

tensor([2.0318, 2.1967, 2.3616], grad_fn=<AddBackward0>)
tensor([ 8.2561,  9.6508, 11.1548], grad_fn=<MulBackward0>)
tensor([0.8127, 8.7867, 0.0094])


![image.png](attachment:image.png)

In [36]:
## training example
weights = torch.ones(4, requires_grad=True) # whenever we want to calculate the gradients we have to set the req_grad = True

iterations = 3

for epoch in range(iterations):
    model_output = (weights*3).sum()

    model_output.backward()

    print(weights.grad)

    weights.grad.zero_() # empty gradients

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


### backpropagation

![image.png](attachment:image.png)

#### computational graph
Used to calculate local gradients

![image-3.png](attachment:image-3.png)

regression example with squared error as loss function

#### general steps of backward prop
![image-3.png](attachment:image-3.png)

on the left is the final gradient obtained by the chainrule
![image.png](attachment:image.png)

example
![image-2.png](attachment:image-2.png)

In [None]:
## code the above example in pytorch

x = torch.tensor(1.0)
y = torch.tensor(2.0)

w = torch.tensor(1.0, requires_grad=True)

# forward pass and compute the loss
y_hat = w*x
loss = (y_hat - y)**2
print(loss)

# backward pass (the whole gradient computation)
loss.backward()
print(w.grad) # first gradient after the first backward pass

## update weigths
## next forward and backward etc.

#### gradient descent with autograd

In [10]:
## whole pipline for a neural network with pytorch
# 1) Design model (input, output size, forward pass)
# 2) Construct loss and optimizer
# 3) Training loop
#  - forward pass: compute prediction
#  - backward pass: gradients
#  - update weights

from neuralforecast import NeuralForecast
from neuralforecast.losses.pytorch import MAE

import torch.nn as nn # Neural network module
#f = w*x

#f = 2*x
X = torch.tensor([1,2,3,4], dtype=torch.float32)
Y = torch.tensor([2,4,6,8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

def forward(x):
    return w * x

# Training
learning_rate = 0.01
n_iters = 100

#loss = nn.MSELoss()
loss = MAE()

optimizer = torch.optim.SGD([w], lr = learning_rate) # stochastic gradient descent


for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)

    print(y_pred)

    # loss 
    l = loss(Y, y_pred)

    # gradients = backward pass
    l.backward() # dl/dw

    # update weigts
    optimizer.step()

    # zero gradients
    optimizer.zero_grad()

    if epoch % 10 == 0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss= {l:.8f}')

print(f'Prediction ofter training: f(5) = {forward(5):.3f}')

tensor([0., 0., 0., 0.], grad_fn=<MulBackward0>)
epoch 1: w = 0.025, loss= 5.00000000
tensor([0.0250, 0.0500, 0.0750, 0.1000], grad_fn=<MulBackward0>)
tensor([0.0500, 0.1000, 0.1500, 0.2000], grad_fn=<MulBackward0>)
tensor([0.0750, 0.1500, 0.2250, 0.3000], grad_fn=<MulBackward0>)
tensor([0.1000, 0.2000, 0.3000, 0.4000], grad_fn=<MulBackward0>)
tensor([0.1250, 0.2500, 0.3750, 0.5000], grad_fn=<MulBackward0>)
tensor([0.1500, 0.3000, 0.4500, 0.6000], grad_fn=<MulBackward0>)
tensor([0.1750, 0.3500, 0.5250, 0.7000], grad_fn=<MulBackward0>)
tensor([0.2000, 0.4000, 0.6000, 0.8000], grad_fn=<MulBackward0>)
tensor([0.2250, 0.4500, 0.6750, 0.9000], grad_fn=<MulBackward0>)
tensor([0.2500, 0.5000, 0.7500, 1.0000], grad_fn=<MulBackward0>)
epoch 11: w = 0.275, loss= 4.37500000
tensor([0.2750, 0.5500, 0.8250, 1.1000], grad_fn=<MulBackward0>)
tensor([0.3000, 0.6000, 0.9000, 1.2000], grad_fn=<MulBackward0>)
tensor([0.3250, 0.6500, 0.9750, 1.3000], grad_fn=<MulBackward0>)
tensor([0.3500, 0.7000, 1.0500,

#### training pipeline

In [12]:
import torch
import torch.nn as nn
from neuralforecast.losses.pytorch import MSE

# Beispiel Daten
y_true = torch.tensor([1.0, 2.0, 3.0])
y_pred = torch.tensor([2.0, 2.5, 3.5])

# Erstellen der Loss-Funktion
#loss_fn = nn.MSELoss()
loss_fn = MSE()

# Berechnung der Loss
loss = loss_fn(y_pred, y_true)

print(loss)


tensor(0.5000)


#### dataset and dataloader classes

#### dataset transforms

#### activation functions: softmax and cross entropy loss