In [1]:
import h5py
import pandas as pd
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Resizing, Dropout, BatchNormalization, Activation, Add, GlobalAveragePooling2D, Input, Reshape, Conv2DTranspose, Cropping2D

from keras.optimizers import Adam
import numpy as np
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

from tools import *






Normal CNN 10-dim vector

In [2]:
# load clean gdp data, keep only year, region and real_gdp columns
ukraine = pd.read_csv("data/tabular_data_ukraine.csv")

# delete Kyiv and Kyiv_Oblast
ukraine = ukraine[ukraine["region"] != "Kyiv"]
ukraine = ukraine[ukraine["region"] != "Kyiv_Oblast"]

# get the data for 2021, 2022 and before 2022
ukraine_2022 = ukraine[ukraine["year"] == 2022]
ukraine = ukraine[ukraine["year"].astype(int) < 2022]
ukraine_2021 = ukraine[ukraine["year"] == 2021]
ukraine_2021.reset_index(drop=True, inplace=True)
ukraine_2022.reset_index(drop=True, inplace=True)
ukraine.reset_index(drop=True, inplace=True)

# Initialise a three dimensional array to store the images with the shape (number of images, height, width, channels)
X = np.zeros((len(ukraine), 765, 1076, 1))
y = np.zeros(len(ukraine))

# load the images
for i in range(len(ukraine)):

    # get year, region, and gdp
    year = ukraine["year"][i]
    region = ukraine["region"][i]
    gdp_value = ukraine["real_gdp"][i]

    # load the image
    file_name = f"{year}_{region}_hq.h5"
    file_path = f"data/annual_region_images/{file_name}"
    
    with h5py.File(file_path, 'r') as annual_region:
        allangle_snow_free = annual_region["AllAngle_Composite_Snow_Free"][:]

    # add the values
    y[i] = gdp_value
    X[i, :, :, 0] = allangle_snow_free

# normalise the images and gdp data
maximum_x = X.max()
X = X / maximum_x

maximum_y = y.max()
y = y / maximum_y

# get indices for observations with  year = 2021: this is the test set
test_indices = np.where(ukraine["year"].astype(int) == 2021)[0]
train_indices = np.where(ukraine["year"].astype(int) != 2021)[0]

# get the train and test sets
X_train, y_train, X_test, y_test = X[train_indices], y[train_indices], X[test_indices], y[test_indices]

# get the prediction data
X_pred = np.zeros((len(ukraine_2022), 765, 1076, 1))
for i in range(len(ukraine_2022)):

    year = ukraine_2022["year"][i]
    region = ukraine_2022["region"][i]

    file_name = f"{year}_{region}_hq.h5"
    file_path = f"data/annual_region_images/{file_name}"

    with h5py.File(file_path, 'r') as annual_region:
        allangle_snow_free = annual_region["AllAngle_Composite_Snow_Free"][:]
    
    X_pred[i, :, :, 0] = allangle_snow_free

X_pred = X_pred / maximum_x

In [3]:
# define the model and train it 
model = Sequential()
# Resizing the images
model.add(Resizing(300, 440, input_shape=(765, 1076, 1)))
# Start with Convolutional layers
model.add(Conv2D(8, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(16, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(32, (3, 3), activation='relu'))  # Additional Conv layer
# model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(128, (3, 3), activation='relu'))  # Additional Conv layer
# model.add(MaxPooling2D((2, 2)))
# Flatten the results to feed into a dense layer
model.add(Flatten())
# Add dense layers (hidden layers)
model.add(Dense(16, activation='relu'))  # Upscaled dense layer
model.add(Dense(8, activation='relu'))   # Additional dense layer
# Output layer
model.add(Dense(10))
# Compile the model
model.compile(optimizer=Adam(), loss='mean_absolute_error', metrics=['mae'])

model.fit(X_train, y_train, epochs=50, batch_size=64, validation_split=0.2)

# use the model to extract features
y_pred = model.predict(X_train)
ukraine_stage_2 = pd.DataFrame(columns = ["region", "year", "real_gdp", "feature_1", "feature_2", "feature_3", "feature_4", "feature_5", "feature_6", "feature_7", "feature_8", "feature_9", "feature_10"])
ukraine_train = ukraine[ukraine["year"] != 2021]
ukraine_train.reset_index(drop=True, inplace=True)
for i in range(len(ukraine_train)):
    ukraine_stage_2.loc[i, "region"] = ukraine_train["region"][i]
    ukraine_stage_2.loc[i, "year"] = ukraine_train["year"][i]
    ukraine_stage_2.loc[i, "real_gdp"] = ukraine_train["real_gdp"][i]
    for j in range(10):
        ukraine_stage_2.loc[i, f"feature_{j+1}"] = y_pred[i][j]

# get the features on the test set
feature_predictions = model.predict(X_test)

# add the feature predictions to the ukraine_2021 df
for i in range(len(ukraine_2021)):
    for j in range(10):
        ukraine_2021.loc[i, f"feature_{j+1}"] = feature_predictions[i][j]

ukraine_2021 = ukraine_2021[["region", "year", "real_gdp", "feature_1", "feature_2", "feature_3", "feature_4", "feature_5", "feature_6", "feature_7", "feature_8", "feature_9", "feature_10"]]

# get the features on the prefdiction set
feature_predictions = model.predict(X_pred)

# add the feature predictions to the ukraine_2022 df
for i in range(len(ukraine_2022)):
    for j in range(10):
        ukraine_2022.loc[i, f"feature_{j+1}"] = feature_predictions[i][j]

ukraine_2022 = ukraine_2022[["region", "year", "feature_1", "feature_2", "feature_3", "feature_4", "feature_5", "feature_6", "feature_7", "feature_8", "feature_9", "feature_10"]]



Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ukraine_2021.loc[i, f"feature_{j+1}"] = feature_predictions[i][j]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ukraine_2021.loc[i, f"feature_{j+1}"] = feature_predictions[i][j]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ukraine_2021.loc[i, f"feature_{j+1}"] = feature_predictions[i][j]
A value



In [5]:
year = 2021
train_data = ukraine_stage_2
test_data = ukraine_2021
prediction_data = ukraine_2022

# change all features to floats
for i in range(1, 11):
    train_data[f"feature_{i}"] = train_data[f"feature_{i}"].astype(float)
    test_data[f"feature_{i}"] = test_data[f"feature_{i}"].astype(float)
    prediction_data[f"feature_{i}"] = prediction_data[f"feature_{i}"].astype(float)

pre_war_data = pd.concat([train_data, test_data])
selected_columns =  ["feature_" + str(i) for i in range(1, 11)]
model_type = "xgboost"
log_transform = False
scale = False
prediction_data["real_gdp"] = 0

param_grid_xgb = {
    'eta': [0.01, 0.1, 0.2, 0.3, 0.5],
    'gamma': [100, 1000, 10000],
    'max_depth': [4, 6, 8, 10],
    'min_child_weight': [1, 2, 5],
    'random_state': [0] 
}

mae, mpe, _, pred = build_model(train_data, test_data, selected_columns, model_type, param_grid_xgb, log_transform, scale)
gdp_change, best_params = predict_with_model(pre_war_data, prediction_data, selected_columns, model_type, param_grid_xgb, log_transform, scale)

print(mae, mpe) 
print(gdp_change)


1400.513376992304 4.129304602268437
-30.86347069580914


In [9]:
ukraine_stage_2

Unnamed: 0,region,"year,",real_gdp,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,year
0,Vinnytsia_Oblast,,33024.0,0.215920,0.247569,0.214081,0.205279,0.229436,0.172872,0.013083,0.208745,0.228640,0.221205,0
1,Vinnytsia_Oblast,,34609.152,0.218979,0.253163,0.217013,0.208542,0.234130,0.173977,0.009017,0.213513,0.233226,0.225674,0
2,Vinnytsia_Oblast,,36201.172992,0.215577,0.246939,0.213740,0.204883,0.228857,0.172741,0.013568,0.208158,0.228080,0.220704,0
3,Vinnytsia_Oblast,,35151.338975,0.207055,0.231305,0.205379,0.195353,0.215013,0.169566,0.025334,0.194098,0.214626,0.208243,0
4,Vinnytsia_Oblast,,37436.176009,0.209129,0.235102,0.207383,0.197603,0.218261,0.170324,0.022540,0.197397,0.217793,0.211274,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202,Chernihiv_Oblast,,21651.685716,0.144928,0.116673,0.144257,0.126476,0.115804,0.146077,0.110705,0.092877,0.117705,0.117081,0
203,Chernihiv_Oblast,,22084.71943,0.145462,0.118410,0.145479,0.127720,0.117165,0.146862,0.109138,0.094699,0.119322,0.118193,0
204,Chernihiv_Oblast,,23012.277646,0.145148,0.117389,0.144761,0.126989,0.116365,0.146401,0.110059,0.093628,0.118372,0.117539,0
205,Chernihiv_Oblast,,23403.486366,0.145147,0.117386,0.144759,0.126986,0.116362,0.146399,0.110062,0.093625,0.118369,0.117537,0
