In [1]:
import itertools
import os
import pickle

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score

from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Dropout
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


In [2]:
# Load the training table and drop exact duplicate rows in one step,
# so no duplicated row can later land in two different splits.
df = pd.read_csv('train.csv').drop_duplicates()
print(df.shape)
df.head()

(252123, 91)


Unnamed: 0,Year,S0,S1,S2,S3,S4,S5,S6,S7,S8,...,S80,S81,S82,S83,S84,S85,S86,S87,S88,S89
0,2007,44.76752,114.82099,3.83239,27.99928,1.49153,-15.90853,28.24844,3.6165,-7.24653,...,-1.89619,-471.02844,411.56205,443.01198,19.30254,309.07806,-336.91706,-14.70547,-474.44157,31.3282
1,2004,52.28942,75.73319,11.35941,-6.20582,-27.64559,-30.75995,12.50955,7.47877,9.88498,...,4.5706,1.3611,-6.52977,59.48672,3.6979,-36.92252,44.08077,3.39993,-70.07591,3.86143
2,2005,33.81773,-139.07371,134.19332,17.85216,63.47408,-25.28005,-34.65911,-5.99135,1.27848,...,54.16608,15.0453,39.09107,39.03041,3.68708,-61.88547,45.68115,6.39822,3.24471,35.74749
3,1998,41.60866,3.17811,-3.97174,23.53564,-19.68553,20.74407,18.80866,6.24474,-7.98424,...,28.08591,295.88684,54.02395,102.0288,40.47711,15.10258,-250.32293,2.81288,56.05172,3.60432
4,1987,44.49525,-32.2527,58.08217,3.73684,-32.53274,-18.72885,-15.85665,-3.34607,22.63786,...,31.44988,-136.50457,-85.11989,-74.96342,9.56921,-100.61689,-133.29315,9.19246,-97.37953,30.11015


In [6]:
# Separate the regression target ('Year') from the feature columns.
y = df['Year']
X = df.drop(columns='Year')

# Hold out 20% of the rows for testing, then take 10% of the remainder as a
# validation set; both splits use the same fixed seed.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=0.1, random_state=42
)

for label, split in (
    ("Number of train set: ", X_train),
    ("Number of validation set: ", X_val),
    ("Numebr of test set: ", X_test),
):
    print(label, split.shape[0])

# Fit the scaler on the training rows only, then apply the same statistics
# to the training, validation and test features.
scaler = StandardScaler().fit(X_train)
X_train, X_val, X_test = (scaler.transform(part) for part in (X_train, X_val, X_test))




Number of train set:  181528
Number of validation set:  20170
Numebr of test set:  50425


In [9]:
def build_mlp(input_dim, hidden_size, depth, dropout):
    """Build an uncompiled fully connected regression network.

    Args:
        input_dim: Number of input features.
        hidden_size: Number of units in every hidden layer.
        depth: Number of hidden layers.
        dropout: Dropout rate applied before every hidden layer after the first.

    Returns:
        A Sequential model with `depth` ReLU hidden layers and one linear output unit.
    """
    net = Sequential()
    net.add(Dense(hidden_size, input_dim=input_dim, activation='relu'))
    for _ in range(depth - 1):
        net.add(Dropout(dropout))
        net.add(Dense(hidden_size, activation='relu'))
    net.add(Dense(1))
    return net


hidden_sizes = [64, 128, 256]  # Width of each hidden layer
depths = [2, 3, 4]  # Number of hidden layers
dropouts = [0.3, 0.4, 0.5]  # Dropout rate
batch_sizes = [32, 64, 128]  # Mini-batch size

# Every configuration starts from the same learning rate, so set it once.
learning_rate = 0.001

results = []

# Exhaustive grid search. itertools.product iterates in the same order as four
# nested loops (hidden size outermost, batch size innermost).
for hidden_size, depth, dropout, batch_size in itertools.product(
    hidden_sizes, depths, dropouts, batch_sizes
):
    # Release the Keras global state left by the previous configuration;
    # without this, memory grows as more models are created in the loop.
    K.clear_session()

    model = build_mlp(X_train.shape[1], hidden_size, depth, dropout)

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    # Stop once val_loss has not improved for 10 epochs and roll back to the
    # best weights seen.
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Halve the learning rate after 5 epochs without val_loss improvement,
    # never going below 1e-4.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.0001)

    history = model.fit(
        X_train,
        y_train,
        epochs=200,
        batch_size=batch_size,
        validation_data=(X_val, y_val),
        callbacks=[early_stopping, reduce_lr],
        verbose=0
    )

    # Score the final weights on the full training and validation sets.
    train_loss = model.evaluate(X_train, y_train, verbose=2)
    val_loss = model.evaluate(X_val, y_val, verbose=2)

    results.append({
        'hidden_size': hidden_size,
        'depth': depth,
        'dropout': dropout,
        'batch_size': batch_size,
        'train_loss': train_loss,
        'val_loss': val_loss
    })

for result in results:
    print(result)


5673/5673 - 1s - loss: 63.8900 - 1s/epoch - 185us/step
631/631 - 0s - loss: 69.6590 - 133ms/epoch - 210us/step
5673/5673 - 1s - loss: 66.7849 - 1s/epoch - 186us/step
631/631 - 0s - loss: 70.0763 - 128ms/epoch - 203us/step
5673/5673 - 1s - loss: 65.5250 - 1s/epoch - 185us/step
631/631 - 0s - loss: 69.7306 - 129ms/epoch - 204us/step
5673/5673 - 1s - loss: 64.2362 - 1s/epoch - 188us/step
631/631 - 0s - loss: 69.8102 - 129ms/epoch - 204us/step
5673/5673 - 1s - loss: 67.0661 - 1s/epoch - 186us/step
631/631 - 0s - loss: 70.3432 - 128ms/epoch - 202us/step
5673/5673 - 1s - loss: 65.3082 - 1s/epoch - 188us/step
631/631 - 0s - loss: 69.9076 - 128ms/epoch - 204us/step
5673/5673 - 1s - loss: 68.3250 - 1s/epoch - 185us/step
631/631 - 0s - loss: 71.5267 - 132ms/epoch - 209us/step
5673/5673 - 1s - loss: 68.5359 - 1s/epoch - 185us/step
631/631 - 0s - loss: 71.6644 - 127ms/epoch - 202us/step
5673/5673 - 1s - loss: 68.8370 - 1s/epoch - 184us/step
631/631 - 0s - loss: 72.0904 - 127ms/epoch - 201us/step
5

In [10]:
def _validation_loss(entry):
    """Return the validation loss stored in one grid-search result entry."""
    return entry['val_loss']


# Select the configuration with the lowest validation loss
# (min() keeps the first entry when several share the minimum).
best_result = min(results, key=_validation_loss)

print(f"Miglior configurazione: {best_result}")

Miglior configurazione: {'hidden_size': 256, 'depth': 2, 'dropout': 0.4, 'batch_size': 64, 'train_loss': 50.56438064575195, 'val_loss': 67.57671356201172}


### I migliori risultati si ottengono con la seguente rete neurale 

In [7]:
# Network for the selected configuration: two 256-unit ReLU hidden layers
# with dropout 0.4 between them and a single linear output unit.
model = Sequential([
    Dense(256, input_dim=X_train.shape[1], activation='relu'),
    Dropout(0.4),
    Dense(256, activation='relu'),
    Dense(1),
])

model.summary()

learning_rate = 0.001
optimizer = Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss='mean_squared_error')

# Stop training when val_loss has not improved for 10 epochs
# (the best weights seen are restored).
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Halve the learning rate when val_loss has not improved for 5 epochs,
# with a floor of 1e-4.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=0.0001,
)

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=64,
    callbacks=[early_stopping, reduce_lr],
)

# Evaluate silently on the training and validation sets with the final weights.
train_loss = model.evaluate(X_train, y_train, verbose=0)
val_loss = model.evaluate(X_val, y_val, verbose=0)

print(f"Train Loss migliore epoca: {train_loss}")
print(f"Validation Loss migliore epoca: {val_loss}")

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 256)               23296     
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                                 
 dense_4 (Dense)             (None, 256)               65792     
                                                                 
 dense_5 (Dense)             (None, 1)                 257       
                                                                 
Total params: 89,345
Trainable params: 89,345
Non-trainable params: 0
_________________________________________________________________
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/

In [8]:
# Predict on the held-out test split and report the regression metrics.
y_pred = model.predict(X_test)

# Compute the MSE once; the RMSE is its square root.
mse = mean_squared_error(y_test, y_pred)

report = (
    ("MSE:", mse),
    ("RMSE:", np.sqrt(mse)),
    ("MAE:", mean_absolute_error(y_test, y_pred)),
    ("MAPE:", mean_absolute_percentage_error(y_test, y_pred)),
    ("R2:", r2_score(y_test, y_pred)),
)
for metric_name, metric_value in report:
    print(metric_name, metric_value)

MSE: 67.1265814893956
RMSE: 8.193081318368298
MAE: 5.648528891144026
MAPE: 0.00283522677068378
R2: 0.3841174265275764


In [6]:
# Persist the fitted scaler next to the trained network.
# Create the output directory first so the cell also works on a fresh checkout.
os.makedirs("modelli/NN", exist_ok=True)

# The context manager closes the file even if pickling raises.
with open("modelli/NN/scaler_nn.save", "wb") as file_scaler:
    pickle.dump(scaler, file_scaler)

model.save("modelli/NN/model")

2024-02-05 09:24:38.441149: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,256]
	 [[{{node inputs}}]]
2024-02-05 09:24:38.585232: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,256]
	 [[{{node inputs}}]]


INFO:tensorflow:Assets written to: modelli/NN/model/assets
