# Run a NN with x and $\xi$ as the input to the NN model and second stage objective as the target

## Import Modules

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from ast import literal_eval
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import json
import csv

## Load the data of first stage decisions, uncertainities, instances and second stage objectives

In [2]:
pwd()

'/ztank/scratch/user/u.rd143338/ss_from_nn/Neural_second_stage'

In [3]:
## Directory of data for 10 items

direc = pwd()
filename = "instance_1_250_items_30_num_of_first_stage_11_scenarios_100_total_obj"

# path contains the location to the csv files
path=os.path.join(direc, filename)

In [4]:
## Directory for saving files

direc = pwd()
filename = "instance 30 scenario 50"

# path contains the location to the csv files
path_save=os.path.join(direc, filename)

In [5]:
path_save

'/ztank/scratch/user/u.rd143338/ss_from_nn/Neural_second_stage/instance 30 scenario 50'

In [6]:
def save_weights_biases_to_json(model, folder, filename):
    # Extract weights and biases from the model
    weights_biases = {}
    for name, param in model.named_parameters():
        weights_biases[name] = param.data.tolist()

    # Create the folder if it doesn't exist
    if not os.path.exists(folder):
        os.makedirs(folder)

    # Save weights and biases to a JSON file
    file_path = os.path.join(folder, filename)
    with open(file_path, 'w') as json_file:
        json.dump(weights_biases, json_file)


In [7]:
def combine_csv_data(folder_path):
    # Get a list of all CSV files in the folder
    csv_files = [file for file in os.listdir(folder_path) if file.endswith('.csv')]
    
    # Initialize an empty DataFrame to store combined data
    dfs = []
    
    # Loop through each CSV file
    for file in csv_files:
        # Read the CSV file into a DataFrame
        file_path = os.path.join(folder_path, file)
        df = pd.read_csv(file_path)
        
        # Append the DataFrame to the combined DataFrame
        dfs.append(df)
    combined_df = pd.concat(dfs, ignore_index=True)
    return combined_df

## Combined data from csv files

folder_path = path
combined_data = combine_csv_data(folder_path)
combined_test_data = combined_data[combined_data["seed"]>200].reset_index(drop=True)
combined_data = combined_data[combined_data["seed"]<=200].reset_index(drop=True)

In [10]:
combined_data.shape

(110000, 15)

## Shuffle the dataset for training

In [11]:
combined_data=combined_data.sample(frac=1, random_state=random.seed(1)).reset_index(drop=True)
combined_data.head()

Unnamed: 0,f,x,r,second_stage_obj,First stage value,p_bar,Reduced Capacity,t,p_hat,y,original_capacity,w,uncern,gamma,seed
0,"[396.51219577782365, 1297.8095999554187, 613.7...","[-0.0, 1.0, 1.0, -0.0, -0.0, 1.0, 1.0, 1.0, -0...","[0.0, 1.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0...",-20095.33699,-16027.892714,"[355.7005202119573, 993.3520692471163, 448.251...",11101.129865,"[658.9507167826202, 17.97450708363044, 402.464...","[308.1957699511097, 745.5244443487112, 100.020...","[0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, ...",11685.399858,"[853.611307630359, 300.00726164821776, 723.006...","[0.973, 0.2367, 0.007, 0.1573, 0.3078, 0.1532,...",6.0,64
1,"[623.6289195525793, 1081.5923852784172, 612.38...","[0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...",-16263.838092,-13166.418746,"[451.24504903570255, 828.5511121456775, 500.34...",6055.474844,"[520.3593549581708, 39.10659894389094, 286.759...","[292.0484114336391, 216.81785799918933, 151.00...","[0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, ...",6055.474844,"[613.3055991877771, 440.2250701920572, 291.024...","[0.2969, 0.1141, 0.1906, 0.0383, 0.0057, 0.014...",6.0,81
2,"[1253.4265962772395, 556.4247037403201, 1199.2...","[1.0, 1.0, 1.0, 1.0, 1.0, 0.0, -0.0, 1.0, 0.0,...","[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...",-13137.21865,-10012.129735,"[981.6582930980816, 383.32510821236207, 863.99...",4453.682894,"[26.692930038231935, 194.57743276540472, 221.0...","[614.1671076219156, 120.6110418808221, 588.167...","[1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, ...",4948.536549,"[232.3117396742037, 303.24539937694857, 272.11...","[0.1155, 0.0884, 0.0358, 0.0336, 0.088, 0.0264...",6.0,122
3,"[323.08191164133547, 1203.2056738115616, 1133....","[-0.0, 1.0, 1.0, 1.0, -0.0, 1.0, 1.0, -0.0, 1....","[0.0, 1.0, -0.0, 0.0, 0.0, -0.0, 0.0, 0.0, 1.0...",-18684.906823,-14448.08057,"[272.2318350006311, 861.5189571203193, 760.206...",10129.73379,"[735.364755016534, 93.76124096413382, 530.2067...","[141.27353893889182, 424.45636372006715, 409.1...","[0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, ...",11576.838617,"[944.638879203141, 667.9007690187085, 875.5204...","[0.1799, 0.0902, 0.0378, 0.0171, 0.0235, 0.051...",4.5,57
4,"[702.4213615041667, 854.7385241285273, 1297.18...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, -0.0, 1.0,...","[-0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0,...",-18641.801475,-14374.18837,"[619.4876425897213, 630.9511011185651, 978.567...",8924.598138,"[370.4243386903249, 20.728789708443763, 229.63...","[454.5042398553577, 524.528056085521, 501.9825...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, ...",11515.610501,"[654.065418918689, 153.50070130538387, 791.126...","[0.0009, 0.227, 0.0927, 0.1838, 0.0729, 0.09, ...",6.0,151


## The element list of the dataframe is split and new dataframe for instances, X and uncertainities are formed

In [12]:
def split_list_column(df, col_name):
    col=df[col_name].apply(literal_eval).apply(pd.Series)
    new_columns = [f'{col_name}_{i}' for i in range(1,len(col.columns)+1)]
    col.columns=new_columns
    return col

In [13]:
instance_parameter = ['f', 'p_bar', 't', 'p_hat', 'original_capacity', 'w', 'gamma']

In [14]:
instance_df=pd.DataFrame()
for i in instance_parameter:
    if i!='original_capacity' and i!='gamma':
        instance_df=pd.concat([instance_df, split_list_column(combined_data,i)], axis=1)
    else:
        instance_df=pd.concat([instance_df, combined_data[i]], axis=1)

In [15]:
instance_df.shape

(110000, 152)

In [16]:
p_bar = instance_df.loc[: , instance_df.columns.str.startswith('p_bar')].reset_index(drop=True)

In [17]:
p_bar

Unnamed: 0,p_bar_1,p_bar_2,p_bar_3,p_bar_4,p_bar_5,p_bar_6,p_bar_7,p_bar_8,p_bar_9,p_bar_10,...,p_bar_21,p_bar_22,p_bar_23,p_bar_24,p_bar_25,p_bar_26,p_bar_27,p_bar_28,p_bar_29,p_bar_30
0,355.700520,993.352069,448.251782,49.615354,39.880775,861.371579,481.199777,629.292516,330.464351,940.928686,...,906.587488,547.259399,504.896821,930.298415,840.357279,454.571237,407.002246,622.795989,255.391495,723.444461
1,451.245049,828.551112,500.348184,417.844589,889.071221,504.839585,550.074016,434.011981,987.160709,291.770814,...,425.839505,756.133285,771.896988,116.525823,760.526197,921.137421,503.677264,909.711514,411.812343,721.403019
2,981.658293,383.325108,863.991257,697.432032,926.911016,686.134790,288.246217,697.917332,428.762568,293.282648,...,358.707533,624.684349,131.275083,100.557140,543.332195,631.620488,91.539625,549.677607,297.158315,190.495717
3,272.231835,861.518957,760.206190,754.535561,271.581730,613.204899,525.153060,338.547404,755.578391,426.308419,...,142.348736,249.824057,556.234055,990.389523,490.080668,447.610723,374.467420,621.505147,398.149520,616.580119
4,619.487643,630.951101,978.567517,568.807481,969.247187,647.795203,320.863978,284.979759,419.758544,310.364364,...,308.962467,486.936609,883.757275,665.768010,525.049157,453.565032,709.626002,762.306653,503.552278,873.383072
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109995,406.974009,134.835375,221.696547,949.975132,524.460573,692.092738,79.673939,799.199671,852.728126,811.924969,...,542.180860,910.114067,612.059841,105.043770,205.106794,379.878876,41.327088,108.364379,284.911174,806.399765
109996,409.253690,187.868659,382.280181,438.395836,90.506140,983.559855,615.280176,433.334421,269.869480,790.341454,...,635.495003,705.257189,932.501732,149.921551,920.986371,851.515818,968.173504,42.388695,824.213505,678.313412
109997,300.621711,757.784155,995.033276,54.534988,426.567721,781.574635,162.549687,793.789982,722.199047,144.112877,...,842.102305,761.536460,498.388247,42.538096,451.302462,727.895623,61.076862,868.118344,627.368470,17.033234
109998,494.126779,581.907702,18.792506,306.733403,996.485297,850.778355,47.389366,510.455286,982.581856,993.294914,...,78.154029,271.103371,954.084608,78.562945,675.370756,686.864468,480.857602,590.540388,429.565716,411.500715


In [18]:
f = instance_df.loc[: , instance_df.columns.str.startswith('f')].reset_index(drop=True)

In [19]:
f

Unnamed: 0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,f_8,f_9,f_10,...,f_21,f_22,f_23,f_24,f_25,f_26,f_27,f_28,f_29,f_30
0,396.512196,1297.809600,613.718375,59.185197,47.946346,1056.401838,631.779698,893.372085,366.330347,1174.537895,...,1148.276296,602.791108,577.487959,1150.598624,1063.628843,631.597793,541.957660,838.179388,286.753118,801.872119
1,623.628920,1081.592385,612.387397,498.180039,1191.312302,571.571492,767.911943,482.113584,1222.010147,339.076379,...,557.867766,1030.168603,1015.223277,143.195646,1000.014423,1300.502422,660.518773,1072.048198,515.032453,977.944366
2,1253.426596,556.424704,1199.241427,884.281025,1382.551467,1022.653514,393.083434,817.754841,522.139677,406.952118,...,489.971271,867.503398,191.314264,133.305591,690.440925,858.471809,109.984531,720.831460,402.678179,244.153171
3,323.081912,1203.205674,1133.803548,874.054442,356.635014,886.925487,616.162509,486.314744,1013.968744,521.157475,...,179.868529,280.758600,806.853533,1313.871708,694.291537,657.230354,490.044163,817.036387,521.570367,684.186356
4,702.421362,854.738524,1297.184654,651.214259,1336.018114,746.598288,410.188603,315.122844,496.577546,396.307285,...,438.806849,628.142642,1054.487544,870.338208,742.064592,650.210999,1030.728758,1009.716738,722.198458,1259.052669
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109995,508.400908,156.740917,289.148614,1364.355644,599.891544,971.933136,111.605846,883.847914,1084.300412,978.257699,...,637.931515,1250.684956,678.994123,116.925166,233.645120,443.261377,52.680572,140.263440,389.068969,1160.758085
109996,591.717100,214.295276,533.906373,614.167505,121.902593,1267.037622,683.391050,540.649419,324.659635,908.013121,...,805.499629,936.237489,1230.463844,217.480977,1292.851605,1169.187127,1228.142900,63.482305,1125.518741,761.350182
109997,330.726783,847.939582,1313.493760,77.294263,562.768650,1165.494092,226.612155,1123.066087,950.502763,192.180943,...,1061.196682,1000.040399,664.810709,57.744120,614.965444,1060.948272,71.407806,1298.210960,758.184912,20.711536
109998,562.489980,806.524818,25.785823,364.639071,1252.327599,1039.483778,63.860173,640.240759,1342.227796,1235.882654,...,92.209535,393.851605,1353.231665,101.899278,919.957596,1019.198237,607.587580,719.001927,497.834304,473.371679


In [20]:
X_df=split_list_column(combined_data, "x")

In [21]:
X_df.shape

(110000, 30)

In [22]:
Uncer_df=split_list_column(combined_data, "uncern")

In [23]:
Uncer_df.shape

(110000, 30)

In [24]:
####### Master problem objective
######  sum_i_((f_i - p_bar_i) - * x_i)

In [25]:
np.sum((f.values - p_bar.values) * X_df.values, axis=1)

array([4561.28661058, 3866.58443701, 3220.07389161, ..., 4694.17634219,
       4356.5687916 , 3573.82450646])

In [26]:
combined_data.columns

Index(['f', 'x', 'r', 'second_stage_obj', 'First stage value', 'p_bar',
       'Reduced Capacity', 't', 'p_hat', 'y', 'original_capacity', 'w',
       'uncern', 'gamma', 'seed'],
      dtype='object')

In [27]:
combined_data["second_stage_obj"]

0        -20095.336990
1        -16263.838092
2        -13137.218650
3        -18684.906823
4        -18641.801475
              ...     
109995   -12835.288650
109996   -15343.637301
109997   -18573.031778
109998   -19756.175924
109999   -14058.097285
Name: second_stage_obj, Length: 110000, dtype: float64

In [28]:
####### Very important line, always check!!!! This only gives the second stage values of the loss
target=combined_data["second_stage_obj"] + np.sum((f.values - p_bar.values) * X_df.values, axis=1)

In [29]:
target

0        -15534.050379
1        -12397.253655
2         -9917.144758
3        -14145.234771
4        -14353.283008
              ...     
109995    -9885.430475
109996   -11713.580727
109997   -13878.855435
109998   -15399.607133
109999   -10484.272779
Name: second_stage_obj, Length: 110000, dtype: float64

## Neural Network Training

In [30]:
# Convert target column to numpy array
target_array = target.values
target_array

array([-15534.0503793 , -12397.25365485,  -9917.14475847, ...,
       -13878.85543537, -15399.60713263, -10484.27277896])

In [31]:
target_array.max()

-6500.939944613325

In [32]:
instance_df.shape[1],X_df.shape[1],Uncer_df.shape[1]

(152, 30, 30)

In [26]:
class MyModel(nn.Module):
    def __init__(self, instance_size=152, X_size=30, Uncern_size=30):
        super(MyModel, self).__init__()
        self.embedding_instance = nn.Linear(instance_size, 64)
        self.embedding_X = nn.Linear(X_size, 15)
        self.embedding_uncern = nn.Linear(Uncern_size, 15)
        self.fc1 = nn.Linear(94, 128)  
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64,30)
        self.fc4= nn.Linear(30,10)
        self.fc5= nn.Linear(10,1)
        


    def forward(self, instance, X, uncern):
        
        instance_embed = torch.relu(self.embedding_instance(instance))
        X_embed = torch.relu(self.embedding_X(X))
        uncern_embed = torch.relu(self.embedding_uncern(uncern))
        
        concatenated = torch.cat((instance_embed, X_embed, uncern_embed), dim=1)
        
        output = torch.relu(self.fc1(concatenated))
        output = torch.relu(self.fc2(output))
        output = torch.relu(self.fc3(output))
        output = torch.relu(self.fc4(output))
        output = self.fc5(output)

        return output

In [77]:
model = MyModel()

# Access fc1 layer
fc1_layer = model.fc1

# Get the named children of fc1 layer
fc1_children = list(fc1_layer.named_children())

# Print the named children
print(fc1_children)

[]


In [78]:
fc1_params = list(fc1_layer.parameters())

# Print the parameters
print(fc1_params)


[Parameter containing:
tensor([[ 0.0677,  0.0529, -0.0459,  ..., -0.0929,  0.0844,  0.0959],
        [-0.0004, -0.0804,  0.0958,  ..., -0.0500,  0.0868,  0.0470],
        [ 0.0457, -0.0456,  0.0348,  ..., -0.0623,  0.0386, -0.0677],
        ...,
        [ 0.0781,  0.0374,  0.0920,  ...,  0.0997,  0.0073, -0.0411],
        [-0.0313, -0.0489,  0.0265,  ..., -0.0956,  0.0225,  0.0182],
        [ 0.0708, -0.0146, -0.0173,  ...,  0.0557,  0.0405, -0.0254]],
       requires_grad=True), Parameter containing:
tensor([ 5.9467e-02, -8.8047e-02,  5.5949e-02, -8.4145e-02,  3.6073e-02,
        -4.6981e-02,  5.9066e-02,  3.3266e-02,  8.7923e-02, -3.6221e-02,
         5.6682e-02, -7.3006e-02, -8.7046e-02, -1.0283e-01,  7.0124e-02,
        -1.8964e-02,  8.9428e-02,  2.4599e-02, -4.1385e-02,  9.3615e-02,
        -1.4957e-02,  5.2630e-02, -4.3941e-02,  4.4552e-03, -6.9760e-02,
         3.8948e-02, -1.0104e-01, -3.1709e-02, -3.6666e-02,  7.3714e-02,
         2.2916e-02,  5.1897e-02,  7.1406e-02, -2.8822e

In [34]:
# Split data into training and test sets
instance_train, instance_test, \
X_train, X_test, \
uncern_train, uncern_test, \
target_train, target_test = train_test_split(instance_df.values, X_df.values, Uncer_df.values, target_array, test_size=0.2, random_state=1)


### Since the X_train is always scaled between 0 or 1, here we shall just scale the values of the intsnace_train and uncern_train. Once this is achieved, we will get the values of x_min and x_max from the training set which we wil further use to scale the values of the testing set.


In [35]:
scaler_instance = MinMaxScaler()
scaler_instance.fit(instance_train)
instance_train_transformed = scaler_instance.transform(instance_train)
instance_test_transformed = scaler_instance.transform(instance_test)

In [36]:
scaler_uncern = MinMaxScaler()
scaler_uncern.fit(uncern_train)
uncern_train_transformed = scaler_uncern.transform(uncern_train)
uncern_test_transformed = scaler_uncern.transform(uncern_test)

In [37]:
scaler_target = MinMaxScaler()
scaler_target.fit(target_train.reshape(-1,1))
target_train_transformed = scaler_target.transform(target_train.reshape(-1,1))
target_test_transformed = scaler_target.transform(target_test.reshape(-1,1))

In [38]:
scaler_target.data_max_

array([-6527.5427947])

In [39]:
scaler_target.data_min_

array([-18754.54910045])

In [40]:
min_max_scalers={}
min_max_scalers["scaler_instance.data_max_"]=scaler_instance.data_max_.tolist()
min_max_scalers["scaler_instance.data_min_"]=scaler_instance.data_min_.tolist()
min_max_scalers["scaler_uncern.data_max_"]=scaler_uncern.data_max_.tolist()
min_max_scalers["scaler_uncern.data_min_"]=scaler_uncern.data_min_.tolist()
min_max_scalers["scaler_target.data_max_"]=scaler_target.data_max_.tolist()
min_max_scalers["scaler_target.data_min_"]=scaler_target.data_min_.tolist()

# Specify the file path

file = "min_max_scalers_inst_30.json"
file_path =  os.path.join(path_save, file)
# Write dictionary to JSON file
with open(file_path, 'w') as json_file:
    json.dump(min_max_scalers, json_file)

print("JSON file created successfully!")

JSON file created successfully!


In [41]:
# Convert numpy arrays to PyTorch tensors
instance_train_tensor = torch.tensor(instance_train_transformed, dtype=torch.float32)
instance_test_tensor = torch.tensor(instance_test_transformed, dtype=torch.float32)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
uncern_train_tensor = torch.tensor(uncern_train_transformed, dtype=torch.float32)
uncern_test_tensor = torch.tensor(uncern_test_transformed, dtype=torch.float32)
target_train_tensor = torch.tensor(target_train_transformed, dtype=torch.float32)
target_test_tensor = torch.tensor(target_test_transformed, dtype=torch.float32)

In [42]:
# Create training and test datasets
train_dataset = TensorDataset(instance_train_tensor, X_train_tensor, uncern_train_tensor, target_train_tensor)
test_dataset = TensorDataset(instance_test_tensor, X_test_tensor, uncern_test_tensor, target_test_tensor)


In [43]:
# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [44]:
# Move model to GPU
model = MyModel().to(device)

# Define loss function and optimizer
criterion = nn.MSELoss()
# Create training and test data loaders
batch_size = 256
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [48]:
optimizer = optim.Adamax(model.parameters(), lr=0.01)

# Define learning rate scheduler
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.9, patience=30, verbose=True)

# Train the model
num_epochs = 1000
for epoch in range(num_epochs):
    running_loss = 0.0
    running_mae = 0.0  # Initialize running MAE
    for i, data in enumerate(train_loader):
        inputs_instance, inputs_X, inputs_uncern, labels = data
        # Move tensors to GPU
        inputs_instance, inputs_X, inputs_uncern, labels = inputs_instance.to(device), inputs_X.to(device), inputs_uncern.to(device), labels.to(device)
        
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs_instance, inputs_X, inputs_uncern)
        loss = criterion(outputs, labels.view(-1, 1))  # Assuming labels is a column vector
        
        # Calculate Mean Absolute Error (MAE)
        mae = torch.mean(torch.abs(outputs - labels.view(-1, 1)))
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        running_mae += mae.item()  # Accumulate MAE
        
        ######## Print the true loss on the inverse transformed variables
        
        ############################################################################
        outputs_invtransformed = scaler_target.inverse_transform(outputs.cpu().detach().numpy())
        labels_invtransformed = scaler_target.inverse_transform(labels.view(-1, 1).cpu().detach().numpy())
        outputs_invtransformed = outputs_invtransformed.flatten()
        labels_invtransformed = labels_invtransformed.flatten()
        # Calculate MSE
        mae_invtransformed = np.mean(np.abs(outputs_invtransformed - labels_invtransformed))
        # Calculate MAPE
        mape_invtransformed = np.mean(np.abs((outputs_invtransformed - labels_invtransformed) /labels_invtransformed)) * 100
        #############################################################################
        
        
        if epoch % 100 == 0 and (i + 1) % 300 == 0:  # Print every 100 epochs and 100th mini-batch
            print('[%d, %5d] epoch_loss: %.6f batch_loss: %.6f batch_mae: %.6f' %
                  (epoch + 1, i + 1, running_loss / 100, loss.item(), mae.item()))
            running_loss = 0.0
            
            print('True MAE: %.6f , True MAPE: %.6f '%
                 (mae_invtransformed, mape_invtransformed))
    
        ############################################################################
    
    # Perform validation and adjust learning rate
    with torch.no_grad():
        test_loss = 0.0
        test_mae = 0.0
        mae_invtransformed_test = 0.0
        mape_invtransformed_test = 0.0
        for data in test_loader:
            inputs_instance, inputs_X, inputs_uncern, labels = data
            # Move tensors to GPU
            inputs_instance, inputs_X, inputs_uncern, labels = inputs_instance.to(device), inputs_X.to(device), inputs_uncern.to(device), labels.to(device)
            
            outputs = model(inputs_instance, inputs_X, inputs_uncern)
            test_loss += criterion(outputs, labels.view(-1, 1)).item()
            test_mae += torch.mean(torch.abs(outputs - labels.view(-1, 1))).item()  # Calculate MAE for test set
            
            ####################################################################################
            outputs_invtransformed_test = scaler_target.inverse_transform(outputs.cpu().detach().numpy())
            labels_invtransformed_test = scaler_target.inverse_transform(labels.view(-1, 1).cpu().detach().numpy())
            outputs_invtransformed_test = outputs_invtransformed.flatten()
            labels_invtransformed_test = labels_invtransformed.flatten()
            # Calculate MSE
            mae_invtransformed_test += np.mean(np.abs(outputs_invtransformed_test - labels_invtransformed_test))
            # Calculate MAPE
            mape_invtransformed_test += np.mean(np.abs((outputs_invtransformed_test - labels_invtransformed_test) /labels_invtransformed_test)) * 100
            
            ####################################################################################
            
        scheduler.step(test_loss)
        
    if epoch % 100 == 0:  # Print every 100 epochs
        print('[%d] Training MAE: %.6f ***Validation*** MAE: %.6f #######  True MAE on test batch %.3f , True MAPE on test batch %.3f' % (epoch + 1, running_mae / len(train_loader), test_mae / len(test_loader), mae_invtransformed_test/len(test_loader), mape_invtransformed_test/len(test_loader)))

print('Finished Training')

[1,   300] epoch_loss: 0.000151 batch_loss: 0.000023 batch_mae: 0.003598
True MAE: 43.998329 , True MAPE: 0.398016 
[1] Training MAE: 0.004490 ***Validation*** MAE: 0.004287 #######  True MAE on test batch 50.532 , True MAPE on test batch 0.464
Epoch 00033: reducing learning rate of group 0 to 9.0000e-03.
Epoch 00064: reducing learning rate of group 0 to 8.1000e-03.
Epoch 00095: reducing learning rate of group 0 to 7.2900e-03.
[101,   300] epoch_loss: 0.000081 batch_loss: 0.000020 batch_mae: 0.003305
True MAE: 40.411575 , True MAPE: 0.371967 
[101] Training MAE: 0.003921 ***Validation*** MAE: 0.004319 #######  True MAE on test batch 49.472 , True MAPE on test batch 0.451
Epoch 00126: reducing learning rate of group 0 to 6.5610e-03.
Epoch 00180: reducing learning rate of group 0 to 5.9049e-03.
[201,   300] epoch_loss: 0.000074 batch_loss: 0.000029 batch_mae: 0.004077
True MAE: 49.851288 , True MAPE: 0.460420 
[201] Training MAE: 0.003776 ***Validation*** MAE: 0.004472 #######  True MAE 

In [49]:
model

MyModel(
  (embedding_instance): Linear(in_features=152, out_features=64, bias=True)
  (embedding_X): Linear(in_features=30, out_features=15, bias=True)
  (embedding_uncern): Linear(in_features=30, out_features=15, bias=True)
  (fc1): Linear(in_features=94, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=30, bias=True)
  (fc4): Linear(in_features=30, out_features=10, bias=True)
  (fc5): Linear(in_features=10, out_features=1, bias=True)
)

In [50]:
# Specify the file path where you want to save the model weights

file = "model_weights_30_instance_50_scen.pth"
file_path = os.path.join(path_save, file)
# Save the model state dictionary
torch.save(model.state_dict(), file_path)

print("Model weights saved successfully at:", file_path)

Model weights saved successfully at: /ztank/scratch/user/u.rd143338/ss_from_nn/Neural_second_stage/instance 30 scenario 50/model_weights_30_instance_50_scen.pth


In [51]:
save_weights_biases_to_json(model, path_save, 'model_weight_30_instance_50_scen.json')

In [60]:
inputs_instance.shape[1]

152

In [61]:
# Create a list to store dictionaries of data
data_dicts = []

with torch.no_grad():
    for data in test_loader:
        inputs_instance, inputs_X, inputs_uncern, labels = data
        inputs_instance, inputs_X, inputs_uncern, labels = inputs_instance.to(device), inputs_X.to(device), inputs_uncern.to(device), labels.to(device)

        outputs = model(inputs_instance, inputs_X, inputs_uncern).cpu().numpy()  # Convert predictions to numpy array
        labels = labels.cpu().numpy()  # Convert labels to numpy array
        
        # Iterate through batch
        for i in range(len(labels)):
            # Prepare a dictionary for row data
            row_data = {}
            for j in range(inputs_instance.shape[1]):
                row_data['Input_Instance_' + str(j)] = inputs_instance[i][j].item()
            for j in range(inputs_X.shape[1]):
                row_data['Input_X_' + str(j)] = inputs_X[i][j].item()
                row_data['Input_Uncern_' + str(j)] = inputs_uncern[i][j].item()
            row_data['Predicted_Output'] = outputs[i][0]
            row_data['Real_Output'] = labels[i]
            
            # Append row dictionary to the list
            data_dicts.append(row_data)

# Create DataFrame from the list of dictionaries
test_data_df = pd.DataFrame(data_dicts)
test_data_df = pd.concat([test_data_df.iloc[:,:inputs_instance.shape[1]], test_data_df[sorted(test_data_df.iloc[:,inputs_instance.shape[1]:])]], axis=1)



In [62]:
# Create a list to store dictionaries of data
data_dicts = []

with torch.no_grad():
    for data in train_loader:
        inputs_instance, inputs_X, inputs_uncern, labels = data
        inputs_instance, inputs_X, inputs_uncern, labels = inputs_instance.to(device), inputs_X.to(device), inputs_uncern.to(device), labels.to(device)

        outputs = model(inputs_instance, inputs_X, inputs_uncern).cpu().numpy()  # Convert predictions to numpy array
        labels = labels.cpu().numpy()  # Convert labels to numpy array
        
        # Iterate through batch
        for i in range(len(labels)):
            # Prepare a dictionary for row data
            row_data = {}
            for j in range(inputs_instance.shape[1]):
                row_data['Input_Instance_' + str(j)] = inputs_instance[i][j].item()
            for j in range(inputs_X.shape[1]):
                row_data['Input_X_' + str(j)] = inputs_X[i][j].item()
                row_data['Input_Uncern_' + str(j)] = inputs_uncern[i][j].item()
            row_data['Predicted_Output'] = outputs[i][0]
            row_data['Real_Output'] = labels[i]
            
            # Append row dictionary to the list
            data_dicts.append(row_data)

# Create DataFrame from the list of dictionaries
train_data_df = pd.DataFrame(data_dicts)
train_data_df = pd.concat([train_data_df.iloc[:,:inputs_instance.shape[1]], train_data_df[sorted(train_data_df.iloc[:,inputs_instance.shape[1]:])]], axis=1)

In [63]:
train_data_df

Unnamed: 0,Input_Instance_0,Input_Instance_1,Input_Instance_2,Input_Instance_3,Input_Instance_4,Input_Instance_5,Input_Instance_6,Input_Instance_7,Input_Instance_8,Input_Instance_9,...,Input_X_29,Input_X_3,Input_X_4,Input_X_5,Input_X_6,Input_X_7,Input_X_8,Input_X_9,Predicted_Output,Real_Output
0,0.164730,0.192093,0.974959,0.899141,0.043330,0.654301,0.457559,0.254529,0.544263,0.988376,...,1.0,1.0,-0.0,1.0,1.0,-0.0,1.0,1.0,0.764892,[0.76596874]
1,0.546057,0.483998,0.773054,0.257339,0.172176,0.269748,0.485393,0.307345,0.165810,0.447696,...,-0.0,1.0,-0.0,1.0,1.0,-0.0,-0.0,1.0,0.602053,[0.6017551]
2,0.040866,0.305102,0.261795,0.840297,0.502185,0.534389,0.399226,0.666784,0.162492,0.425661,...,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.391674,[0.39263067]
3,0.013430,0.925320,0.047815,0.086162,0.205996,0.179840,0.074682,0.171070,0.156180,1.000000,...,1.0,1.0,1.0,1.0,-0.0,1.0,1.0,1.0,0.547786,[0.5474196]
4,0.755470,0.376943,0.353249,0.602103,0.757552,0.347509,0.867167,0.556594,0.459876,0.543954,...,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.316686,[0.31746706]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87995,0.452325,0.518258,0.026594,0.560850,0.431025,0.900441,0.518797,0.941409,0.689752,0.250133,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.546588,[0.5441008]
87996,0.149431,0.605124,0.516706,0.624934,0.244022,0.580957,0.929052,0.603143,0.474768,0.105521,...,1.0,1.0,0.0,1.0,1.0,1.0,1.0,-0.0,0.520099,[0.52055556]
87997,0.217459,0.040286,0.769093,0.866289,0.111609,0.631405,0.665897,0.131020,0.558484,0.701189,...,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.335764,[0.34188482]
87998,0.139850,0.773235,0.890499,0.570399,0.633835,0.027000,0.463572,0.159583,0.718375,0.715948,...,1.0,1.0,1.0,-0.0,1.0,-0.0,1.0,1.0,0.333778,[0.33160302]


In [64]:
data_df = pd.concat([train_data_df, test_data_df])
file = os.path.join(path_save, 'test_results_30_instance_50_scen.csv')
data_df.to_csv(file, index=False)

print("CSV file saved successfully.")

CSV file saved successfully.


### Instantiate the pytorch model for a stored .pth file

In [65]:
# Function to hook to the forward pass of all layers
def activation_hook(name):
    def hook(module, input, output):
        activated_neurons = torch.relu(output) > 0  # Using ReLU activation function
        activations_dict[name] = activated_neurons
    return hook

In [66]:

row = 100
    # Create an instance of your model
model = MyModel()
model.load_state_dict(torch.load(os.path.join(path_save, "model_weights_30_instance_50_scen.pth")))

# Dictionary to store activated neurons for each layer
activations_dict = {}

# Register the hook to all layers of the model
for name, module in model.named_children():
    module.register_forward_hook(activation_hook(name))

with torch.no_grad():
    output = model(train_dataset.tensors[0][row].reshape(1,-1), train_dataset.tensors[1][row].reshape(1,-1), train_dataset.tensors[2][row].reshape(1,-1))
result = np.concatenate([activations_dict[key].numpy().reshape(-1) for key in ['embedding_instance', 'embedding_X', 'embedding_uncern', 'fc1', 'fc2', 'fc3']])
# # Print the activated neurons for each layer
# for layer_name, activated_neurons in activations_dict.items():
#     print(f"Activated neurons for {layer_name}: {activated_neurons}")

In [67]:
result = []
n_row = train_dataset.tensors[0].shape[0]
for i in range(0,n_row):
    model = MyModel()
    model.load_state_dict(torch.load(os.path.join(path_save, "model_weights_30_instance_50_scen.pth")))

    # Dictionary to store activated neurons for each layer
    activations_dict = {}

    # Register the hook to all layers of the model
    for name, module in model.named_children():
        module.register_forward_hook(activation_hook(name))

    with torch.no_grad():
        output = model(train_dataset.tensors[0][i].reshape(1,-1), train_dataset.tensors[1][i].reshape(1,-1), train_dataset.tensors[2][i].reshape(1,-1))
    result.append(np.concatenate([activations_dict[key].numpy().reshape(-1) for key in ['embedding_instance', 'embedding_X', 'embedding_uncern', 'fc1', 'fc2', 'fc3','fc4']]))
activations = pd.DataFrame(result)

In [68]:
result_test = []
n_row_t = test_dataset.tensors[0].shape[0]
for i in range(0,n_row_t):
    model = MyModel()
    model.load_state_dict(torch.load(os.path.join(path_save, "model_weights_30_instance_50_scen.pth")))

    # Dictionary to store activated neurons for each layer
    activations_dict = {}

    # Register the hook to all layers of the model
    for name, module in model.named_children():
        module.register_forward_hook(activation_hook(name))

    with torch.no_grad():
        output = model(test_dataset.tensors[0][i].reshape(1,-1), test_dataset.tensors[1][i].reshape(1,-1), test_dataset.tensors[2][i].reshape(1,-1))
    result_test.append(np.concatenate([activations_dict[key].numpy().reshape(-1) for key in ['embedding_instance', 'embedding_X', 'embedding_uncern', 'fc1', 'fc2', 'fc3', 'fc4']]))

activations_test = pd.DataFrame(result_test)

In [69]:
act_train = (activations.sum()/train_dataset.tensors[0].shape[0]).to_list()
act_test = (activations_test.sum()/test_dataset.tensors[0].shape[0]).to_list()

for i in range(len(act_train)):
    train_activation = act_train[i]
    test_activation = act_test[i]

    train_text = f"Train: {round(train_activation, 6)}"
    test_text = f"Test: {round(test_activation, 6)}"

    # Check if activations are 0 and change color accordingly
    if train_activation == 0:
        train_text = f"\033[91m{train_text}\033[0m"  # Red color
    if test_activation == 0:
        test_text = f"\033[91m{test_text}\033[0m"  # Red color

    print(train_text, test_text)

Train: 0.180091 Test: 0.179636
[91mTrain: 0.0[0m [91mTest: 0.0[0m
Train: 0.520295 Test: 0.518818
Train: 0.32042 Test: 0.318318
[91mTrain: 0.0[0m [91mTest: 0.0[0m
Train: 0.140557 Test: 0.137773
Train: 0.349523 Test: 0.351909
[91mTrain: 0.0[0m [91mTest: 0.0[0m
[91mTrain: 0.0[0m [91mTest: 0.0[0m
Train: 0.239477 Test: 0.242091
[91mTrain: 0.0[0m [91mTest: 0.0[0m
[91mTrain: 0.0[0m [91mTest: 0.0[0m
[91mTrain: 0.0[0m [91mTest: 0.0[0m
[91mTrain: 0.0[0m [91mTest: 0.0[0m
Train: 0.009852 Test: 0.010591
Train: 0.340352 Test: 0.338591
[91mTrain: 0.0[0m [91mTest: 0.0[0m
[91mTrain: 0.0[0m [91mTest: 0.0[0m
Train: 0.211148 Test: 0.205409
Train: 0.144773 Test: 0.145909
[91mTrain: 0.0[0m [91mTest: 0.0[0m
[91mTrain: 0.0[0m [91mTest: 0.0[0m
Train: 0.095375 Test: 0.0935
[91mTrain: 0.0[0m [91mTest: 0.0[0m
[91mTrain: 0.0[0m [91mTest: 0.0[0m
[91mTrain: 0.0[0m [91mTest: 0.0[0m
[91mTrain: 0.0[0m [91mTest: 0.0[0m
Train: 0.065182 Test: 0.064273
[91mTr