# Dataset

## Import Library

In [7]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

# visualization
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob

# pytorch
import torch
from torch import optim
from torch.autograd import Variable,gradcheck
from torch.utils.data import DataLoader

# sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# others
import numpy as np
import pandas as pd
import pdb
import sys
sys.path.append("../model_source/")
from all_models import MTNN,MTNN2,seed_everything
import evaluation_and_visualization as ev_viz

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load Data

In [15]:
# Load the pre-split train / validation / test arrays from disk.
# TRAINPERCENTAGE is baked into the dataset file prefix, so changing it selects
# a different set of .npy files.
TRAINPERCENTAGE=0.55
DATASETNAME="../../../../datasets/evaluation_data/AllData_with_Pressure_And_Velocity_Samples_case1/AllData_PRESSURE_and_VELOCITY_10_SAMPLES_All_RE_SF_Groups_TRAINPERCENTAGE_{}_With_Output_Mask_".format(TRAINPERCENTAGE)

# Fail fast with an explicit message when any split is missing. Silently
# continuing would only postpone the failure to a confusing NameError in the
# cells that use X_train / y_train / ... further down.
_missing_files = [
    DATASETNAME + suffix
    for suffix in ("trainX.npy", "trainY.npy", "valX.npy", "valY.npy", "testX.npy", "testY.npy")
    if not os.path.isfile(DATASETNAME + suffix)
]
if _missing_files:
    raise FileNotFoundError(
        "Dataset file(s) not found:\n  " + "\n  ".join(_missing_files)
    )

X_train=np.load(DATASETNAME+"trainX.npy")
y_train=np.load(DATASETNAME+"trainY.npy")
X_val=np.load(DATASETNAME+"valX.npy")
y_val=np.load(DATASETNAME+"valY.npy")
X_test=np.load(DATASETNAME+"testX.npy")
y_test=np.load(DATASETNAME+"testY.npy")
print("Loaded Files")

Loaded Files


In [16]:
# Column layout of every y matrix:
#   (Fx-nondim, PressureField1..PressureField10, VelocityXField1..VelocityXField10,
#    Px, Py, Pz, TauX, TauY, Tauz, AvgFx-NonDim-Per_Re_SF, Mask)
# The Mask column is currently all ones.
print("Shape of Training and Testing Files")
# Unpacking the shape tuples lets print() join them with single spaces.
print(*(split.shape for split in (X_train, y_train, X_val, y_val, X_test, y_test)))

Shape of Training and Testing Files
(2639, 47) (2639, 29) (564, 47) (564, 29) (2621, 47) (2621, 29)


## Normalization

In [17]:
# Standardise inputs and targets. Every scaler is fitted on the training split
# only and then applied unchanged to the validation and test splits.
# NOTE: the arrays are rescaled in place, so re-running this cell without
# reloading the data would scale already-scaled values a second time.

# The last y column is the mask for the pressure and velocity fields.
mask_train = y_train[:, -1].astype(np.int32)
mask_val = y_val[:, -1].astype(np.int32)
mask_test = y_test[:, -1].astype(np.int32)

# Boolean row selectors: only rows whose mask equals 1 get their fields scaled.
rows_train = mask_train == 1
rows_val = mask_val == 1
rows_test = mask_test == 1

# One scaler per quantity.
std_scaler_x = StandardScaler()
std_scaler_y = StandardScaler()
std_scaler_pres = StandardScaler()
std_scaler_velX = StandardScaler()   # Not used in this model but can be dropped in place of pressure without change.

# --- Inputs -----------------------------------------------------------------
X_train = std_scaler_x.fit_transform(X_train)
X_val = std_scaler_x.transform(X_val)
X_test = std_scaler_x.transform(X_test)

# --- Main target (y column 0) -------------------------------------------------
# The scaler needs a 2-D column, hence the [0] list index; ravel() flattens the
# result back so it can be written into the single column.
y_train[:, 0] = std_scaler_y.fit_transform(y_train[:, [0]]).ravel()
y_val[:, 0] = std_scaler_y.transform(y_val[:, [0]]).ravel()
y_test[:, 0] = std_scaler_y.transform(y_test[:, [0]]).ravel()

# --- Pressure samples (y columns 1..10), masked rows only --------------------
y_train[rows_train, 1:11] = std_scaler_pres.fit_transform(y_train[rows_train, 1:11])
y_val[rows_val, 1:11] = std_scaler_pres.transform(y_val[rows_val, 1:11])
y_test[rows_test, 1:11] = std_scaler_pres.transform(y_test[rows_test, 1:11])

# --- Velocity samples (y columns 11..20), masked rows only -------------------
y_train[rows_train, 11:21] = std_scaler_velX.fit_transform(y_train[rows_train, 11:21])
y_val[rows_val, 11:21] = std_scaler_velX.transform(y_val[rows_val, 11:21])
y_test[rows_test, 11:21] = std_scaler_velX.transform(y_test[rows_test, 11:21])


print("Shapes y_train = {}, y_val = {}, y_test = {}".format(y_train.shape,y_val.shape,y_test.shape))

Shapes y_train = (2639, 29), y_val = (564, 29), y_test = (2621, 29)


# Fully Connected Network 

## Configurations

In [18]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model / training settings (consumed by the MTNN constructor and the training loop).
D_in = X_train.shape[1]   # number of input features
output_size = 1           # 1 FX-nondim value.
output_size_aux = 10      # 10 Pressure Sampled values
H = 128                   # presumably the hidden-layer width -- confirm in all_models.MTNN
depth = 5                 # presumably layers in the main head -- confirm in all_models.MTNN
depth_aux = 5             # presumably layers in the auxiliary head -- confirm in all_models.MTNN
shared_depth = 1          # presumably layers shared by both heads -- confirm in all_models.MTNN
NUMEPOCHS = 500           # passes over the training set
Batch_size = 100          # mini-batch size handed to the DataLoader

## Model

## Training

In [12]:
# Train the multi-task network NUMEXPERIMENTS times and evaluate each run on the
# test split. The main task predicts the scaled Fx-nondim value (y column 0);
# the auxiliary task predicts the 10 scaled pressure samples (y columns 1..10).
# Test metrics are computed after undoing the scaling of the main target.
NUMEXPERIMENTS=1

models=list()
test_mses=list()
test_mres=list()
test_preds_all=list()
test_targets_all=list()

# Input Data. These tensors do not depend on the experiment, so they are built
# once instead of on every loop iteration.
trainX = torch.from_numpy(X_train).float().to(device)
trainY = torch.from_numpy(y_train[:,0]).float().to(device)
trainY_AUX = torch.from_numpy(y_train[:,1:11]).float().to(device)
trainMask = torch.from_numpy(y_train[:,-1]).float().to(device)

valX = torch.from_numpy(X_val).float().to(device)
valY = torch.from_numpy(y_val[:,0]).float().to(device)
valY_AUX = torch.from_numpy(y_val[:,1:11]).float().to(device)
valMask = torch.from_numpy(y_val[:,-1]).float().to(device)

testX = torch.from_numpy(X_test).float().to(device)
testY = torch.from_numpy(y_test[:,0]).float().to(device)
testY_AUX = torch.from_numpy(y_test[:,1:11]).float().to(device)
testMask = torch.from_numpy(y_test[:,-1]).float().to(device)

seed_everything(123)
for expt_no in range(NUMEXPERIMENTS):
    # Per-batch combined loss values (plain floats) for this experiment.
    losses=list()

    # Compile model
    model = MTNN(D_in, H, output_size,output_size_aux, depth,depth_aux,shared_depth,device=device).to(device)

    # Loss Function
    criterion = torch.nn.MSELoss()

    # Optimizer
    optimizer = optim.Adadelta(model.parameters())

    # Train the model
    data_train_loader = DataLoader(
        list(zip(trainX,trainY,trainY_AUX,trainMask)),
        batch_size=Batch_size,
        shuffle=True
    )

    print("Epochs")
    losses_main=list()
    losses_aux=list()
    for epoch in range(NUMEPOCHS):
        alltargets = list()
        allpredictions = list()
        allpredictions_aux=list()
        epoch_losses_all=list()
        epoch_losses_main=list()
        epoch_losses_aux=list()
        for batchX, batchY,batchY_AUX,batchMask in data_train_loader:
            # Forward pass
            outputs,outputs_AUX = model(batchX)

            # Mask BOTH the auxiliary predictions and their targets. Masking only
            # the predictions would compare 0 against the (unscaled) target for
            # every masked-out row. With an all-ones mask this is a no-op.
            batchMask2d = batchMask.unsqueeze(1)
            outputs_AUX = outputs_AUX*batchMask2d

            loss1 = criterion(outputs.squeeze(), batchY)
            loss2 = criterion(outputs_AUX,batchY_AUX*batchMask2d)

            # The auxiliary task is down-weighted relative to the main task.
            loss = loss1 + 0.01*loss2

            # Keep detached copies only: storing graph-attached tensors would
            # retain the autograd history of every batch.
            allpredictions_aux.append(outputs_AUX.detach())
            allpredictions.append(outputs.detach())
            alltargets.append(batchY)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
            epoch_losses_all.append(loss.item())
            epoch_losses_main.append(loss1.item())
            epoch_losses_aux.append(loss2.item())

        losses_main.append(np.mean(epoch_losses_main))
        losses_aux.append(np.mean(epoch_losses_aux))

        # Print Epochs and Losses to Monitor Convergence
        # (the value shown is the loss of the last batch of the epoch)
        if epoch % 50 == 0:
            print("{}".format(epoch),end = ", ")
            print(loss)

    print('\nTraining Complete')

    # Inference mode for evaluation; this is a no-op if the model has no
    # dropout / batch-norm layers.
    model.eval()
    with torch.no_grad():
        testpreds,testpreds_aux = model(testX)

        # scikit-learn scalers expect 2-D input, so both predictions and targets
        # are passed as a single column before undoing the scaling.
        preds = torch.from_numpy(
            std_scaler_y.inverse_transform(testpreds.cpu().numpy().reshape(-1, 1))
        ).float()

        tgts = torch.from_numpy(
            std_scaler_y.inverse_transform(testY.cpu().numpy().reshape(-1, 1))
        ).float()

        preds_np = preds.numpy().ravel()
        tgts_np = tgts.numpy().ravel()

        error = np.mean(np.square(preds_np - tgts_np))
        print("Test MSE = {}".format(error))

        #Relative Error: absolute error divided by y column -2
        #(AvgFx-NonDim-Per_Re_SF in the y layout).
        rel_err=np.mean(np.abs(preds_np - tgts_np)/y_test[:,-2])
        print("Relative Error = {}".format(rel_err))

        aurec = ev_viz.aurec(preds_np,tgts_np,y_test[:,-2])
        print("AUREC = {}".format(aurec))

    models.append(model)
    # Cast to a Python float: round() on a numpy float32 mean has no visible effect.
    test_mses.append(float(error))
    test_mres.append(float(rel_err))
    test_preds_all.append(preds)
    test_targets_all.append(tgts)
    #END For.

#TODO: Save Models, Predictions (of the model which is the best in terms of MRE.)
#TODO: Store predictions, targets and average value as numpy array.
#NOTE: after the loop, `model`, `preds` and `tgts` refer to the LAST experiment.

print("Overall Test MSE = {}, Test MRE = {}".format(round(np.mean(test_mses),5),round(np.mean(test_mres),5)))

Epochs
0, tensor(0.2257, device='cuda:0', grad_fn=<AddBackward0>)
50, tensor(0.1082, device='cuda:0', grad_fn=<AddBackward0>)
100, tensor(0.0350, device='cuda:0', grad_fn=<AddBackward0>)
150, tensor(0.0233, device='cuda:0', grad_fn=<AddBackward0>)
200, tensor(0.0149, device='cuda:0', grad_fn=<AddBackward0>)
250, tensor(0.0220, device='cuda:0', grad_fn=<AddBackward0>)
300, tensor(0.0071, device='cuda:0', grad_fn=<AddBackward0>)
350, tensor(0.0089, device='cuda:0', grad_fn=<AddBackward0>)
400, tensor(0.0042, device='cuda:0', grad_fn=<AddBackward0>)
450, tensor(0.0047, device='cuda:0', grad_fn=<AddBackward0>)

Training Complete
Test MSE = 20.12626838684082
Relative Error = 0.1566105768090249
AUREC = 0.8559328500572301
Overall Test MSE = 20.126270294189453, Test MRE = 0.15661


In [14]:
# Export the test-set predictions as CSV and save the trained model weights.
# `preds`, `tgts` and `model` come from the training cell (last experiment run).
MODEL='DNN-MT-PRES'

# Output locations, named once so they are easy to change.
# NOTE(review): PREDICTIONS_DIR is an absolute, user-specific path -- adjust it
# when running on another machine.
PREDICTIONS_DIR="/home/nik90/experiments/particleDragForce/dnn_mt_pres"
MODELS_DIR="../../../../models"

# Create the target directories up front so the writes below cannot fail on a
# missing directory and lose the results of a long training run.
os.makedirs(PREDICTIONS_DIR, exist_ok=True)
os.makedirs(MODELS_DIR, exist_ok=True)

# Undo the input scaling; the last two input columns are exported under the
# labels 'Re' and 'Solidfraction'.
_tmp=std_scaler_x.inverse_transform(X_test)
# One row per test sample: prediction, target, y column -2 (exported as 'Mean'),
# then the two unscaled input columns.
_tmp2=np.vstack([preds.to("cpu").data.numpy().ravel(),tgts.to("cpu").data.numpy().ravel(),y_test[:,-2],_tmp[:,-2],_tmp[:,-1]]).T
_df=pd.DataFrame(_tmp2,columns=['{}-Predictions'.format(MODEL),'Targets','Mean','Re','Solidfraction'])
outputfile=os.path.join(PREDICTIONS_DIR,"{}_case1_predictions_TRAINPERCENTAGE_{}.csv".format(MODEL,TRAINPERCENTAGE))
_df.to_csv(outputfile,index=False)

#Save Model
torch.save(model.state_dict(),os.path.join(MODELS_DIR,"Nikhil_{}_Case1_TrainPercentage_{}".format(MODEL,TRAINPERCENTAGE)))