# Dataset

## Import Libraries

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
# visualization
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob
import pdb
import warnings
warnings.filterwarnings('ignore')


# pytorch
import torch
from torch import optim
from torch.autograd import Variable,gradcheck
from torch.utils.data import DataLoader,Dataset

# sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# others
import numpy as np
import pandas as pd
import sys
sys.path.append("../model_source/")
from all_models import DNN,seed_everything
import evaluation_and_visualization as ev_viz
from utils import calc_cumulative

ModuleNotFoundError: No module named 'torch'

# Experiment 3 All Data (With All Pressure and Velocity Fields) Case 1

In [2]:
# Fraction of the data used for training; it is baked into the dataset file prefix.
TRAINPERCENTAGE=0.55
DATASETNAME="../../../../datasets/evaluation_data/AllData_with_Pressure_And_Velocity_Samples_case1/AllData_PRESSURE_and_VELOCITY_10_SAMPLES_All_RE_SF_Groups_TRAINPERCENTAGE_{}_With_Output_Mask_".format(TRAINPERCENTAGE)

# Fail fast with an explicit message when any split file is missing, instead of
# silently continuing and hitting a NameError on y_train further down.
_split_suffixes = ["trainX.npy", "trainY.npy", "valX.npy", "valY.npy", "testX.npy", "testY.npy"]
_missing_files = [DATASETNAME + suffix for suffix in _split_suffixes
                  if not os.path.isfile(DATASETNAME + suffix)]
if _missing_files:
    raise FileNotFoundError(
        "Dataset file(s) not found:\n  " + "\n  ".join(_missing_files)
    )

X_train=np.load(DATASETNAME+"trainX.npy")
y_train=np.load(DATASETNAME+"trainY.npy")
X_val=np.load(DATASETNAME+"valX.npy")
y_val=np.load(DATASETNAME+"valY.npy")
X_test=np.load(DATASETNAME+"testX.npy")
y_test=np.load(DATASETNAME+"testY.npy")
print("Loaded Files")

# Columns 1..10 of the stored y matrices hold the pressure-field values; they are
# removed so that y keeps only Fx, the velocity fields and the trailing columns.
# The arrays are reloaded above on every run, so re-executing this cell is safe.
_columns_to_delete = list(range(1, 11))
print("Columns to delete = {}".format(_columns_to_delete))
y_train = np.delete(y_train, _columns_to_delete, axis=1)
y_val = np.delete(y_val, _columns_to_delete, axis=1)
y_test = np.delete(y_test, _columns_to_delete, axis=1)

print(y_train.shape,y_val.shape,y_test.shape)

Loaded Files
Columns to delete = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
(2639, 19) (564, 19) (2621, 19)


In [4]:
# Layout of the y matrices. Only this notebook uses the reduced layout, because
# the 10 pressure-field columns have been deleted from y.
#
# Original y layout:
#   (Fx-nondim, PressureField1,...,PressureField10, VelocityXField1,...,VelocityXField10,
#    Px, Py, Pz, TauX, TauY, Tauz, AvgFx-NonDim-Per_Re_SF, Mask)
#
# Reduced y layout used in this notebook:
#   (Fx-nondim, VelocityXField1,...,VelocityXField10,
#    Px, Py, Pz, TauX, TauY, Tauz, AvgFx-NonDim-Per_Re_SF, Mask)
#
# Currently the Mask column is all ones.
print("Shape of Training and Testing Files")
_split_arrays = (X_train, y_train, X_val, y_val, X_test, y_test)
print(*(split.shape for split in _split_arrays))

Shape of Training and Testing Files
(2639, 47) (2639, 19) (564, 47) (564, 19) (2621, 47) (2621, 19)


## Normalization

In [5]:
# Standardize inputs and targets. Both scalers are fitted on the training split
# only and then applied unchanged to the validation and test splits.
# NOTE: the arrays are overwritten, so this cell must run exactly once after the
# data-loading cell; running it again would standardize already-scaled data.
std_scaler_x = StandardScaler()
std_scaler_y = StandardScaler()

# Inputs: every feature column is standardized.
X_train = std_scaler_x.fit_transform(X_train)
X_val = std_scaler_x.transform(X_val)
X_test = std_scaler_x.transform(X_test)

# Targets: only the first 11 columns of y are standardized (written back in
# place); the remaining columns keep their original values.
y_train[:, :11] = std_scaler_y.fit_transform(
    y_train[:, :11].reshape((y_train.shape[0], 11))
)
y_val[:, :11] = std_scaler_y.transform(
    y_val[:, :11].reshape((y_val.shape[0], 11))
)
y_test[:, :11] = std_scaler_y.transform(
    y_test[:, :11].reshape((y_test.shape[0], 11))
)

# Fully Connected Network: DNN+ Velocity

## Configurations

In [6]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model settings
D_in  = X_train.shape[1]   # one network input per feature column
D_out = 11                 # 1 FX-Nondim value, 10 velocity values.
H = 128                    # second argument of DNN (presumably the hidden width -- confirm in all_models)
Depth = 5                  # fourth argument of DNN (presumably the number of layers -- confirm in all_models)

# Training schedule
NUMEPOCHS = 500
BATCHSIZE = 100

## Prepare For Training

## Training

In [7]:
# Train NUMEXPERIMENTS independent DNNs and evaluate each on the test split.
# Per-experiment results are collected in the lists below.
NUMEXPERIMENTS=1
models=list()
test_mses=list()
test_mres=list()
test_preds_all=list()
test_targets_all=list()

#Reproducibility
seed_everything(seed=123)

# Input data. These tensors do not change between experiments, so they are built
# once. Only the first D_out columns of y are regression targets
# (FX-nondim + 10 velocityX values); D_out must equal the number of columns
# std_scaler_y was fitted on, because predictions are inverse-transformed below.
trainX = torch.from_numpy(X_train).float().to(device)
trainY = torch.from_numpy(y_train[:, :D_out]).float().to(device)
valX = torch.from_numpy(X_val).float().to(device)
valY = torch.from_numpy(y_val[:, :D_out]).float().to(device)
testX = torch.from_numpy(X_test).float().to(device)
testY = torch.from_numpy(y_test[:, :D_out]).float().to(device)

data_train_loader = DataLoader(
    torch.utils.data.TensorDataset(trainX, trainY),
    batch_size=BATCHSIZE,
    shuffle=True
)

for expt_no in range(NUMEXPERIMENTS):
    print("Experiment {}".format(expt_no))
    # Compile model
    model = DNN(D_in, H, D_out, Depth,device=device).to(device)
    # Loss Function
    criterion = torch.nn.MSELoss()
    # Optimizer
    optimizer = optim.Adadelta(model.parameters())

    # Per-batch training loss as plain floats. Storing the loss tensors themselves
    # would keep every batch's autograd graph alive for the whole run.
    losses = list()

    print("Epochs")

    for epoch in range(NUMEPOCHS):
        for batchX, batchY in data_train_loader:
            # Forward pass
            outputs = model(batchX)
            loss = criterion(outputs.squeeze(), batchY.squeeze())
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss.item())

        # Print Epochs and Losses to Monitor Convergence
        # (the value shown is the loss of the last mini-batch of the epoch).
        if epoch % 50 == 0:
            print("{}".format(epoch),end = ", ")
            print(losses[-1])

    print('\nTraining Complete')
    print("Testing...")
    # Switch layers such as dropout / batch-norm (if DNN has any) to inference mode.
    model.eval()
    with torch.no_grad():
        testpreds = model(testX)

    # Map standardized predictions/targets back to the original units. NumPy arrays
    # are passed to the scaler for both, so the two calls are consistent.
    preds = torch.from_numpy(
        std_scaler_y.inverse_transform(testpreds.cpu().numpy())
    ).float()
    tgts = torch.from_numpy(
        std_scaler_y.inverse_transform(testY.cpu().numpy())
    ).float()

    # Only column 0 (Fx-nondim) is evaluated.
    preds=preds[:,0]
    tgts=tgts[:,0]
    preds_np = preds.numpy()
    tgts_np = tgts.numpy()

    print("Preds Shape = {}, Targets Shape = {}".format(preds.size(),tgts.size()))
    error = np.mean(np.square(preds_np - tgts_np))
    print("Test MSE = {}".format(error))

    #Relative Error: absolute error divided by the second-to-last y column
    #(AvgFx-NonDim-Per_Re_SF in the y layout).
    rel_err=np.mean(np.abs(preds_np - tgts_np)/y_test[:,-2])
    print("Relative Error = {}".format(rel_err))

    aurec=ev_viz.aurec(preds_np,tgts_np,y_test[:,-2])
    print("AUREC = {}".format(aurec))
    print("===============================================================\n")

    models.append(model)
    test_mses.append(error)
    test_mres.append(rel_err)
    test_preds_all.append(preds)
    test_targets_all.append(tgts)

    #END For.

#Save Models, Predictions (of the model which is the best in terms of MRE.)
#Store predictions, targets and average value as numpy array.
print("Overall Test MSE = {}, Test MRE = {}".format(round(np.mean(test_mses),5),round(np.mean(test_mres),5)))

Experiment 0
Epochs
0, tensor(0.7011, device='cuda:0', grad_fn=<MseLossBackward>)
50, tensor(0.0900, device='cuda:0', grad_fn=<MseLossBackward>)
100, tensor(0.0759, device='cuda:0', grad_fn=<MseLossBackward>)
150, tensor(0.0496, device='cuda:0', grad_fn=<MseLossBackward>)
200, tensor(0.0503, device='cuda:0', grad_fn=<MseLossBackward>)
250, tensor(0.0357, device='cuda:0', grad_fn=<MseLossBackward>)
300, tensor(0.0324, device='cuda:0', grad_fn=<MseLossBackward>)
350, tensor(0.0157, device='cuda:0', grad_fn=<MseLossBackward>)
400, tensor(0.0133, device='cuda:0', grad_fn=<MseLossBackward>)
450, tensor(0.0137, device='cuda:0', grad_fn=<MseLossBackward>)

Training Complete
Testing...
Preds Shape = torch.Size([2621]), Targets Shape = torch.Size([2621])
Test MSE = 25.871679306030273
Relative Error = 0.18492440453188247
AUREC = 0.8283288821060664

Overall Test MSE = 25.871679306030273, Test MRE = 0.18492


In [8]:
# Export the test predictions of the last trained model to CSV and save its weights.
MODEL='DNN+-ALL-Velocity'

# Undo the input standardization so the last two feature columns can be exported
# in their original units (labelled 'Re' and 'Solidfraction' below).
_tmp=std_scaler_x.inverse_transform(X_test)

_columns=['{}-Predictions'.format(MODEL),'Targets','Mean','Re','Solidfraction']
_values=np.column_stack([
    preds.to("cpu").data.numpy().ravel(),
    tgts.to("cpu").data.numpy().ravel(),
    y_test[:,-2],
    _tmp[:,-2],
    _tmp[:,-1],
])
_df=pd.DataFrame(_values,columns=_columns)

# NOTE(review): absolute, machine-specific output path -- the directory must
# already exist on the machine running this notebook.
outputfile="/home/nik90/experiments/particleDragForce/dnn_all/{}_case1_predictions_TRAINPERCENTAGE_{}.csv".format(MODEL,TRAINPERCENTAGE)
_df.to_csv(outputfile,index=False)

#Save Model
_model_path="../../../../models/Nikhil_{}_Case1_TrainPercentage_{}".format(MODEL,TRAINPERCENTAGE)
torch.save(model.state_dict(),_model_path)