In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the training split from the competition input directory and preview the first rows.
train_df = pd.read_csv('../input/ventilator-pressure-prediction/train.csv')
train_df.head()

In [None]:
# (rows, columns) of the training frame.
train_df.shape

In [None]:
# Per-column summary statistics of the training frame.
train_df.describe()

In [None]:
# Read the test split from the same competition directory and preview the first rows.
test_df = pd.read_csv('../input/ventilator-pressure-prediction/test.csv')
test_df.head()

In [None]:
# (rows, columns) of the test frame.
test_df.shape

In [None]:
# Per-column summary statistics of the test frame.
test_df.describe()

In [None]:
# Count the breath ids that occur in both the train and the test split.
tr_br_id = train_df.breath_id.unique()
ts_br_id = test_df.breath_id.unique()

# A set gives constant-time membership checks; testing `el in tr_br_id` against
# the array rescanned every train id for each test id.
tr_br_id_lookup = set(tr_br_id)
common_br_id = [el for el in ts_br_id if el in tr_br_id_lookup]
print(f"Common ids on train and test data : {len(common_br_id)}")

In [None]:
# Keep one row per (breath_id, time_step) pair; train_df is edited in place.
train_df.drop_duplicates(subset=['breath_id', 'time_step'], inplace = True)

In [None]:
# Remove the breath_id column from both frames (in place).
# The axis is selected with columns= because pandas 2.0 no longer accepts it as
# the second positional argument of DataFrame.drop.
train_df.drop(columns='breath_id', inplace = True)
test_df.drop(columns='breath_id', inplace = True)

In [None]:
# Number of missing values in each training column.
train_df.isnull().sum()

In [None]:
# Number of missing values in each test column.
test_df.isnull().sum()

In [None]:
# Storage dtype of each training column.
train_df.dtypes

In [None]:
# Reorder the training rows by ascending time_step (in place), then preview the result.
train_df.sort_values('time_step', inplace = True)
train_df.head()

In [None]:
# Keep the test ids aside: the id column is removed from test_df below, and the
# ids are needed again when the submission frame is built.
test_id = test_df.id

In [None]:
# Remove the id column from both frames (in place).
# columns= replaces the positional axis argument, which pandas 2.0 rejects.
train_df.drop(columns='id', inplace = True)
test_df.drop(columns='id', inplace = True)

In [None]:
# Preview the training frame after the id column was removed.
train_df.head()

In [None]:
# Add the derived column R3C = R**3 * C to both frames.
for frame in (train_df, test_df):
  frame['R3C'] = frame['R'] ** 3 * frame['C']

In [None]:
# Preview the training frame with the new R3C column.
train_df.head()

In [None]:
# Histograms of the R and C columns: train on the top row, test on the bottom row.
# plt.subplots returns (figure, axes) in that order; drawing through the axes
# array avoids depending on pyplot's implicit "current axes".
fig, axes = plt.subplots(2, 2, figsize = (15, 8))
fig.suptitle('R-C Histogram', size = 20)
panels = [
    (axes[0, 0], train_df.R, 'R-Train'),
    (axes[0, 1], train_df.C, 'C-Train'),
    (axes[1, 0], test_df.R, 'R-Test'),
    (axes[1, 1], test_df.C, 'C-Test'),
]
for ax, values, title in panels:
    ax.hist(values)
    ax.set_title(title)
plt.show()

In [None]:
# Box plot and histogram of the training u_in column, side by side.
# The figure is created directly as a 1x2 grid: creating it as 1x1 and then
# calling plt.subplot(1, 2, ...) left an extra full-width axes underneath.
fig, (box_ax, hist_ax) = plt.subplots(1, 2, figsize = (15, 5))
fig.suptitle('u_in Distribution', size = 20)
# x= pins the horizontal orientation regardless of the seaborn version.
sns.boxplot(x = train_df.u_in, ax = box_ax)
box_ax.set_title('box_plot')
hist_ax.hist(train_df.u_in)
hist_ax.set_title('histogram')
plt.show()

In [None]:
# Box plot and histogram of the training u_out column, side by side.
# The figure is created directly as a 1x2 grid: creating it as 1x1 and then
# calling plt.subplot(1, 2, ...) left an extra full-width axes underneath.
fig, (box_ax, hist_ax) = plt.subplots(1, 2, figsize = (15, 5))
fig.suptitle('u_out Distribution', size = 20)
# x= pins the horizontal orientation regardless of the seaborn version.
sns.boxplot(x = train_df.u_out, ax = box_ax)
box_ax.set_title('box_plot')
hist_ax.hist(train_df.u_out)
hist_ax.set_title('histogram')
plt.show()

In [None]:
# Pairwise correlation of the training columns, rendered as a heatmap.
corr_data = train_df.corr()
heatmap_ax = sns.heatmap(corr_data)
heatmap_ax.set_title('Correlation Plot')
plt.show()

In [None]:
# Remove the R, C and derived R3C columns from both frames (in place).
# columns= replaces the positional axis argument, which pandas 2.0 rejects.
train_df.drop(columns=['R', 'C', 'R3C'], inplace = True)
test_df.drop(columns=['R', 'C', 'R3C'], inplace = True)

In [None]:
# Preview the training frame after R, C and R3C were removed.
train_df.head()

In [None]:
# Preview the test frame after R, C and R3C were removed.
test_df.head()

In [None]:
# Print the value range of every remaining feature in both splits.
# Series.min()/max() run vectorised; the builtin min()/max() walked each column
# one Python object at a time.
for col in ['time_step', 'u_in', 'u_out']:
  for split_name, frame in (('Train', train_df), ('Test', test_df)):
    print(f"[{split_name} data][{col}] | Minimum Value : {frame[col].min()} | Maximum Value : {frame[col].max()}")

In [None]:
def minmaxscaler(train_data, test_data):
  """Min-max scale two columns using the range of the training column only.

  Args:
    train_data: pandas Series or NumPy array; its minimum and maximum define
      the scaling.
    test_data: Series or array scaled with the same minimum and range, so its
      values may land outside [0, 1].

  Returns:
    Tuple ``(scaled_train, scaled_test)``.
  """
  # Vectorised reductions; the builtin min()/max() iterate element by element.
  min_val = train_data.min()
  max_val = train_data.max()
  span = max_val - min_val
  # A constant training column has zero range: divide by 1 so it maps to 0
  # instead of producing NaN/inf.
  if span == 0:
    span = 1
  return (train_data - min_val) / span , (test_data - min_val) / span

# Rescale each feature column of both frames with the training column's range.
for col in ['time_step', 'u_in', 'u_out']:
  scaled_train, scaled_test = minmaxscaler(train_df[col], test_df[col])
  train_df[col] = scaled_train
  test_df[col] = scaled_test

In [None]:
# Preview the training frame after scaling.
train_df.head()

In [None]:
# Preview the test frame after scaling.
test_df.head()

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Feature frame (everything except pressure) and the pressure target.
# columns= replaces the positional axis argument, which pandas 2.0 rejects.
X, y = train_df.drop(columns='pressure') , train_df.pressure

In [None]:
# Hold out 20% of the rows for validation (shuffled, fixed seed 42) and show both shapes.
# NOTE(review): train_df is rebound to the 80% part, so re-running this cell
# splits the already reduced frame again.
train_df, val_df = train_test_split(train_df, test_size = 0.2, shuffle = True, random_state = 42)
train_df.shape, val_df.shape

In [None]:
# (rows, columns) of the test frame at this point.
test_df.shape

In [None]:
# Python type of a single value from the last column of the first row.
# A scalar iloc lookup avoids `Series[0]` on a label-indexed Series, which
# depends on pandas' deprecated positional fallback.
type(train_df.iloc[0, -1])

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset


class ANN_model(nn.Module):
  """Fully connected regressor mapping 3 input features to 1 output value.

  Layer widths are 3 -> 1024 -> 2048 -> 1. A ReLU follows each hidden layer:
  without a non-linearity between them the stacked Linear layers are
  equivalent to a single affine map, however wide they are.
  """

  def __init__(self):
    super().__init__()
    self.dense1 = nn.Linear(3, 1024)
    self.dense2 = nn.Linear(1024, 2048)
    self.op = nn.Linear(2048, 1)

    # The layers stay reachable as attributes and are chained here for forward().
    self.model = nn.Sequential(
        self.dense1,
        nn.ReLU(),
        self.dense2,
        nn.ReLU(),
        self.op
    )

  def forward(self, x):
    """Return predictions of shape (..., 1) for an input of shape (..., 3)."""
    return self.model(x)


# Smoke test: run a random batch of 5 rows x 3 features through a fresh model
# and print the shape of what comes out.
rand_data = torch.rand(5,3)
model = ANN_model()
smoke_output = model(rand_data)
print(f"Output Shape : {smoke_output.shape}")

In [None]:
class ANN_dataset(Dataset):
  def __init__(self, dataset, test_data = False):
    self.dataset = dataset
    self.test_data = test_data

  def __len__(self):
    return len(self.dataset)

  def __getitem__(self, index):
    X = torch.tensor(self.dataset.iloc[index, :3])
    if self.test_data != True:
      y = torch.tensor(self.dataset.iloc[index, -1:][0])
      return (X, y)
    return X

In [None]:
# Wrap every split in the dataset class; only the test wrapper is label-free.
train_ds, val_ds = ANN_dataset(train_df), ANN_dataset(val_df)
test_ds = ANN_dataset(test_df, test_data = True)

In [None]:
# Batch the three datasets, 100 rows per batch.
train_dl = DataLoader(train_ds, batch_size = 100, shuffle = True)
# Validation is not shuffled: the training cell only reads the first 3000
# validation batches, so a shuffled loader would score every epoch on a
# different random subset and make the best-model comparison inconsistent.
val_dl = DataLoader(val_ds, batch_size = 100, shuffle = False)
# Test order must be preserved so predictions line up with the saved ids.
test_dl = DataLoader(test_ds, batch_size = 100, shuffle = False)

In [None]:
# Peek at the first batch of each loader to confirm the tensor shapes.
for loader in (train_dl, val_dl):
  for X, y in loader:
    print(X.shape, y.shape)
    break
# The test loader yields features only.
for X in test_dl:
  print(X.shape)
  break

In [None]:
# Choosing training hyperparameters and also the optimizer and loss

# Number of epochs run by the training loop.
EPOCHS = 10
# Mean squared error between predictions and labels.
criterion = nn.MSELoss()
# NOTE(review): this rebinds `optim` from the imported torch.optim module to an
# Adam instance; the training loop uses this name for the optimizer.
optim = torch.optim.Adam(params = model.parameters(), lr = 1e-4)

In [None]:
from termcolor import cprint

In [None]:
# Move the model to the CUDA device; this cell needs a GPU runtime.
model = model.cuda()  # Putting the model inside GPU

In [None]:
# List every learnable tensor of the model together with its dtype.
for param_name, tensor in model.named_parameters():
  print(param_name, tensor.dtype)

In [None]:
# Model Training...
# Each epoch consumes at most this many batches from the respective loader.
MAX_TRAIN_BATCHES = 10000
MAX_VAL_BATCHES = 3000

train_loss = []   # per-batch training losses across all epochs
val_loss = []     # per-batch validation losses across all epochs
best_loss = np.inf
for epoch in range(EPOCHS):
    print(f"Epoch {epoch + 1} : \n")
    TR_LOSS = 0.0    # sum of batch losses this epoch
    VAL_LOSS = 0.0
    model.train()

    # Train Data Forward & Backward Pass

    for index, (train_patch, labels) in enumerate(train_dl):
        optim.zero_grad()
        train_patch = train_patch.float().cuda()
        labels = labels.float().cuda()
        # The model emits (batch, 1) while the labels are (batch,). Without the
        # squeeze MSELoss broadcasts the pair to (batch, batch) and averages
        # over every prediction/label combination instead of matching rows.
        op = model(train_patch).squeeze(-1)
        tloss = criterion(op, labels)
        TR_LOSS += tloss.item()
        train_loss.append(tloss.item())
        tloss.backward()
        optim.step()

        if index % 1000 == 999:
            print(f"         Step {index + 1} Loss : {tloss.item():.4f}")
        if index == MAX_TRAIN_BATCHES - 1:
            break
    model.eval()
    # Validation Checking ( Only Forward Pass )

    with torch.no_grad():
        for index, (val_patch, labels) in enumerate(val_dl):
            val_patch = val_patch.float().cuda()
            labels = labels.float().cuda()
            # Same shape alignment as in the training pass.
            op = model(val_patch).squeeze(-1)
            vloss = criterion(op, labels)
            VAL_LOSS += vloss.item()
            val_loss.append(vloss.item())
            if index == MAX_VAL_BATCHES - 1:
                break

    print(f"\n     Training Loss : {TR_LOSS:.4f}  ||  Validation Loss : {VAL_LOSS:.4f}\n")

    # Checkpoint the whole model object whenever the summed validation loss improves.
    if VAL_LOSS < best_loss :
        cprint("Model Updation : Success!\n", 'green')
        torch.save(model, 'best_model.pth')
        best_loss = VAL_LOSS
    else:
        cprint("Model Updation : Failed!\n", 'red')
cprint('Training completed...', 'blue')

In [None]:
# Reload the checkpoint written by the training loop (a fully pickled model
# object, not a state_dict) and display its layers.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which refuses fully pickled modules — confirm against the installed version.
best_model = torch.load('best_model.pth')
best_model

In [None]:
# Run the best checkpoint over the test loader and collect its predictions.
# Every entry of `pred` is a one-element list, one per test row.
pred = []
best_model.eval()
with torch.no_grad():
    for index, test_patch in enumerate(test_dl):
        test_patch = test_patch.float().cuda()
        # Predict on the current test batch; feeding `val_patch` here reused the
        # last validation batch and repeated the same outputs for every batch.
        op = best_model(test_patch).cpu().numpy().tolist()
        if index % 1000 == 999:
            print(f"Predicted {index + 1}th patch...")
        pred.extend(op)
print('Prediction data prepared...')

In [None]:
# Turn the saved ids into a plain list. list() also accepts an existing list,
# so re-running this cell does not fail the way a second .tolist() call would.
test_id = list(test_id)

In [None]:
# Pair every saved test id with its prediction and preview the result.
# At this point each `pressure` entry is still a one-element list.
prediction_dataframe = pd.DataFrame({'id': test_id,'pressure':pred})
prediction_dataframe.head()

In [None]:
# Replace every one-element prediction list with the number it contains.
prediction_dataframe['pressure'] = [row[0] for row in prediction_dataframe['pressure']]

In [None]:
# Preview the submission frame after unwrapping the predictions.
prediction_dataframe.head()

In [None]:
# Write the id/pressure pairs to submission.csv without the index column.
prediction_dataframe.to_csv('submission.csv', index = False)