In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file below the Kaggle input directory and print its full path.
input_files = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Approach to solve this `problem`:
### * Explore the *dataset*
### * Look out for *missing values*
### * Create new **features**
### * Choose the correct features to be part of the `training data`
### * Scale the *dataset*
### * Build the `model`
### * Split the **dataset**
### * Optimize & Fit the model
### * Make & Submit **Predictions**

In [None]:
# Read the competition's training and test tables from the Kaggle input directory.
train_df = pd.read_csv('../input/ventilator-pressure-prediction/train.csv')
test_df = pd.read_csv('../input/ventilator-pressure-prediction/test.csv')

# Exploring the *dataset*

In [None]:
train_df.shape # (rows, columns) of the training table, shown as the cell output

In [None]:
train_df.info()  # Prints a concise summary of the training table (column names and dtypes)

In [None]:
# Count how many rows take each distinct value of the 'u_out' column.
train_df['u_out'].value_counts()

# Looking for Null-values

In [None]:
# Number of missing values in each column of the training table.
train_df.isnull().sum() 

# Creating new `Features`

In [None]:
# NOTE(review): the later visible cells read train_df['pressure'] directly rather than Y.
Y=train_df['pressure'] # Separating our target feature

In [None]:
# Add the running total of 'u_in' within each breath as a new feature.
# The same transformation is applied to the training and the test table.
for frame in (train_df, test_df):
    frame['u_in_cumsum'] = frame.groupby('breath_id')['u_in'].cumsum()

# Selecting the *`Best-Features`*

In [None]:
# Feature/target pair with 'u_out' as the target: x holds every column except 'u_out'.
# NOTE(review): neither x nor y is referenced again in the visible notebook, and the
# modelling cells use 'pressure' as the target instead - confirm whether this cell is still needed.
x=train_df.drop('u_out',axis=1)
y=train_df['u_out']

In [None]:
from sklearn.feature_selection import mutual_info_regression

# Mutual information measures how strongly each feature depends on the target variable.
# Only the first 70,000 rows are used for this estimate.
xtr=train_df.drop(columns=['pressure','id']).iloc[:70000]
ytr=train_df['pressure'].iloc[:70000]

In [None]:
# Estimate the mutual information between every candidate feature and the target.
# The estimator uses random number generation internally, so the seed is pinned to keep
# the scores identical across reruns of the notebook.
mi_score=mutual_info_regression(xtr,ytr,random_state=51)

# Label each score with its feature name and rank the features, strongest dependence first.
mi_score=pd.Series(mi_score,index=xtr.columns).sort_values(ascending=False)

# Scaled by 100 purely for readability. Mutual information is not bounded by 1,
# so these values are relative scores for ranking features, not percentages.
mi_score*100

# Scaling the `final`-Features

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the mean and standard deviation from the training features only, then apply
# those same statistics to the test features. Fitting a second scaler on the test set
# would standardise the two sets differently, so the model would see test inputs on a
# different scale from the one it was trained on.
scaler=StandardScaler()
xtr=scaler.fit_transform(train_df.drop(['pressure','R','C','id'],axis=1))
xte=scaler.transform(test_df.drop(['R','C','id'],axis=1))

# Building the Neural **`Model`**

In [None]:
import tensorflow.keras as keras
from tensorflow.keras import layers,callbacks

In [None]:
# Earlier, deeper architecture kept for reference only. It is wrapped in a string
# literal, so none of it is executed; the cell merely echoes the text.
"""model=keras.Sequential([
    layers.Dense(27,input_shape=(5,),activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.4),
    
    layers.Dense(108,activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.4),
    
    layers.Dense(324,activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.4),
    
    layers.Dense(522,activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.4),
    
    layers.Dense(819,activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.4),
    
    layers.Dense(927,activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.4),
    
    layers.Dense(720,activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.4),
    
    layers.Dense(522,activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.4),
    
    layers.Dense(288,activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.4),
    
    layers.Dense(1,activation='linear')
    
])"""

In [None]:
# Fully connected regression network: four hidden Dense blocks, each followed by
# batch normalisation and 50% dropout, feeding a single-unit output.
model=keras.Sequential([
    # input_shape must match the number of columns in the scaled feature matrix.
    layers.Dense(64,input_shape=(5,),activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    
    layers.Dense(224,activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    
    layers.Dense(624,activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    
    layers.Dense(312,activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    
    # Linear output for regression: a ReLU here would clamp every prediction to >= 0
    # and pass no gradient whenever the unit's pre-activation is negative, so the
    # output could get stuck at zero.
    layers.Dense(1,activation='linear')
    
])

In [None]:
# Print the layer-by-layer structure of the network defined above.
model.summary()

# Splitting the `Dataset` into:
#### * Training set
#### * Validation set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the scaled training rows for validation, with a fixed seed.
# xtr is overwritten by its own training portion, so this cell must be run exactly
# once after the scaling cell; re-running it pairs the shrunken xtr with the full target.
xtr,xval,ytr,yval=train_test_split(
    xtr,
    train_df['pressure'],
    test_size=0.3,
    random_state=51,
)

#### Compiling the model

In [None]:
# Train with the Adamax optimiser on mean absolute error. The same quantity is also
# tracked as a metric so it appears under its own name in the training history.
model.compile(
    optimizer='adamax',
    loss='mean_absolute_error',
    metrics=['mean_absolute_error'],
)

# Fitting the model using `Callback` function

In [None]:
# Stop early once EarlyStopping's monitored quantity (left at its default) has not
# improved by at least 1e-4 for 8 consecutive epochs, then restore the best weights.
call=keras.callbacks.EarlyStopping(
    patience=8,
    min_delta=0.0001,
    restore_best_weights=True,
)

# Train for at most 45 epochs in mini-batches of 64, validating on the held-out split.
history=model.fit(
    xtr,
    ytr,
    validation_data=(xval,yval),
    batch_size=64,
    epochs=45,
    callbacks=[call],
)

# <1.5 **`Mean_Absolute_Error`**

# Visualizing the **`Epoch`** History

In [None]:
# Turn the metrics recorded by fit() into a table (presumably one row per epoch - the usual Keras layout).
model_training=pd.DataFrame(history.history)

In [None]:
# Display the training-history table built from history.history.
model_training

In [None]:
# Plot the training and validation loss curves from the history table.
model_training[['loss','val_loss']].plot()

In [None]:
# Plot the training and validation mean-absolute-error curves from the history table.
model_training[['mean_absolute_error','val_mean_absolute_error']].plot()

# Creating *predictions*

In [None]:
# xte comes with no targets, so the compiled mean-absolute-error loss cannot be scored
# on it. Report the loss and metric on the held-out validation split instead.
model.evaluate(xval,yval)

In [None]:
# Inspect the scaled test feature matrix that is fed to the model.
xte

In [None]:
# Run the trained network on the scaled test features to obtain the predictions.
ypred=model.predict(xte)

# Submitting *`Predictions`*

In [None]:
# Load the competition's sample submission file to check the expected output layout.
sample=pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')

In [None]:
# Display the sample submission table.
sample

In [None]:
# Wrap the predictions in a DataFrame so the model's single output column can be
# selected as ypred[0] when the submission is built.
ypred=pd.DataFrame(ypred)

In [None]:
# Pair every test row id with its predicted pressure, then write the submission file
# without the DataFrame index.
sub=pd.DataFrame({
    'id':test_df['id'],
    'pressure':ypred[0],
})
sub.to_csv('subbb.csv',index=False)