In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
import statsmodels.api as sm
import os

In [None]:
# Read the competition's training CSV into memory and show its (rows, columns).
train_csv_path = "../input/ventilator-pressure-prediction/train.csv"
df_brain = pd.read_csv(train_csv_path)
df_brain.shape

### Exploring the data shows that it is made of time series of 80 time steps each
- The printed output below gives a better intuition

In [None]:
# Show time_step at rows 0, 80 and 160 (every 80th row of the first 240 rows).
first_rows_of_blocks = df_brain['time_step'].iloc[0:240:80]
print(first_rows_of_blocks)

### Let us check for the null values in the data set

In [None]:
# Remove the identifier columns; `time_step` is deliberately kept.
# `drop(..., errors="ignore")` keeps this cell re-runnable: calling `pop` a
# second time on an already-removed column raises KeyError.
df_brain = df_brain.drop(columns=["id", "breath_id"], errors="ignore")

# Count missing values per remaining column.
df_brain.isnull().sum()

### The data is clean, so let us prepare IID data
- Every time-series set is treated as IID
- Let us create NumPy arrays out of it
- Generators are a good fit for large data

In [None]:
# Per-column minimum followed by per-column maximum.
column_minima = df_brain.min()
column_maxima = df_brain.max()
print(column_minima, column_maxima)

### The output has negative values, so we scale the data with MinMax scaling to keep all values between 0 and 1
- Once the target is scaled to [0, 1], a neural network can use either ReLU or sigmoid as its output activation, even though the raw target has values below 0

In [None]:
from sklearn.preprocessing import MinMaxScaler

# NOTE: this cell overwrites df_brain with scaled values and fits both scalers
# on whatever df_brain currently holds, so re-running it without reloading
# df_brain refits the scalers on already-scaled data.

# Take the target out of the frame so that only feature columns remain.
target = df_brain.pop("pressure")

# `sc` scales the features, `sc_y` scales the target; sc_y is kept separate so
# predictions can be mapped back with sc_y.inverse_transform later on.
sc = MinMaxScaler()
sc_y = MinMaxScaler()
y_train = sc_y.fit_transform(target.to_numpy().reshape(-1, 1))

# Scale every remaining (feature) column in place.
df_brain[df_brain.columns] = sc.fit_transform(df_brain)

# Re-attach the scaled target as the last column.
df_brain['pressure'] = y_train.ravel()
df_brain.head()

## Building a generator

In [None]:
def num_batches(dataset, batch_size, time_steps):
    """Return the number of batches in one full pass over ``dataset``.

    ``dataset`` is read as consecutive sequences of ``time_steps`` rows; rows
    left over after the last whole sequence are ignored. The final batch may
    hold fewer than ``batch_size`` sequences.

    Args:
        dataset: Sized container of rows (only ``len(dataset)`` is used).
        batch_size: Number of sequences per batch.
        time_steps: Number of rows per sequence.

    Returns:
        int: ``ceil(n_sequences / batch_size)``; 0 if there is no whole sequence.
    """
    n_sequences = len(dataset) // time_steps
    # Integer ceiling division: a trailing partial batch counts as one batch.
    return -(-n_sequences // batch_size)


def step(dataset, time_steps, step_no):
    """Return the inputs and targets of one sequence.

    Args:
        dataset: pandas DataFrame whose last column is the target and whose
            other columns are the input features.
        time_steps: Number of rows per sequence.
        step_no: Zero-based index of the sequence to extract.

    Returns:
        tuple: ``(sample_in, sample_out)`` where ``sample_in`` holds every
        column but the last for rows ``[time_steps*step_no,
        time_steps*step_no + time_steps)`` and ``sample_out`` holds the last
        column for the same rows.
    """
    start = time_steps * step_no
    window = dataset.iloc[start:start + time_steps]
    return window.iloc[:, :-1].values, window.iloc[:, -1].values


def data_gen(dataset, time_steps, batch_size, features):
    """Endlessly yield ``(inputs, targets)`` batches of whole sequences.

    The generator walks the sequences of ``dataset`` in order, restarting from
    the first sequence after the last batch. Every batch holds ``batch_size``
    sequences except possibly the last one of a pass, which holds the
    remaining sequences.

    Args:
        dataset: pandas DataFrame; the last column is the target.
        time_steps: Number of rows per sequence.
        batch_size: Maximum number of sequences per batch.
        features: Number of input columns (all columns except the last).

    Yields:
        tuple: ``(samples_in, samples_out)`` with shapes
        ``(n, time_steps, features)`` and ``(n, time_steps)``, where ``n`` is
        the number of sequences in the batch.

    Raises:
        ValueError: On first iteration, if ``dataset`` holds no whole
            sequence (otherwise the endless loop would never yield).
    """
    n_sequences = len(dataset) // time_steps
    batches = num_batches(dataset, batch_size, time_steps)
    if batches == 0:
        raise ValueError("dataset must contain at least one full sequence of time_steps rows")

    while True:
        for batch in range(batches):
            # Index of the first sequence of this batch; the same formula is
            # valid for full batches and for the trailing partial batch.
            first_sequence = batch * batch_size
            current_size = min(batch_size, n_sequences - first_sequence)

            samples_in = np.zeros((current_size, time_steps, features))
            samples_out = np.zeros((current_size, time_steps))
            for i in range(current_size):
                sample_in, sample_out = step(dataset, time_steps, first_sequence + i)
                samples_in[i, :, :] = sample_in
                samples_out[i, :] = sample_out

            yield samples_in, samples_out

## Let us see the test data

In [None]:
# Read the competition's test CSV and show its (rows, columns).
test_csv_path = "../input/ventilator-pressure-prediction/test.csv"
df_test = pd.read_csv(test_csv_path)
df_test.shape

### Both are IID data
- Let us import Keras to build the neural network

In [None]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM,GRU,Dense,Dropout,TimeDistributed

# Training generator: sequences of 80 rows, 90 sequences per batch,
# 5 input columns (every column of df_brain except the last).
gen = data_gen(dataset=df_brain, time_steps=80, batch_size=90, features=5)



In [None]:
# Two stacked LSTMs that return the full sequence, dropout, then one Dense
# unit applied to every time step (one output value per input row).
model = Sequential([
    LSTM(512, input_shape=(80, 5), return_sequences=True),
    LSTM(1024, return_sequences=True),
    Dropout(0.2),
    TimeDistributed(Dense(1, activation='relu', kernel_initializer='normal')),
])

### Let us compile the model

In [None]:
import tensorflow as tf

# Train with Adam on mean absolute error, then print the layer summary.
model.compile(loss='mean_absolute_error', optimizer='adam')
model.summary()

In [None]:
# `gen` never terminates, so Keras needs to be told how many batches make up
# one epoch. Derive that from the data (same batch_size / time_steps that were
# used to build `gen`) instead of hard-coding the hand-computed value.
steps_per_epoch = num_batches(df_brain, batch_size=90, time_steps=80)
model.fit(gen, steps_per_epoch=steps_per_epoch, epochs=10)

In [None]:
# Third block of 80 rows (rows 160-239), first five columns only.
sample_start = 2*80
sample_stop = sample_start + 80
sample = df_brain.iloc[sample_start:sample_stop, :5]
sample

In [None]:
# Shape the single sequence as (1, 80, 5) — the input shape the model was built with.
sample_batch = np.array(sample).reshape(1, 80, 5)
predictions = model.predict(sample_batch)

In [None]:
# Signed difference (predicted - actual) for rows 160-239, both on the scaled
# `pressure` column; the cell shows the smallest difference.
actual_window = df_brain.iloc[2*80: 2*80+80]['pressure']
flat_predictions = predictions.reshape(-1)
l = flat_predictions - actual_window
l.min()

#### 

In [None]:
# Side-by-side panels: model output on the left, the `pressure` column for the
# same rows (160-239) on the right.
actual =  df_brain.iloc[2*80: 2*80+80]['pressure']
fig, (ax_predicted, ax_actual) = plt.subplots(1, 2, figsize=(10, 6))

ax_predicted.plot(range(predictions.shape[1]), predictions.reshape(-1))
ax_predicted.set_title("Predicted")

ax_actual.plot(actual.index, actual)
ax_actual.set_title("Actual")

### Predicted values match the actual values well on the train set

In [None]:
# Remove the identifier columns from the test frame, then scale the remaining
# columns with `sc`, the scaler already fitted on the training features.
df_test = df_test.drop(columns=['id', 'breath_id'])
scaled_test_values = sc.transform(df_test)
df_test[df_test.columns] = scaled_test_values

### Let us shape test data properly for prediction

In [None]:
def shape_test_data(data,timesteps):
    """Stack consecutive row blocks of ``data`` into a 3-D float array.

    Args:
        data: pandas DataFrame (``.iloc`` and ``.shape`` are used).
        timesteps: Number of rows per block.

    Returns:
        numpy.ndarray: float64 array of shape
        ``(len(data) // timesteps, timesteps, n_columns)``; block ``i`` holds
        rows ``[timesteps*i, timesteps*i + timesteps)``. Rows after the last
        whole block are left out.
    """
    n_blocks = data.shape[0] // timesteps
    n_columns = data.shape[1]
    # Keep only the rows that form whole blocks, as an independent float copy.
    whole_rows = np.array(data.iloc[:n_blocks * timesteps, :], dtype=np.float64, order="C")
    return whole_rows.reshape(n_blocks, timesteps, n_columns)

In [None]:
# Reshape the scaled test frame into blocks of 80 rows and show the array shape.
test_data = shape_test_data(data=df_test, timesteps=80)
test_data.shape

In [None]:
# Run the first model on every test sequence.
predictions = model.predict(x=test_data)

In [None]:
# Map the model output back to the original pressure scale with the target
# scaler, as one flat vector (one value per test row).
y_test = sc_y.inverse_transform(predictions.reshape(-1,1)).reshape(-1)

# Only the `id` column is needed for the submission, so read just that column.
# This avoids loading the whole test file again and avoids assigning a new
# column onto a slice of another frame (SettingWithCopyWarning).
df_submissions = pd.read_csv("../input/ventilator-pressure-prediction/test.csv", usecols=['id'])
df_submissions['pressure'] = y_test

### With the predictions done, let us write the submission file

In [None]:
# Write the submission file without the DataFrame index.
submission_path = "./submissions.csv"
df_submissions.to_csv(submission_path, index=False)

### Let's try a model without `time_step` in the input vector

In [None]:
# Two stacked GRUs that return the full sequence, followed by one Dense unit
# applied to every time step; the input has 4 columns per row.
model_1 = Sequential()
model_1.add(GRU(512, input_shape=(80, 4), return_sequences=True))
model_1.add(GRU(1024, return_sequences=True))
model_1.add(TimeDistributed(Dense(1, activation='relu', kernel_initializer='normal')))

model_1.compile(loss='mean_absolute_error', optimizer='adam')
model_1.summary()

In [None]:
# Same training frame without `time_step`, leaving 4 input columns + target.
train_data = df_brain.drop("time_step",axis=1)
data = data_gen(train_data,80,90,4)

# The generator is endless, so tell Keras how many batches form one epoch.
# Computed from the data (same batch_size / time_steps as the generator)
# rather than hard-coded.
steps_per_epoch = num_batches(train_data, batch_size=90, time_steps=80)
history = model_1.fit(data,steps_per_epoch=steps_per_epoch,epochs=15)

In [None]:
# Test sequences for model_1: the scaled test frame without `time_step`.
test_frame_no_time = df_test.drop('time_step', axis=1)
test_data_1 = shape_test_data(test_frame_no_time, 80)


In [None]:
# Predict rows 240-319 of the training frame with model_1 (4 input columns),
# then plot the prediction mapped back through the target scaler.
window_features = df_brain.iloc[240:320][['R','C','u_in','u_out']]
predictions = model_1.predict(np.array(window_features).reshape(1, 80, 4))
unscaled_curve = sc_y.inverse_transform(predictions[0]).reshape(-1)
plt.plot(range(80), unscaled_curve)

### Predicting the Data

In [None]:
# Run model_1 on every test sequence.
predictions = model_1.predict(x=test_data_1)

In [None]:
# Flatten to a single column and map back through the target scaler.
# Note: `predictions` is overwritten, so this cell is meant to run once per prediction.
scaled_column = predictions.reshape(-1, 1)
predictions = sc_y.inverse_transform(scaled_column)

### Let us submit

In [None]:
# Replace the submission's pressure column with model_1's predictions.
flat_predictions = predictions.ravel()
df_submissions['pressure'] = flat_predictions
df_submissions.head()

In [None]:
# Write the updated submission file without the DataFrame index.
submission_file = "submissions.csv"
df_submissions.to_csv(submission_file, index=False)

In [None]:
# Preview the first rows only instead of displaying the whole training frame.
train_data.head()