# **Importing packages**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Locations of the train, test and sample-submission CSV files.
# All three live in the same input directory, so the prefix is spelled once.
_INPUT_DIR = "../input/ventilator-pressure-prediction"
train_path = f"{_INPUT_DIR}/train.csv"
test_path = f"{_INPUT_DIR}/test.csv"
sample_sub = f"{_INPUT_DIR}/sample_submission.csv"

# **Data Analysis**

In [None]:
# Read the training CSV first, then the test CSV, into DataFrames
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in (train_path, test_path)
)

In [None]:
# Preview the first 10 rows of the training data
train_data.iloc[:10]

In [None]:
# Report the size of both datasets, the training column names,
# and the number of missing values per column in each dataset
n_train_rows = len(train_data)
n_test_rows = len(test_data)
print(f"Rows in training data : {n_train_rows}")
print(f"Rows in test data : {n_test_rows}")
print(f"Columns in train_data : {list(train_data.columns)}")
print("Target column: pressure\n")

for split_name, frame in (("train", train_data), ("test", test_data)):
    missing_counts = frame.isnull().sum().to_frame()
    print(f"Missing values in {split_name} data\n{missing_counts}\n")


In [None]:
# Select a single ventilation cycle (breath_id 3) and count the
# distinct values found in each of its columns
is_breath_3 = train_data['breath_id'].eq(3)
ventilation_cycle = train_data.loc[is_breath_3]
unique_counts = ventilation_cycle.nunique()
print(f"Unique value counts in each time stamp\n{unique_counts}\n")

The values of `C` and `R` are constant within each ventilation cycle.

# **DATA VISUALIZATION**

In [None]:
def draw_1_cycle(ventilation_cycle):
    """Plot every column of one ventilation cycle against its ``id`` column.

    Each column other than ``id`` is drawn as its own labelled line. A dotted
    grey vertical line is added at the ``id`` of the first row where
    ``u_out == 1``; if the frame has no such row, no marker is drawn.

    Parameters
    ----------
    ventilation_cycle : pandas.DataFrame
        Rows of a single breath. Must contain the columns ``id`` (used as the
        x axis) and ``u_out`` (used to place the marker).

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(18, 5))

    # Everything except the x-axis column is drawn as a curve.
    plotted = ventilation_cycle.drop(columns="id")
    for col in plotted.columns:
        plt.plot(ventilation_cycle['id'], plotted[col], label=col)

    # First row (in frame order) where u_out is 1; may not exist.
    valve_open_ids = ventilation_cycle.loc[ventilation_cycle['u_out'] == 1, 'id']
    if not valve_open_ids.empty:
        # The marker reaches the highest value actually plotted. Taking the
        # second-largest column maximum instead is only right when `id` has
        # the largest maximum, which fails for small ids.
        curve_top = plotted.max().max()
        plt.vlines(x=valve_open_ids.iloc[0], ymin=0.1, ymax=curve_top,
                   linestyles="dotted", color="grey")

    plt.legend(loc='best')
    plt.title("Visualization of one ventilation cycle(~3s)")
    plt.show()

In [None]:
# Draw breaths 1, 5, 9, 13 and 17, one figure each.
# The dotted line in every figure marks the opening of the exploratory valve.
for breath_id in range(1, 20, 4):
    one_breath = train_data.loc[train_data['breath_id'].eq(breath_id)]
    draw_1_cycle(one_breath)

# **FEATURE ENGINEERING**

In [None]:
# All lags are taken within a breath, so values never cross breath_id
# boundaries. The grouping is built once and reused for every lag below.
breath_groups = train_data.groupby('breath_id')

# diff_u_in<k> : u_in minus its value k rows earlier in the same breath.
# A missing lag (the first k rows of a breath) is treated as 0, so on those
# rows the feature equals u_in itself.
train_data['diff_u_in1'] = train_data['u_in'] - breath_groups['u_in'].shift(1).fillna(0)
train_data['diff_u_in2'] = train_data['u_in'] - breath_groups['u_in'].shift(2).fillna(0)

train_data['R*C'] = train_data['R'] * train_data['C']

# Running total of u_in inside each breath, and its one-step difference
train_data['u_in_cumsum'] = breath_groups['u_in'].cumsum()
previous_cumsum = (
    train_data['u_in_cumsum'].groupby(train_data['breath_id']).shift(1).fillna(0)
)
train_data['u_in_cumsum_diff'] = train_data['u_in_cumsum'] - previous_cumsum

# (1 - u_out) is 1 where u_out == 0 and 0 where u_out == 1, so these two
# features are zeroed on rows with u_out == 1.
u_out_is_zero = 1 - train_data['u_out']
train_data['u_in_cumsumXu_out'] = train_data['u_in_cumsum'] * u_out_is_zero
train_data['u_inX_u_out'] = train_data['u_in'] * u_out_is_zero

# Time elapsed since the previous row of the same breath (missing lag -> 0)
train_data['time_step_diff'] = train_data['time_step'] - breath_groups['time_step'].shift(1).fillna(0)

# time_step_diff is 0 on the first row of any breath that starts at
# time_step 0 (presumably every breath in this dataset; confirm), and
# dividing by it would put inf/NaN into the feature. Those rows get a rate
# of 0; every other row keeps the plain quotient.
step_gap = train_data['time_step_diff']
flow_rate = (train_data['u_in_cumsum_diff'] / step_gap).where(step_gap.ne(0), 0.0)
train_data['change_in_p'] = flow_rate * train_data['R']

In [None]:
# Compare the engineered u_in features with pressure over the first 80 rows
# (presumably the first breath; confirm that a breath is 80 rows long).
first_rows = train_data.iloc[:80]

plt.figure(figsize=(18, 5))
# Each label is the exact column name, so the legend matches the data frame.
for column, colour in (
    ('diff_u_in1', 'r'),
    ('diff_u_in2', 'g'),
    ('u_in_cumsum', 'y'),
    ('pressure', 'b'),
):
    plt.plot(first_rows['id'], first_rows[column], c=colour, label=column)

# The labels are only displayed once a legend is requested.
plt.legend(loc='best')
plt.xlabel('id')
plt.title("Engineered u_in features vs. pressure (first 80 rows)")
plt.show()

In [None]:
#print(f"count of breath id in each ventilation cycle : {train_data['breath_id'].value_counts().unique()}")

In [None]:
# Changing to time series data
#train_data = train_data.values.reshape(-1, 80, train_data.shape[-1])
#print(f"Shape of reshaped training data : {train_data.shape}")