In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file under the input directory, then echo each one.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Directory holding the competition CSV files; later cells append '/<name>.csv' to it.
folder = "/kaggle/input/ventilator-pressure-prediction/"

In [None]:
# Read the training and test tables from the competition folder.
train, test = (
    pd.read_csv(folder + csv_name)
    for csv_name in ('/train.csv', '/test.csv')
)

In [None]:
# Template submission file; its 'pressure' column is overwritten with predictions later on.
sample_submission_path = folder + '/sample_submission.csv'
sample_submission = pd.read_csv(sample_submission_path)

In [None]:
# 1-based position of each row within its breath.
# Counting rows per breath_id replaces the hard-coded "80 rows per breath"
# pattern: it yields the same 1..80 sequence when every breath has 80 rows in
# file order, and it does not build one Python range object per breath.
train['time_id'] = train.groupby('breath_id').cumcount() + 1
test['time_id'] = test.groupby('breath_id').cumcount() + 1

In [None]:
# Running total of u_in inside each breath, added to both frames as 'cumsum'.
for frame in (train, test):
    u_in_by_breath = frame.groupby('breath_id')['u_in']
    frame['cumsum'] = u_in_by_breath.cumsum()

In [None]:
# Lagged u_in features

def add_prevs(data, n_shifts=20):
    """Add lagged copies of ``u_in`` to ``data`` in place.

    For every lag from 1 to ``n_shifts - 1`` a column ``prev_u_in<lag>`` is
    created holding ``u_in`` shifted down by ``lag`` rows. Rows whose
    ``time_id`` is smaller than ``lag + 1`` are set to 0, which also replaces
    the NaNs produced by the shift at the top of the frame.

    Parameters
    ----------
    data : DataFrame with ``u_in`` and ``time_id`` columns; modified in place.
    n_shifts : exclusive upper bound on the lag (default 20 gives lags 1..19).

    Returns
    -------
    None
    """
    for lag in range(1, n_shifts):
        column = f'prev_u_in{lag}'
        too_early = data['time_id'] < lag + 1
        data[column] = data['u_in'].shift(lag)
        data.loc[too_early, column] = 0

In [None]:
# Attach the lagged u_in columns to both frames (add_prevs mutates its argument).
for frame in (train, test):
    add_prevs(frame)

In [None]:
def add_futs(data, n_shifts=20, breath_len=80):
    """Add look-ahead copies of ``u_in`` to ``data`` in place.

    For every step from 1 to ``n_shifts - 1`` a column ``fut_u_in<step>`` is
    created holding the ``u_in`` value ``step`` rows *later*. The shift is
    negative on purpose: a positive shift returns past values, which would
    make these columns duplicates of the lag features.

    Rows whose ``time_id`` is greater than ``breath_len - step`` are set to 0.
    That replaces both the NaNs at the end of the frame and any value taken
    from a following group of rows.

    Parameters
    ----------
    data : DataFrame with ``u_in`` and ``time_id`` columns; modified in place.
    n_shifts : exclusive upper bound on the step (default 20 gives steps 1..19).
    breath_len : largest ``time_id`` in a breath (default 80, the value that was
        previously hard-coded).

    Returns
    -------
    None
    """
    for step in range(1, n_shifts):
        column = 'fut_u_in' + str(step)
        # shift(-step) moves later rows up, so row t receives u_in from row t + step.
        data[column] = data['u_in'].shift(-step)
        data.loc[data['time_id'] > breath_len - step, column] = 0

In [None]:
# Attach the fut_u_in* columns to both frames (add_futs mutates its argument).
for frame in (train, test):
    add_futs(frame)

In [None]:
# Lagged differences of u_in

def add_diffs(data, n_shifts=20):
    """Add lagged differences of ``u_in`` to ``data`` in place.

    For every lag from 1 to ``n_shifts - 1`` a column ``diff_u_in<lag>`` is
    created holding ``u_in`` minus the ``u_in`` value ``lag`` rows earlier.
    Rows whose ``time_id`` is smaller than ``lag + 1`` are set to 0, which
    also replaces the NaNs produced at the top of the frame.

    Parameters
    ----------
    data : DataFrame with ``u_in`` and ``time_id`` columns; modified in place.
    n_shifts : exclusive upper bound on the lag (default 20 gives lags 1..19).

    Returns
    -------
    None
    """
    for lag in range(1, n_shifts):
        column = f'diff_u_in{lag}'
        too_early = data['time_id'] < lag + 1
        data[column] = data['u_in'].diff(lag)
        data.loc[too_early, column] = 0


In [None]:
# Attach the diff_u_in* columns to both frames (add_diffs mutates its argument).
for frame in (train, test):
    add_diffs(frame)

In [None]:
from sklearn.metrics import mean_absolute_error as mae

In [None]:
# Prepare dataset: the target is 'pressure'; features are every column except
# the row id and the target (breath_id is kept for the grouped split below).

target_column = 'pressure'
y = train[target_column]
X = train.drop(columns=['id', target_column])

In [None]:
# Train_test splitter
# Two instances with the same breath_id must be in the same set

def train_test_split(X, y, drop_id=True, test_size=0.3, random_state=None):
    """Split ``X`` and ``y`` into train and test parts without splitting a breath.

    Whole ``breath_id`` groups are sampled without replacement for the training
    part; every row of the remaining breaths goes to the test part.

    Parameters
    ----------
    X : DataFrame containing a ``breath_id`` column.
    y : Series indexed like ``X``.
    drop_id : if True, ``breath_id`` is removed from the returned feature frames.
    test_size : fraction of breaths held out; the number of training breaths is
        ``int(n_breaths * (1 - test_size))``.
    random_state : optional seed. When given, a dedicated
        ``np.random.default_rng(random_state)`` generator is used so the split
        is reproducible. When None, NumPy's global ``np.random`` state is used,
        exactly as before, so ``np.random.seed`` still controls the result.

    Returns
    -------
    X_train, X_test, y_train, y_test
    """
    ids = X['breath_id'].unique()
    n_train_ids = int(len(ids) * (1 - test_size))

    rng = np.random if random_state is None else np.random.default_rng(random_state)
    train_ids = rng.choice(ids, replace=False, size=n_train_ids)

    # Compute the membership mask once; it is reused for all four outputs.
    in_train = X['breath_id'].isin(train_ids)

    X_train = X[in_train]
    X_test = X[~in_train]
    if drop_id:
        X_train = X_train.drop(columns='breath_id')
        X_test = X_test.drop(columns='breath_id')

    y_train = y[in_train]
    y_test = y[~in_train]

    return X_train, X_test, y_train, y_test

In [None]:
# Train/Test split
# Seed NumPy's global generator first: train_test_split picks the training
# breaths with np.random.choice, so an unseeded run evaluates on a different
# hold-out set every time and the reported MAE is not reproducible.

np.random.seed(42)
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [None]:
# Number of feature columns the model will be trained on.
n_features = X_train.shape[1]
print('N features: ', n_features)

In [None]:
# Release the names of the full training frame and the unsplit features/target;
# none of them is referenced again below.
del train
del X
del y

# Train

In [None]:
import lightgbm as lgb

In [None]:
# Gradient-boosted regressor; the hyper-parameters are collected in one dict
# so they can be read at a glance.
lgb_params = {
    'num_leaves': 2048,
    'n_estimators': 3000,
    'learning_rate': 0.1,
}
reg = lgb.LGBMRegressor(**lgb_params)
reg.fit(X_train, y_train)

In [None]:
# The training split is not referenced again once the model is fitted.
del y_train
del X_train

In [None]:
# Mean absolute error on the held-out breaths.
holdout_pred = reg.predict(X_test)
print('Test mae: ', mae(y_test, holdout_pred))

In [None]:
# Predict on the test rows using every column except the two identifier columns.
identifier_columns = ['id', 'breath_id']
pred = reg.predict(test.drop(columns=identifier_columns))

In [None]:
# Store the predictions in the submission template's pressure column.
prediction_column = 'pressure'
sample_submission[prediction_column] = pred

In [None]:
# Write the submission file to the working directory without the DataFrame index.
submission_file = 'submission.csv'
sample_submission.to_csv(submission_file, index=False)