In [5]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")

In [6]:
train = pd.read_csv('ventilator-pressure-prediction/train.csv',index_col = 'id')

In [7]:
test = pd.read_csv('ventilator-pressure-prediction/test.csv',index_col = 'id')

In [8]:
train

Unnamed: 0_level_0,breath_id,R,C,time_step,u_in,u_out,pressure
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1,20,50,0.000000,0.083334,0,5.837492
2,1,20,50,0.033652,18.383041,0,5.907794
3,1,20,50,0.067514,22.509278,0,7.876254
4,1,20,50,0.101542,22.808822,0,11.742872
5,1,20,50,0.135756,25.355850,0,12.234987
...,...,...,...,...,...,...,...
6035996,125749,50,10,2.504603,1.489714,1,3.869032
6035997,125749,50,10,2.537961,1.488497,1,3.869032
6035998,125749,50,10,2.571408,1.558978,1,3.798729
6035999,125749,50,10,2.604744,1.272663,1,4.079938


In [9]:
def add_feat(df):
    df['lag_1'] = df.groupby(['breath_id'])['u_in'].shift(1)
    df['lag_2'] = df.groupby(['breath_id'])['u_in'].shift(2)
    df['lag_3'] = df.groupby(['breath_id'])['u_in'].shift(3)
    df['lag_4'] = df.groupby(['breath_id'])['u_in'].shift(4)
    df['lag_5'] = df.groupby(['breath_id'])['u_in'].shift(5)
    df['lag_6'] = df.groupby(['breath_id'])['u_in'].shift(6)
    df['lag_7'] = df.groupby(['breath_id'])['u_in'].shift(7)
    df['lag_11'] = df.groupby(['breath_id'])['u_in'].shift(-1)
    df['lag_21'] = df.groupby(['breath_id'])['u_in'].shift(-2)
    df['lag_31'] = df.groupby(['breath_id'])['u_in'].shift(-3)
    df['lag_41'] = df.groupby(['breath_id'])['u_in'].shift(-4)
    df['lag_51'] = df.groupby(['breath_id'])['u_in'].shift(-5)
    df['lag_61'] = df.groupby(['breath_id'])['u_in'].shift(-6)
    df['lag_71'] = df.groupby(['breath_id'])['u_in'].shift(-7)
    df['u_in_diff'] = df['u_in'].diff()
    df['u_in_diff1'] = df['u_in'] - df['lag_1']
    df['u_in_diff2'] = df['u_in'] - df['lag_2']
    df['u_in_diff3'] = df['u_in'] - df['lag_3']
    df['u_in_diff4'] = df['u_in'] - df['lag_4']
    df['u_in_diff5'] = df['u_in'] - df['lag_5']
    df['u_in_diff6'] = df['u_in'] - df['lag_6']
    df['u_in_diff7'] = df['u_in'] - df['lag_7']
    df.fillna(0.0,inplace = True)
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df['R__C'] = df["R"].astype(str) + '__' + df["C"].astype(str)
    df.R = df.R.astype('category')
    df.C = df.C.astype('category')
    df.R__C = df.R__C.astype('category')
    return df

In [10]:
train = add_feat(train)
test = add_feat(test)

In [11]:
train

Unnamed: 0_level_0,breath_id,R,C,time_step,u_in,u_out,pressure,lag_1,lag_2,lag_3,...,lag_71,u_in_diff,u_in_diff1,u_in_diff2,u_in_diff3,u_in_diff4,u_in_diff5,u_in_diff6,u_in_diff7,R__C
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1,20,50,0.000000,0.083334,0,5.837492,0.000000,0.000000,0.000000,...,26.807732,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,20__50
2,1,20,50,0.033652,18.383041,0,5.907794,0.083334,0.000000,0.000000,...,27.864715,18.299707,18.299707,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,20__50
3,1,20,50,0.067514,22.509278,0,7.876254,18.383041,0.083334,0.000000,...,28.313036,4.126236,4.126236,22.425944,0.000000,0.000000,0.000000,0.000000,0.000000,20__50
4,1,20,50,0.101542,22.808822,0,11.742872,22.509278,18.383041,0.083334,...,26.866758,0.299544,0.299544,4.425781,22.725488,0.000000,0.000000,0.000000,0.000000,20__50
5,1,20,50,0.135756,25.355850,0,12.234987,22.808822,22.509278,18.383041,...,26.762803,2.547028,2.547028,2.846573,6.972809,25.272516,0.000000,0.000000,0.000000,20__50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6035996,125749,50,10,2.504603,1.489714,1,3.869032,1.420711,1.353205,1.357586,...,0.000000,0.069002,0.069002,0.136509,0.132127,0.127452,0.122451,0.117126,0.252039,50__10
6035997,125749,50,10,2.537961,1.488497,1,3.869032,1.489714,1.420711,1.353205,...,0.000000,-0.001217,-0.001217,0.067785,0.135292,0.130910,0.126235,0.121234,0.115909,50__10
6035998,125749,50,10,2.571408,1.558978,1,3.798729,1.488497,1.489714,1.420711,...,0.000000,0.070481,0.070481,0.069264,0.138266,0.205773,0.201391,0.196716,0.191715,50__10
6035999,125749,50,10,2.604744,1.272663,1,4.079938,1.558978,1.488497,1.489714,...,0.000000,-0.286315,-0.286315,-0.215834,-0.217050,-0.148048,-0.080542,-0.084923,-0.089598,50__10


In [13]:
train_x = train.drop('pressure',axis=1)
train_y = train.pressure

In [14]:
tran_x, test_x, tran_y, test_y = train_test_split(train_x,train_y,test_size=0.2)

In [15]:
model = lgb.LGBMRegressor(n_estimators=4000)

In [16]:
model.fit(tran_x,tran_y,eval_set = (test_x,test_y),verbose=5)

[5]	valid_0's l2: 31.9304
[10]	valid_0's l2: 18.4827
[15]	valid_0's l2: 12.7059
[20]	valid_0's l2: 9.95589
[25]	valid_0's l2: 8.54478
[30]	valid_0's l2: 7.7753
[35]	valid_0's l2: 7.24893
[40]	valid_0's l2: 6.88807
[45]	valid_0's l2: 6.63479
[50]	valid_0's l2: 6.42676
[55]	valid_0's l2: 6.26524
[60]	valid_0's l2: 6.1477
[65]	valid_0's l2: 6.03442
[70]	valid_0's l2: 5.93488
[75]	valid_0's l2: 5.84353
[80]	valid_0's l2: 5.77543
[85]	valid_0's l2: 5.70454
[90]	valid_0's l2: 5.63996
[95]	valid_0's l2: 5.58302
[100]	valid_0's l2: 5.5357
[105]	valid_0's l2: 5.48842
[110]	valid_0's l2: 5.44595
[115]	valid_0's l2: 5.40098
[120]	valid_0's l2: 5.36302
[125]	valid_0's l2: 5.31846
[130]	valid_0's l2: 5.28719
[135]	valid_0's l2: 5.25247
[140]	valid_0's l2: 5.23165
[145]	valid_0's l2: 5.20235
[150]	valid_0's l2: 5.17277
[155]	valid_0's l2: 5.15853
[160]	valid_0's l2: 5.12905
[165]	valid_0's l2: 5.10411
[170]	valid_0's l2: 5.08295
[175]	valid_0's l2: 5.05996
[180]	valid_0's l2: 5.04158
[185]	valid_0's

[1465]	valid_0's l2: 3.52573
[1470]	valid_0's l2: 3.52393
[1475]	valid_0's l2: 3.52143
[1480]	valid_0's l2: 3.52015
[1485]	valid_0's l2: 3.51855
[1490]	valid_0's l2: 3.51748
[1495]	valid_0's l2: 3.5144
[1500]	valid_0's l2: 3.51306
[1505]	valid_0's l2: 3.51181
[1510]	valid_0's l2: 3.51016
[1515]	valid_0's l2: 3.50867
[1520]	valid_0's l2: 3.50793
[1525]	valid_0's l2: 3.50557
[1530]	valid_0's l2: 3.5045
[1535]	valid_0's l2: 3.50291
[1540]	valid_0's l2: 3.50086
[1545]	valid_0's l2: 3.49858
[1550]	valid_0's l2: 3.49745
[1555]	valid_0's l2: 3.49363
[1560]	valid_0's l2: 3.49145
[1565]	valid_0's l2: 3.4895
[1570]	valid_0's l2: 3.48797
[1575]	valid_0's l2: 3.48645
[1580]	valid_0's l2: 3.48459
[1585]	valid_0's l2: 3.48303
[1590]	valid_0's l2: 3.48155
[1595]	valid_0's l2: 3.4801
[1600]	valid_0's l2: 3.47947
[1605]	valid_0's l2: 3.47823
[1610]	valid_0's l2: 3.47702
[1615]	valid_0's l2: 3.47584
[1620]	valid_0's l2: 3.47364
[1625]	valid_0's l2: 3.47226
[1630]	valid_0's l2: 3.4704
[1635]	valid_0's l2

[2885]	valid_0's l2: 3.18981
[2890]	valid_0's l2: 3.18861
[2895]	valid_0's l2: 3.18771
[2900]	valid_0's l2: 3.18671
[2905]	valid_0's l2: 3.18487
[2910]	valid_0's l2: 3.18391
[2915]	valid_0's l2: 3.18335
[2920]	valid_0's l2: 3.18234
[2925]	valid_0's l2: 3.18133
[2930]	valid_0's l2: 3.18074
[2935]	valid_0's l2: 3.17997
[2940]	valid_0's l2: 3.17873
[2945]	valid_0's l2: 3.17734
[2950]	valid_0's l2: 3.1756
[2955]	valid_0's l2: 3.17411
[2960]	valid_0's l2: 3.17319
[2965]	valid_0's l2: 3.17225
[2970]	valid_0's l2: 3.17159
[2975]	valid_0's l2: 3.17059
[2980]	valid_0's l2: 3.16959
[2985]	valid_0's l2: 3.16894
[2990]	valid_0's l2: 3.16813
[2995]	valid_0's l2: 3.16731
[3000]	valid_0's l2: 3.16539
[3005]	valid_0's l2: 3.16487
[3010]	valid_0's l2: 3.16423
[3015]	valid_0's l2: 3.16333
[3020]	valid_0's l2: 3.1627
[3025]	valid_0's l2: 3.16191
[3030]	valid_0's l2: 3.1612
[3035]	valid_0's l2: 3.16072
[3040]	valid_0's l2: 3.15965
[3045]	valid_0's l2: 3.15887
[3050]	valid_0's l2: 3.15801
[3055]	valid_0's 

LGBMRegressor(n_estimators=4000)

In [17]:
final = model.predict(test)

In [18]:
final

array([6.07186239, 5.90867508, 6.80089588, ..., 6.11390917, 6.10374477,
       6.16804868])

In [19]:
sub = pd.read_csv('ventilator-pressure-prediction/sample_submission.csv')

In [20]:
sub.pressure = final

In [21]:
sub

Unnamed: 0,id,pressure
0,1,6.071862
1,2,5.908675
2,3,6.800896
3,4,7.662600
4,5,9.331187
...,...,...
4023995,4023996,6.077330
4023996,4023997,6.107924
4023997,4023998,6.113909
4023998,4023999,6.103745


In [22]:
sub.index = sub.id
sub.drop('id',axis=1,inplace=True)

In [23]:
sub.to_csv('lgb_submission.csv')