In [13]:
from google.colab import drive
# Attach Google Drive to the Colab filesystem so later cells can read files from it.
mount_point = '/content/drive'
drive.mount(mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
import pandas as pd
import plotly.graph_objects as go
# Load the training table from the mounted Drive. The path is absolute (rooted at
# the '/content/drive' mount point) so the read does not depend on the kernel's
# current working directory.
train = pd.read_csv('/content/drive/My Drive/ML_Data/train.csv')


In [15]:
# Overlay the pressure curve of ten breaths on one interactive figure.
fig = go.Figure()
sample_breaths = list(set(train['breath_id']))[:10]
for breath in sample_breaths:
    # Select the rows of this breath once and reuse them for both axes.
    one_breath = train[train['breath_id'] == breath]
    trace = go.Scatter(
        x=one_breath['time_step'],
        y=one_breath['pressure'],
        mode='lines',
        name=str(breath),
    )
    fig.add_trace(trace)

fig.update_layout(
    title='Pressure v/s time_step for 10 Breath IDs',
    xaxis_title='time_step',
    yaxis_title='Pressure',
)

fig.show()

In [16]:
# Plot the u_out signal against time_step for ten breaths, one line per breath.
fig = go.Figure()
for i in list(set(train['breath_id']))[:10]:
    # Filter the rows of this breath once instead of once per axis.
    breath_rows = train[train['breath_id'] == i]
    fig.add_trace(go.Scatter(x=breath_rows['time_step'], y=breath_rows['u_out'],
                        mode='lines',
                        name=str(i)))

# The y axis shows u_out, so label it as such (it was previously labelled 'Pressure').
fig.update_layout(title='u_out vs. time_step for 10 Breath IDs',
                   xaxis_title='time_step',
                   yaxis_title='u_out')

fig.show()

In [17]:
# Draw u_in over time_step for ten breaths, one line per breath id.
fig = go.Figure()
breath_ids_to_plot = list(set(train['breath_id']))[:10]
for breath_id in breath_ids_to_plot:
    # Rows belonging to the current breath, selected once for both axes.
    rows = train[train['breath_id'] == breath_id]
    fig.add_trace(
        go.Scatter(
            x=rows['time_step'],
            y=rows['u_in'],
            mode='lines',
            name=str(breath_id),
        )
    )

layout_options = dict(
    title='u_in v/s time_step for 10 Breath IDs',
    xaxis_title='time_step',
    yaxis_title='u_in',
)
fig.update_layout(**layout_options)

fig.show()

In [18]:
# 'area' is the running sum, within each breath, of time_step * u_in.
train['area'] = (
    train['time_step']
    .mul(train['u_in'])
    .groupby(train['breath_id'])
    .cumsum()
)

In [19]:
# Lagged copies of u_in (2 and 4 steps back). The shift is done per breath_id so
# the first rows of a breath are not filled with values from the previous breath;
# positions with no earlier value inside the breath become 0.
u_in_by_breath = train.groupby('breath_id')['u_in']
train['u_in_lag2'] = u_in_by_breath.shift(2).fillna(0)
train['u_in_lag4'] = u_in_by_breath.shift(4).fillna(0)

In [20]:
# Cast R and C to strings so get_dummies treats them as categorical, then
# one-hot encode the frame.
train = pd.get_dummies(train.astype({'R': str, 'C': str}))

In [None]:
# Exponentially weighted statistics of u_in, computed separately for each breath.
# `transform` is used instead of `apply` because its result is always indexed like
# `train`, so it can be assigned as a column directly; with `apply`, newer pandas
# prepends the group key to the index and the assignment no longer aligns.
u_in_by_breath = train.groupby('breath_id')['u_in']
train["ewm_u_in_mean"] = u_in_by_breath.transform(lambda x: x.ewm(halflife=10).mean())
train['ewm_u_in_std'] = u_in_by_breath.transform(lambda x: x.ewm(halflife=10).std())
# NOTE(review): corr() is called without an `other` series, so u_in is correlated
# with itself; this feature probably carries little information - confirm intent.
train['ewm_u_in_corr'] = u_in_by_breath.transform(lambda x: x.ewm(halflife=10).corr())

In [None]:
# Rolling (window of 10 rows, at least 1 observation) statistics of u_in within
# each breath. `transform` keeps the result indexed like `train`, so the column
# assignment aligns regardless of how `apply` treats group keys in the installed
# pandas version.
u_in_by_breath = train.groupby('breath_id')['u_in']
train["rolling_10_mean"] = u_in_by_breath.transform(lambda x: x.rolling(window=10, min_periods=1).mean())
train['rolling_10_max'] = u_in_by_breath.transform(lambda x: x.rolling(window=10, min_periods=1).max())
train['rolling_10_std'] = u_in_by_breath.transform(lambda x: x.rolling(window=10, min_periods=1).std())

In [None]:
# Expanding-window statistics of u_in within each breath (at least 2 observations,
# so the first row of every breath is NaN). `transform` keeps the result indexed
# like `train`, so the column assignment aligns regardless of how `apply` treats
# group keys in the installed pandas version.
u_in_by_breath = train.groupby('breath_id')['u_in']
train['expand_mean'] = u_in_by_breath.transform(lambda x: x.expanding(2).mean())
train['expand_max'] = u_in_by_breath.transform(lambda x: x.expanding(2).max())
train['expand_std'] = u_in_by_breath.transform(lambda x: x.expanding(2).std())

In [None]:
# Replace every remaining missing value in the feature table with 0.
train = train.fillna(value=0)

In [None]:
# Pull the regression target out as a single-column 2-D array, then remove it
# from the feature table (in place).
targets = train.loc[:, ['pressure']].to_numpy()
train.drop(columns='pressure', inplace=True)

In [None]:
from sklearn.preprocessing import RobustScaler, normalize

# Scale every remaining column with RobustScaler. From here on `train` is the
# array returned by the scaler, not the DataFrame.
# NOTE(review): the scaler is fit on all rows before the train/test split in the
# next cell, so test rows contribute to the scaling statistics - confirm this is
# acceptable.
RS = RobustScaler()
train = RS.fit(train).transform(train)

In [None]:


from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; rows are shuffled with a fixed seed.
# NOTE(review): the split is row-wise, so rows of one breath can land on both
# sides - confirm this is intended.
split_options = dict(test_size=0.3, shuffle=True, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(train, targets, **split_options)


In [None]:
from sklearn.model_selection import KFold
from xgboost import XGBRegressor

# 5-fold cross-validation over the training split. One XGBoost model is trained
# per fold and its predictions on the held-out test split are collected in
# `test_preds`, to be averaged afterwards.
kf = KFold(n_splits = 5, shuffle = True, random_state = 42)
test_preds = []
for fold, (train_idx, test_idx) in enumerate(kf.split(X_train, y_train)):
    print('-'*15, '>', f'Fold {fold+1}', '<', '-'*15)
    X, X_valid = X_train[train_idx], X_train[test_idx]
    y, y_valid = y_train[train_idx], y_train[test_idx]
    model = XGBRegressor(learning_rate = 0.1, n_estimators = 5000, max_depth = 9,
                n_jobs = 16, tree_method = 'gpu_hist')
    # Fit on this fold's training rows only. Fitting on the full X_train/y_train
    # would train the same model in every fold and leave X_valid/y_valid unused.
    model_xgb = model.fit(X, y)

    # Score the fold on its own validation rows; y_valid is a single-column 2-D
    # array, so flatten it to match the 1-D predictions.
    valid_mae = abs(model_xgb.predict(X_valid) - y_valid.ravel()).mean()
    print(f'Fold {fold+1} validation MAE: {valid_mae:.4f}')

    test_preds.append(model_xgb.predict(X_test))


--------------- > Fold 1 < ---------------


In [None]:
# Average the per-fold test predictions. Dividing by the number of predictions
# actually collected keeps the mean correct if the fold count changes or the
# training loop stopped early; an empty list raises ZeroDivisionError instead of
# silently producing 0.
Y_pred = sum(test_preds) / len(test_preds)


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,u_in_cumsum,time_step_cumsum,area,time_diff,time_diff2,time_diff3,time_diff4,time_diff5,time_diff6,time_diff7,time_diff8,u_in_lag1,u_in_lag2,u_in_lag3,u_in_lag4,u_in_lag5,u_in_lag6,u_in_lag7,u_in_lag8,u_in_lag9,u_in_lag10,u_in_lag11,u_in_lag12,u_in_lag13,u_in_lag14,u_in_lag15,u_in_lag16,u_in_lag17,u_in_lag18,u_in_lag19,u_in_lag20,u_in_lag21,diff,diff2
0,1,1,20,50,0.000000,0.083334,0,0.083334,0.000000,1000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,2,1,20,50,0.033652,18.383041,0,18.466375,0.033652,1000,0.033652,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.083334,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,3,1,20,50,0.067514,22.509278,0,40.975653,0.101167,1000,0.033862,0.067514,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,18.383041,0.083334,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,18.299707,0.000000
3,4,1,20,50,0.101542,22.808822,0,63.784476,0.202709,1000,0.034028,0.067890,0.101542,0.000000,0.000000,0.000000,0.000000,0.000000,22.509278,18.383041,0.083334,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,4.126236,22.425944
4,5,1,20,50,0.135756,25.355850,0,89.140326,0.338464,1000,0.034213,0.068241,0.102103,0.135756,0.000000,0.000000,0.000000,0.000000,22.808822,22.509278,18.383041,0.083334,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.299544,4.425781
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6035995,6035996,125749,50,10,2.504603,1.489714,1,387.109297,95.147046,500,0.033412,0.066817,0.100145,0.133483,0.166920,0.200291,0.233721,0.267134,1.420711,1.353205,1.357586,1.362261,1.367263,1.372588,1.237674,1.316864,1.184357,1.195817,1.137701,1.152109,1.167516,1.113529,1.132450,0.871425,1.109427,0.993555,0.882208,0.775560,0.744318,0.067506,0.063125
6035996,6035997,125749,50,10,2.537961,1.488497,1,388.597794,97.685006,500,0.033358,0.066769,0.100175,0.133503,0.166841,0.200278,0.233649,0.267078,1.489714,1.420711,1.353205,1.357586,1.362261,1.367263,1.372588,1.237674,1.316864,1.184357,1.195817,1.137701,1.152109,1.167516,1.113529,1.132450,0.871425,1.109427,0.993555,0.882208,0.775560,0.069002,0.136509
6035997,6035998,125749,50,10,2.571408,1.558978,1,390.156772,100.256414,500,0.033447,0.066804,0.100216,0.133622,0.166950,0.200288,0.233724,0.267096,1.488497,1.489714,1.420711,1.353205,1.357586,1.362261,1.367263,1.372588,1.237674,1.316864,1.184357,1.195817,1.137701,1.152109,1.167516,1.113529,1.132450,0.871425,1.109427,0.993555,0.882208,-0.001217,0.067785
6035998,6035999,125749,50,10,2.604744,1.272663,1,391.429435,102.861158,500,0.033337,0.066784,0.100141,0.133553,0.166959,0.200286,0.233625,0.267061,1.558978,1.488497,1.489714,1.420711,1.353205,1.357586,1.362261,1.367263,1.372588,1.237674,1.316864,1.184357,1.195817,1.137701,1.152109,1.167516,1.113529,1.132450,0.871425,1.109427,0.993555,0.070481,0.069264


In [None]:
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Score the averaged predictions against the held-out targets. MSE is computed
# once and reused for the RMSE line.
mse = mean_squared_error(y_test, Y_pred)
mae = mean_absolute_error(y_test, Y_pred)
r2 = r2_score(y_test, Y_pred)

print('Test rmse:', np.sqrt(mse))
print("Mean Absolute Error: ", mae)
# Mean absolute percentage error reporting is currently disabled.
print("r2 score: ", r2)
print("Mean Squared Error: ", mse)

Unnamed: 0,pressure
0,5.837492
1,5.907794
2,7.876254
3,11.742872
4,12.234987
...,...
6035995,3.869032
6035996,3.869032
6035997,3.798729
6035998,4.079938


In [None]:
from xgboost import plot_importance
import matplotlib.pyplot as plt


def plot_features(booster, figsize):
    """Draw XGBoost's feature-importance chart on a new figure.

    Args:
        booster: Fitted model passed to ``plot_importance`` as ``booster``.
        figsize: Figure size passed to ``plt.subplots``.

    Returns:
        Whatever ``plot_importance`` returns for the created axes.
    """
    _, axes = plt.subplots(figsize=figsize)
    return plot_importance(booster=booster, ax=axes)


# Feature importances of the most recently fitted fold model.
plot_features(model_xgb, (14,14))

(6036000, 7)