## Chained Multioutput Regression

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from math import sqrt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the read-only Kaggle input directory and print the
# full path of every file found beneath it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for file_path in (os.path.join(root, name) for name in files):
        print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

#### Read the train data set

In [None]:
# Load the training CSV, using its first column as the row index.
df_train = pd.read_csv(
    '/kaggle/input/tabular-playground-series-jul-2021/train.csv',
    index_col=0,
)
# Convert the index labels to datetimes.
df_train.index = pd.to_datetime(df_train.index)
df_train

#### There are no NaN or empty values to be filled or fixed, so no cleaning is required.

#### Deciding target variables and features

In [None]:
# The three pollutant columns are the regression targets; every other
# column of the training frame is used as a feature.
target_columns = ['target_carbon_monoxide', 'target_benzene', 'target_nitrogen_oxides']
y = df_train[target_columns]
X = df_train.drop(columns=target_columns)

#### Train/Validation split of 80/20

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation; the fixed random_state keeps
# the split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=53,
)

# Show the shape of each of the four resulting pieces on one line.
split_shapes = [part.shape for part in (X_train, X_test, y_train, y_test)]
print(*split_shapes)

### Regressor Chain MultiOutput

RegressorChain splits the multi-output problem into three dependent single-output regression problems:
  
Regressor 1: Given X,  predict **'target_carbon_monoxide'**.  

Regressor 2: Given X and 'target_carbon_monoxide',  predict **'target_benzene'**.  

Regressor 3: Given X, 'target_carbon_monoxide', and 'target_benzene',  predict **'target_nitrogen_oxides'**.

#### 1. XGBRegressor

In [None]:
from xgboost import XGBRegressor
from sklearn.multioutput import RegressorChain
from sklearn.metrics import mean_squared_error

# Base learner shared by every link of the chain.
xgbmodel = XGBRegressor(
    n_estimators=300,
    max_depth=7,
    learning_rate=0.1,
    random_state=53,
)

# Chain the three targets in their column order (0 -> 1 -> 2) and fit
# the whole chain on the training split.
xgbwrapper = RegressorChain(xgbmodel, order=[0, 1, 2])
xgbwrapper.fit(X_train, y_train)

# Built-in score of the fitted chain on the held-out split.
test_score = xgbwrapper.score(X_test, y_test)
print('Test Score :', test_score)

# Root of the mean squared error between held-out targets and predictions.
y_pred = xgbwrapper.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = sqrt(mse)
print('RMSE :', rmse)

#### 2. KNN with MinMax Norm

In [None]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import MinMaxScaler

# Base learner: k-nearest-neighbours regression with k = 7.
knnmodel = KNeighborsRegressor(n_neighbors=7)

# Fit the scaler on the training split only, then apply the same
# transformation to both splits so the validation rows do not influence it.
minmax = MinMaxScaler()
minmax.fit(X_train)
X_train_norm = minmax.transform(X_train)
X_test_norm = minmax.transform(X_test)

# Chain the three targets in their column order and fit on the scaled features.
knnwrapper = RegressorChain(knnmodel, order=[0, 1, 2])
knnwrapper.fit(X_train_norm, y_train)

# Built-in score of the fitted chain on the scaled held-out split.
test_score = knnwrapper.score(X_test_norm, y_test)
print('Test Score :', test_score)

# Root of the mean squared error between held-out targets and predictions.
y_pred = knnwrapper.predict(X_test_norm)
mse = mean_squared_error(y_test, y_pred)
rmse = sqrt(mse)
print('RMSE :', rmse)

#### 3. KNN without norm

In [None]:
# Fit a *separate* chain on the unscaled features. Refitting `knnwrapper`
# here would overwrite the model trained on the min-max scaled data, so that
# name would silently mean something different depending on which cells were
# run last. A new, clearly named wrapper keeps both models available and
# makes this cell safe to re-run.
knnwrapper_raw = RegressorChain(knnmodel, order=[0, 1, 2])

# fit the model on the raw (un-normalized) training features
knnwrapper_raw.fit(X_train, y_train)

# score on test
print('Test Score :', knnwrapper_raw.score(X_test, y_test))

# Root of the mean squared error between held-out targets and predictions.
y_pred = knnwrapper_raw.predict(X_test)

rmse = sqrt(mean_squared_error(y_test, y_pred))
print('RMSE :', rmse)

#### Reading test data for future predictions and submission

In [None]:
# Load the competition test CSV, using its first column as the row index.
df_test = pd.read_csv(
    '/kaggle/input/tabular-playground-series-jul-2021/test.csv',
    index_col=0,
)
df_test

Future Predictions

In [None]:
# Predict all three targets for the test rows with the fitted XGB chain.
future_prediction = xgbwrapper.predict(df_test)

# Wrap the prediction array in a frame that reuses the test index and
# labels the columns with the three target names.
submission = pd.DataFrame(
    data=future_prediction,
    index=df_test.index,
    columns=['target_carbon_monoxide', 'target_benzene', 'target_nitrogen_oxides'],
)
submission

Output file for submission

In [None]:
# Sanity-check the number of rows and columns before writing the file.
print(submission.shape)

# Write the predictions as CSV. The original path had no file extension;
# an explicit `.csv` suffix makes the output recognisable as a CSV file
# when it is picked up for submission.
submission.to_csv('/kaggle/working/final_submission.csv')