In [None]:
import pandas as pd
import numpy as np
import datetime

import plotly.express as exp
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
# Load the training set and report its dimensions.
training_data = pd.read_csv('../input/tabular-playground-series-jul-2021/train.csv')
print(training_data.shape)
# Parse the timestamp column with pd.to_datetime: casting to the unit-less
# np.datetime64 dtype through astype() raises TypeError on pandas >= 2.0.
training_data['date_time'] = pd.to_datetime(training_data['date_time'])
training_data.head(3)

In [None]:
# Columns of training_data used as model inputs in the cells that follow.
TRAINING_COLUMNS = ['deg_C', 'relative_humidity', 'absolute_humidity',
       'sensor_1', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5']

# One stacked subplot per input column, each drawn as a line against date_time.
figure = make_subplots(rows=len(TRAINING_COLUMNS), cols=1)
for row_number, col in enumerate(TRAINING_COLUMNS, start=1):
    trace = go.Scatter(
        x=training_data.date_time,
        y=training_data[col],
        mode='lines',
        name=col,
        showlegend=True,
    )
    # add_trace with row/col replaces the deprecated Figure.append_trace.
    figure.add_trace(trace, row=row_number, col=1)
figure.update_layout(height=2500)
figure.show()

In [None]:
from sklearn.model_selection import cross_validate
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [None]:
# Model for target_carbon_monoxide: fit on the training part of an 80/20 split,
# print the hold-out R2, then cross-validate on the training part.
target = training_data.target_carbon_monoxide
train_dataset = training_data[TRAINING_COLUMNS]
# NOTE(review): train_test_split shuffles rows by default, so the hold-out is a
# random sample rather than a chronological slice of date_time - confirm intended.
x_train, x_test, y_train, y_test = train_test_split(train_dataset, target, test_size=0.2, random_state=21)

# Seed the estimator (same seed as the split) so Restart & Run All reproduces
# the same fitted model and scores.
gbr_target_carbon_monoxide = GradientBoostingRegressor(random_state=21)

gbr_target_carbon_monoxide.fit(x_train, y_train)
prediction = gbr_target_carbon_monoxide.predict(x_test)
print("R2 Score : ", r2_score(y_test, prediction))
# cross_validate fits clones of the estimator; the model fitted above is kept
# as-is for the test-set predictions later in the notebook.
cross_validate(gbr_target_carbon_monoxide, x_train, y_train, cv=6)

In [None]:
# Model for target_nitrogen_oxides: fit on the training part of an 80/20 split,
# print the hold-out R2, then cross-validate on the training part.
target = training_data.target_nitrogen_oxides
train_dataset = training_data[TRAINING_COLUMNS]
# NOTE(review): train_test_split shuffles rows by default, so the hold-out is a
# random sample rather than a chronological slice of date_time - confirm intended.
x_train, x_test, y_train, y_test = train_test_split(train_dataset, target, test_size=0.2, random_state=21)

# Seed the estimator (same seed as the split) so Restart & Run All reproduces
# the same fitted model and scores.
gbr_target_nitrogen_oxides = GradientBoostingRegressor(random_state=21)

gbr_target_nitrogen_oxides.fit(x_train, y_train)
prediction = gbr_target_nitrogen_oxides.predict(x_test)
print("R2 Score : ", r2_score(y_test, prediction))
# cross_validate fits clones of the estimator; the model fitted above is kept
# as-is for the test-set predictions later in the notebook.
cross_validate(gbr_target_nitrogen_oxides, x_train, y_train, cv=6)

In [None]:
# Model for target_benzene: fit on the training part of an 80/20 split,
# print the hold-out R2, then cross-validate on the training part.
target = training_data.target_benzene
train_dataset = training_data[TRAINING_COLUMNS]
# NOTE(review): train_test_split shuffles rows by default, so the hold-out is a
# random sample rather than a chronological slice of date_time - confirm intended.
x_train, x_test, y_train, y_test = train_test_split(train_dataset, target, test_size=0.2, random_state=21)

# Seed the estimator (same seed as the split) so Restart & Run All reproduces
# the same fitted model and scores.
gbr_target_benzene = GradientBoostingRegressor(random_state=21)

gbr_target_benzene.fit(x_train, y_train)
prediction = gbr_target_benzene.predict(x_test)
print("R2 Score : ", r2_score(y_test, prediction))
# cross_validate fits clones of the estimator; the model fitted above is kept
# as-is for the test-set predictions later in the notebook.
cross_validate(gbr_target_benzene, x_train, y_train, cv=6)

In [None]:
# Read the test set, keep its timestamps aside for the submission file, and
# leave every other column in `test_data` as the input for prediction.
test_data = pd.read_csv('../input/tabular-playground-series-jul-2021/test.csv')
date_time = test_data['date_time']
test_data = test_data.drop(columns=['date_time'])

In [None]:
# Predict each target for the test rows. Selecting TRAINING_COLUMNS explicitly
# hands the models the same columns, in the same order, that they were fitted
# on, instead of relying on whatever columns remain in test_data.
test_features = test_data[TRAINING_COLUMNS]
test_target_carbon_monoxide = gbr_target_carbon_monoxide.predict(test_features)
test_target_nitrogen_oxides = gbr_target_nitrogen_oxides.predict(test_features)
test_target_benzene = gbr_target_benzene.predict(test_features)

In [None]:
# Assemble the submission table: the test timestamps followed by the three
# predicted targets, each rounded to one decimal place.
submission_columns = {
    'date_time': date_time,
    'target_carbon_monoxide': test_target_carbon_monoxide.round(1),
    'target_benzene': test_target_benzene.round(1),
    'target_nitrogen_oxides': test_target_nitrogen_oxides.round(1),
}
submission = pd.DataFrame(submission_columns)

# Write the file without the index column, then preview the first rows.
submission.to_csv('submission.csv', index=False)
submission.head(3)