In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input mount.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# NOTE: `lgb` is bound to the LGBMRegressor class itself (not the lightgbm
# module), so calling `lgb()` later in the notebook constructs a regressor.
from lightgbm import LGBMRegressor as lgb

from sklearn import preprocessing
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default plot theme to the figures drawn in this notebook.
sns.set()

import warnings
# Silences all warnings for the rest of the session; remove this line while
# debugging so library warnings are not hidden.
warnings.filterwarnings("ignore")

# 1. Import data

In [None]:
# Load the three competition tables; the reads are independent of each other.
train = pd.read_csv(
    "/kaggle/input/tabular-playground-series-jul-2021/train.csv"
)
test = pd.read_csv(
    "/kaggle/input/tabular-playground-series-jul-2021/test.csv"
)
sample_submission = pd.read_csv(
    "/kaggle/input/tabular-playground-series-jul-2021/sample_submission.csv"
)

In [None]:
# Preview the submission template read from sample_submission.csv.
sample_submission

In [None]:
# Summarise the submission template: column names, non-null counts and dtypes.
sample_submission.info()

In [None]:
# Preview the training table read from train.csv.
train

In [None]:
# Summarise the training table: column names, non-null counts and dtypes.
train.info()

In [None]:
# Preview the test table read from test.csv.
test

In [None]:
# Summarise the test table: column names, non-null counts and dtypes.
test.info()

# 2. EDA

In [None]:
# Give both frames a parsed timestamp column ('date_time_2') and the
# hour-of-day extracted from it ('hour').
for frame in (train, test):
    parsed_time = pd.to_datetime(frame['date_time'])
    frame['date_time_2'] = parsed_time
    frame['hour'] = parsed_time.dt.hour

In [None]:
# One violin plot per target, showing its distribution for each hour of the day.
for target_name in (
    "target_carbon_monoxide",
    "target_benzene",
    "target_nitrogen_oxides",
):
    sns.catplot(data=train, x="hour", y=target_name, kind="violin")
    plt.show()

# 3. Preprocessing

In [None]:
# Raw feature columns that are min-max scaled. Each one gets a "<name>_2"
# scaled copy, after which the raw column is dropped from the model frames.
FEATURES_TO_SCALE = [
    "deg_C", "relative_humidity", "absolute_humidity",
    "sensor_1", "sensor_2", "sensor_3", "sensor_4", "sensor_5",
]
# Target columns; they only carry values for the train rows, so they are
# removed from the test frame.
TARGET_COLUMNS = [
    "target_carbon_monoxide", "target_benzene", "target_nitrogen_oxides",
]


def _add_minmax_scaled(frame, feature_names, suffix="_2"):
    """Return a copy of ``frame`` with min-max scaled copies of the given columns.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table containing every column listed in ``feature_names``.
    feature_names : list of str
        Columns to scale; each is scaled independently over all rows of ``frame``.
    suffix : str, default "_2"
        Appended to the source column name to form the scaled column name.

    Returns
    -------
    pandas.DataFrame
        Copy of ``frame`` with one extra ``<name><suffix>`` column per feature,
        appended in the order given. The input frame is not modified.
    """
    scaled = frame.copy()
    for name in feature_names:
        scaled[name + suffix] = preprocessing.minmax_scale(scaled[name].to_numpy())
    return scaled


# Stack train and test so both are scaled with the same min/max.
# (The frame is deliberately not called `all`, which would shadow the builtin.)
# NOTE(review): scaling over train+test lets test-set ranges influence the
# training features; kept as-is so the resulting features are unchanged.
combined_raw = pd.concat([train, test], ignore_index=True)
combined_scaled = _add_minmax_scaled(combined_raw, FEATURES_TO_SCALE)

# ignore_index=True places the train rows at positions 0..len(train)-1, so
# split by row count instead of relying on train's index labels.
n_train = len(train)
train_scale = combined_scaled.iloc[:n_train].drop(columns=FEATURES_TO_SCALE)
test_scale = (
    combined_scaled.iloc[n_train:]
    .drop(columns=TARGET_COLUMNS + FEATURES_TO_SCALE)
    .reset_index(drop=True)  # renumber test rows from 0 without keeping the old index
)

In [None]:
# Preview the training frame after scaling (raw feature columns dropped).
train_scale

In [None]:
# Preview the test frame after scaling (raw feature and target columns dropped).
test_scale

In [None]:
# Pairwise correlations between the numeric columns of the training frame.
# train_scale still carries 'date_time' (strings) and 'date_time_2'
# (timestamps); DataFrame.corr() no longer drops non-numeric columns silently
# in pandas >= 2.0 and raises instead, so select the numeric columns explicitly.
corr = train_scale.select_dtypes(include="number").corr()
plt.figure(figsize=(20,10))
# Fixed [-1, 1] colour range centred on 0 so positive and negative correlations
# are directly comparable; annot=True prints each coefficient in its cell.
sns.heatmap(corr, vmin=-1, vmax=1, center=0, square=False, annot=True, cmap='coolwarm')
plt.show()

# 4. Modeling

In [None]:
# Model inputs: every test_scale column except the two timestamp columns.
columns = test_scale.drop(columns=['date_time', 'date_time_2']).columns
columns

In [None]:
# Feature matrices for fitting and prediction, using the same column order.
X = train_scale.loc[:, columns].to_numpy()
X_test = test_scale.loc[:, columns].to_numpy()

# One (n_rows, 1) column vector per target.
value_1, value_2, value_3 = (
    train_scale[target_name].to_numpy().reshape(-1, 1)
    for target_name in (
        'target_carbon_monoxide',
        'target_benzene',
        'target_nitrogen_oxides',
    )
)

In [None]:
# Fit one default-parameter regressor per target on the shared feature matrix.
# fit() returns the estimator itself, so construction and fitting are chained.
model_1 = lgb().fit(X, value_1)  # carbon monoxide
model_2 = lgb().fit(X, value_2)  # benzene
model_3 = lgb().fit(X, value_3)  # nitrogen oxides
# Keep the cell's displayed output: the last fitted estimator.
model_3

# 5. Prediction

In [None]:
# Map each submission column to the model fitted for it, then overwrite the
# template values with that model's predictions on the test features.
model_by_target = {
    'target_carbon_monoxide': model_1,
    'target_benzene': model_2,
    'target_nitrogen_oxides': model_3,
}
for target_name, fitted_model in model_by_target.items():
    sample_submission[target_name] = fitted_model.predict(X_test)

sample_submission

# 6. Make submission file

In [None]:
# Write the filled-in template to the working directory, without the row index.
sample_submission.to_csv(path_or_buf='submission.csv', index=False)