In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found under the Kaggle input
# directory, then echo the paths one per line.
input_paths = (
    os.path.join(folder, file_name)
    for folder, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the three competition CSVs in one pass: training data, test data and
# the sample submission, in that order.
train0, test0, sample0 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/tabular-playground-series-jul-2021/train.csv",
        "../input/tabular-playground-series-jul-2021/test.csv",
        "../input/tabular-playground-series-jul-2021/sample_submission.csv",
    )
)
# Quick sanity check of the (rows, columns) of each frame.
print(train0.shape, test0.shape, sample0.shape)

In [None]:
# Preview the first rows of the raw training frame to check how the columns were read.
train0.head()

In [None]:
# Convert the `date_time` column of both raw frames to pandas datetimes so the
# `.dt` accessor can be used on it when building calendar features.
for raw_frame in (train0, test0):
    raw_frame["date_time"] = pd.to_datetime(raw_frame["date_time"])

In [None]:
def getFeatures(df):
    """Return a copy of `df` with calendar features derived from `date_time`.

    Added columns (in this order):
      hour_sin, weekday_sin, hour_cos, weekday_cos -- cyclic (sine / cosine)
          encodings of the hour of day and the day of week.
      is_weekday -- True for Monday..Friday, False for Saturday / Sunday.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime-typed `date_time` column.

    Returns
    -------
    pandas.DataFrame
        A new frame; the frame passed in is left untouched.

    Notes
    -----
    The angles use the true period of each cycle (24 hours, 7 days).  Dividing
    by the largest value present in the data (23 / 6) would map hour 23 onto
    the same point as hour 0 and Sunday onto Monday, would make the encoding
    depend on which rows happen to be in the frame, and would produce NaN
    (0 / 0) for a frame whose largest hour or weekday is 0.
    """
    HOURS_PER_DAY = 24
    DAYS_PER_WEEK = 7

    # Work on a copy so calling the function never alters the caller's frame.
    df = df.copy()

    timestamps = df["date_time"]
    weekday = timestamps.dt.dayofweek  # Monday == 0 ... Sunday == 6

    hour_angle = 2 * np.pi * timestamps.dt.hour / HOURS_PER_DAY
    weekday_angle = 2 * np.pi * weekday / DAYS_PER_WEEK

    df["hour_sin"] = np.sin(hour_angle)
    df["weekday_sin"] = np.sin(weekday_angle)

    df["hour_cos"] = np.cos(hour_angle)
    df["weekday_cos"] = np.cos(weekday_angle)

    # Weekdays are Monday (0) through Friday (4).
    df["is_weekday"] = weekday < 5

    return df

In [None]:
# Build the calendar-derived feature columns for the training and test frames.
train, test = (getFeatures(raw_frame) for raw_frame in (train0, test0))

In [None]:
# Model inputs: every column of the engineered test frame except the first one.
# NOTE(review): this assumes the first column is `date_time` — confirm against the CSV.
features = test.iloc[:, 1:].columns
features

In [None]:
# Keep the timestamps of each split aside; they are not part of `features`.
train_dates = train.date_time
test_dates = test.date_time

# Stack the train and test feature matrices row-wise into a single frame.
train_part = train[features]
test_part = test[features]
all_features = pd.concat([train_part, test_part])

# Report the (rows, columns) of each piece and of the stacked frame.
for label, shape in (
    ("Train dimension: ", train_part.shape),
    ("Test dimension: ", test_part.shape),
    ("Combined dataframe dimension: ", all_features.shape),
):
    print(label, shape)

In [None]:
# Columns at positions 9, 10 and 11 of the raw training frame are used as the
# regression targets.
# NOTE(review): positional selection — confirm these are the three target columns.
target_positions = slice(9, 12)
targets = train0.iloc[:, target_positions]
targets.head()

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_log_error
from sklearn.model_selection import GridSearchCV

In [None]:
# Hold out 30% of the labelled rows BEFORE fitting any preprocessing, so that
# neither the validation fold nor the competition test rows influence the
# scaler statistics (fitting on the combined train + test frame leaks both).
# random_state is fixed so the split is reproducible.
raw_x_train, raw_x_test, y_train, y_test = train_test_split(
    train[features], targets, test_size = 0.3, random_state = 2
)

# Learn the per-column mean / standard deviation from the training fold only.
scaler = StandardScaler()
scaler.fit(raw_x_train)


def _apply_scaler(frame):
    """Return `frame` standardised with the fitted `scaler`, keeping its index and column names."""
    return pd.DataFrame(scaler.transform(frame), columns = features, index = frame.index)


# Model inputs for fitting and for hold-out evaluation.
x_train = _apply_scaler(raw_x_train)
x_test = _apply_scaler(raw_x_test)

# Full train / test matrices, transformed with the same training-fold statistics.
scaled_train = _apply_scaler(train[features])
scaled_test = _apply_scaler(test[features])
print(scaled_train.shape, scaled_test.shape)

In [None]:
# Forest settings gathered in one place: 2000 trees, n_jobs = -1 (all available
# cores) and a fixed seed so repeated runs build the same forest.
rf_settings = {"n_estimators": 2000, "n_jobs": -1, "random_state": 42}
rf = RandomForestRegressor(**rf_settings)
rf.fit(x_train, y_train)

In [None]:
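# Disabled hyper-parameter search, kept for reference only.
# `param_grid` is not defined in the cells shown here; define it before uncommenting.
# gridTuning = GridSearchCV(rf, param_grid=param_grid, n_jobs = 4, cv = 10)
# gridTuning.fit(x_train, y_train)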

In [None]:
# Predict on x_test, the hold-out part produced by train_test_split (not the competition test file).
rf_predicts = rf.predict(x_test)

In [None]:
# Wrap the hold-out predictions in a DataFrame so single output columns can be picked by position.
# Note: despite the name, these are the predictions for x_test (the hold-out split), not for the test CSV.
test_pred_df = pd.DataFrame(rf_predicts)

In [None]:
# Mean absolute error on the hold-out split.
# scikit-learn metrics take their arguments as (y_true, y_pred); the value is
# the same either way for this symmetric metric, but the documented order
# keeps the call consistent with metrics where the order does matter.
mean_absolute_error(y_test, rf_predicts)

In [None]:
def _column_rmsle(position):
    """Root mean squared log error of output column `position` on the hold-out split.

    Arguments are passed to scikit-learn in the documented (y_true, y_pred) order.
    """
    actual = y_test.iloc[:, position]
    predicted = test_pred_df.iloc[:, position]
    return np.sqrt(mean_squared_log_error(actual, predicted))


# One RMSLE per output column, in column order (positions 0, 1, 2).
CO_rmsle, C6H6_rmsle, NO_rmsle = (_column_rmsle(position) for position in range(3))

In [None]:
# Unweighted average of the three per-target RMSLE values.
sum((CO_rmsle, C6H6_rmsle, NO_rmsle)) / 3

In [None]:
# Square root of the mean squared log error computed over all output columns at
# once (arguments in scikit-learn's (y_true, y_pred) order).
# The per-column errors are averaged before the square root is taken, so this
# generally differs from an average of per-column RMSLE values.
np.sqrt(mean_squared_log_error(y_test, rf_predicts))

In [None]:
# Predict the targets for the scaled competition test rows and label the output
# columns with the target names.
target_names = list(targets.columns)
submissions = pd.DataFrame(rf.predict(scaled_test), columns = target_names)

# Put the timestamps in front so the layout is: date_time, then the targets.
submissions.insert(0, "date_time", test_dates)
submissions.head()

In [None]:
# Write the submission table to the current working directory; index = False keeps the row index out of the CSV.
submissions.to_csv("submissions.csv", index = False)