In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from sklearn.svm import SVR
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_log_error

# Load Data

In [None]:
# Read the training set, the test set and the sample submission (in that
# order) into DataFrames.
train, test, ss = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tabular-playground-series-jul-2021/train.csv',
        '../input/tabular-playground-series-jul-2021/test.csv',
        '../input/tabular-playground-series-jul-2021/sample_submission.csv',
    )
)

# Preview the first rows of the training data.
train.head()

# Preprocessing

In [None]:
# The first N_INPUT_COLUMNS columns stay in `train` as model inputs; every
# column after them becomes `target`.
N_INPUT_COLUMNS = 9
train, target = train.iloc[:, :N_INPUT_COLUMNS], train.iloc[:, N_INPUT_COLUMNS:]


def preprocessing(df):
    """Return a copy of ``df`` with ``date_time`` reduced to a float hour value.

    Characters 11-12 of every ``date_time`` string are extracted and converted
    to float (assumes timestamps shaped like ``'YYYY-MM-DD HH:MM:SS'`` --
    TODO confirm against the data files).

    The input frame is left untouched: the work is done on a copy, so callers
    that pass a slice of another frame do not trigger pandas' chained-assignment
    warning and do not have their data modified behind their back.

    Args:
        df: DataFrame with a string-valued ``date_time`` column.

    Returns:
        A new DataFrame with the same columns, where ``date_time`` is float.
    """
    out = df.copy()
    # Vectorised string slice instead of a per-row Python lambda.
    out['date_time'] = out['date_time'].str[11:13].astype(float)
    return out


# Preprocess both feature frames and convert them to NumPy arrays; the target
# frame needs no preprocessing and is converted as-is.
train, test = (preprocessing(frame).to_numpy() for frame in (train, test))
target = target.to_numpy()

# Model

In [None]:
# SVR regularisation strength (C) per target, keyed by the label used in the
# fold report. The dict order must match the column order of `target`.
SVR_C_BY_TARGET = {'carbon': 0.1, 'benzene': 0.1, 'nitrogen': 1.0}
# Replacement value for negative predictions (see _fit_and_score).
MIN_PREDICTION = 0.000000001


def _fit_and_score(X_fit, y_fit, X_eval, y_eval, C):
    """Fit an SVR on one target column and score it on held-out rows.

    Args:
        X_fit, y_fit: training features and the single target column.
        X_eval, y_eval: validation features and the matching target column.
        C: regularisation parameter passed to ``SVR``.

    Returns:
        Tuple ``(model, score)``: the fitted regressor and its
        ``mean_squared_log_error`` on the validation rows.
    """
    model = SVR(C=C)
    model.fit(X=X_fit, y=y_fit)
    pred = model.predict(X_eval)
    # Negative predictions are replaced before scoring; scikit-learn's MSLE
    # rejects negative values.
    pred[pred < 0] = MIN_PREDICTION
    return model, mean_squared_log_error(y_eval, pred)


# NOTE(review): `best_model` ends up being the three regressors from the single
# fold with the lowest mean validation score; they were fitted on that fold's
# training rows only, not on the full training set.
best_model = None
best_score = float('inf')
kf = KFold(n_splits=5, random_state=42, shuffle=True)
for tr_idx, val_idx in kf.split(train, y=target):
    X_train, y_train = train[tr_idx], target[tr_idx]
    X_val, y_val = train[val_idx], target[val_idx]

    # One independent regressor per target column.
    fold_models = []
    fold_scores = {}
    for col, (label, C) in enumerate(SVR_C_BY_TARGET.items()):
        model, score = _fit_and_score(
            X_train, y_train[:, col], X_val, y_val[:, col], C
        )
        fold_models.append(model)
        fold_scores[label] = score

    fold_score = sum(fold_scores.values()) / len(fold_scores)

    # Same report layout as before: "<label>: <score>" pairs, then the mean.
    report = []
    for label, score in fold_scores.items():
        report += [label + ':', score]
    print(*report, 'mean:', fold_score)

    if fold_score < best_score:
        best_score = fold_score
        # Kept as a tuple ordered (carbon, benzene, nitrogen) for later cells.
        best_model = tuple(fold_models)

In [None]:
# Replacement value for negative test predictions (the same value the
# validation step uses).
MIN_TEST_PREDICTION = 0.000000001

# `best_model` holds one fitted regressor per target; regressor i fills
# submission column i + 1 (column 0 of `ss` is left untouched).
for col, model in enumerate(best_model, start=1):
    test_pred = model.predict(test)
    test_pred[test_pred < 0] = MIN_TEST_PREDICTION
    # Assign the raw array so values are written positionally, without relying
    # on index alignment between a new Series and `ss`.
    ss.iloc[:, col] = test_pred

ss.to_csv('submission.csv', index=False)