In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
from fbprophet import Prophet
from sklearn.metrics import *

import os
# Print the full path of every file found under the Kaggle input directory.
for input_path in (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
):
    print(input_path)


In [None]:
# Load the competition train/test tables and cast the timestamp column
# to datetime64[ns] in the same expression.
df_train = pd.read_csv(
    '/kaggle/input/tabular-playground-series-jul-2021/train.csv'
).astype({'date_time': 'datetime64[ns]'})
df_test = pd.read_csv(
    '/kaggle/input/tabular-playground-series-jul-2021/test.csv'
).astype({'date_time': 'datetime64[ns]'})


In [None]:
# Display the training frame as the cell output.
df_train

In [None]:
# Display the test frame as the cell output.
df_test

In [None]:
# Summary statistics of the training frame, transposed so each column is a row.
df_train.describe().T

In [None]:
# Summary statistics of the test frame, transposed so each column is a row.
df_test.describe().T

In [None]:
# Stack the train features (targets removed) on top of the test features.
# The original row indices of both frames are kept as-is.
target_cols = ['target_carbon_monoxide', 'target_benzene', 'target_nitrogen_oxides']
train_features = df_train.drop(columns=target_cols)
df_all_data = pd.concat([train_features, df_test], axis=0)


In [None]:
# Display the combined train+test feature frame as the cell output.
df_all_data

In [None]:
# Histogram of every column of df_all_data except the first one
# (the first column is skipped, as in the original i+1 indexing).
n_grid_cols = 5
# add_subplot requires integer grid dimensions; np.ceil returns a float,
# which recent Matplotlib releases reject, so cast explicitly.
n_grid_rows = int(np.ceil(len(df_all_data.columns) / n_grid_cols))

fig = plt.figure(figsize=(12, 8))
for position, column in enumerate(df_all_data.columns[1:], start=1):
    ax = fig.add_subplot(n_grid_rows, n_grid_cols, position)
    df_all_data[column].hist(bins=20, ax=ax)
    ax.set_title(f'{column}')
# Keep subplot titles from overlapping the neighbouring axes.
fig.tight_layout()
plt.show()

In [None]:
# Correlation heatmap of the combined train+test features.
# Numeric columns are selected explicitly: whether DataFrame.corr() leaves
# out the datetime column on its own depends on the pandas version
# (the numeric_only default changed in pandas 2.0).
corr = df_all_data.select_dtypes(include='number').corr()
# Hide the upper triangle (diagonal included) so each pair is drawn once.
mask = np.triu(np.ones_like(corr, dtype=bool))

plt.figure(figsize=(5, 5))
plt.title('Corelation matrix')
sns.heatmap(corr, mask=mask, cmap='Spectral_r', linewidths=.5)
plt.show()

In [None]:
# Correlation heatmap of the training frame (features and targets).
# Numeric columns are selected explicitly: whether DataFrame.corr() leaves
# out the datetime column on its own depends on the pandas version
# (the numeric_only default changed in pandas 2.0).
corr = df_train.select_dtypes(include='number').corr()
# Hide the upper triangle (diagonal included) so each pair is drawn once.
mask = np.triu(np.ones_like(corr, dtype=bool))

plt.figure(figsize=(5, 5))
plt.title('Corelation matrix')
sns.heatmap(corr, mask=mask, cmap='Spectral_r', linewidths=.5)
plt.show()

In [None]:

# Split the training frame at a fixed date: rows on or after the date go to
# df_train1, rows strictly before it go to df_val.
# NOTE(review): the hold-out is the *earlier* period — confirm this is intended.
mday = pd.to_datetime('2010-6-01')

on_or_after_split = df_train['date_time'].ge(mday)
before_split = df_train['date_time'].lt(mday)

df_train1 = df_train.loc[on_or_after_split]
df_val = df_train.loc[before_split]
df_train1.columns


### Use Prophet to predict each target


In [None]:
def create_data(target):
    """Fit Prophet for one target, report hold-out metrics, and forecast the test set.

    Two models with the same configuration are fitted:

    * an evaluation model trained on ``df_train1`` and scored on ``df_val``.
      Training the scored model on all of ``df_train`` would include the
      ``df_val`` rows and make the printed scores in-sample;
    * a final model trained on all of ``df_train``, used for the test forecast.

    Fitting twice roughly doubles the runtime per target.

    Reads the notebook-level frames ``df_train``, ``df_train1``, ``df_val``
    and ``df_test``. Shows two Prophet plots and prints R2 and RMSLE for the
    hold-out set.

    Parameters
    ----------
    target : str
        Name of the column in the training frames to use as ``y``.

    Returns
    -------
    pandas.DataFrame
        Output of ``Prophet.predict`` on the test features. ``yhat`` is
        returned unclipped, so it may contain negative values.
    """
    # Temperature, humidity and sensor readings used as extra regressors.
    regressors = [
        'deg_C', 'relative_humidity', 'absolute_humidity',
        'sensor_1', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5',
    ]

    def to_prophet_frame(frame, with_target=True):
        # Select the modelling columns and rename them to Prophet's ds / y.
        columns = ['date_time'] + regressors + ([target] if with_target else [])
        return frame[columns].rename(columns={'date_time': 'ds', target: 'y'})

    def build_model():
        # Daily seasonality only, with every feature registered as a regressor.
        model = Prophet(yearly_seasonality=False, weekly_seasonality=False, daily_seasonality=True)
        for name in regressors:
            model.add_regressor(name)
        return model

    # --- Hold-out evaluation: fit without the validation rows ---
    eval_model = build_model()
    eval_model.fit(to_prophet_frame(df_train1))

    val = to_prophet_frame(df_val)
    val_predict = eval_model.predict(val)
    # Negative predictions are replaced with 0 (RMSLE needs non-negative input).
    # NOTE(review): assumes df_val rows are in chronological order so they line
    # up with the rows returned by predict — confirm.
    ypred1 = np.maximum(val_predict['yhat'].values, 0)
    ytest1 = val['y'].values
    r2score = r2_score(ytest1, ypred1)
    rmsle = np.sqrt(mean_squared_log_error(ytest1, ypred1))

    eval_model.plot(val_predict)
    plt.show()

    print(f'{target} R2 SCORE：{r2score:.4f}')
    print(f'{target} RMSLE：{rmsle:.4f}')

    # --- Final model: refit on the full training data and forecast the test set ---
    final_model = build_model()
    final_model.fit(to_prophet_frame(df_train))
    predict = final_model.predict(to_prophet_frame(df_test, with_target=False))

    final_model.plot(predict)
    plt.show()

    return predict


In [None]:
# One Prophet forecast per target; the targets are the last three columns
# of df_train, taken in their column order.
predict_result = [create_data(target) for target in df_train.columns[-3:]]

In [None]:
# Display the forecast frame returned for the first target.
predict_result[0]

In [None]:
# Load the sample submission file; its target columns are overwritten with
# the forecasts in a later cell.
df_submision = pd.read_csv('/kaggle/input/tabular-playground-series-jul-2021/sample_submission.csv')
# Display the loaded frame as the cell output.
df_submision

In [None]:
# Write the forecasts into the submission frame; negative predictions are
# replaced with 0. Position i in predict_result maps to the i-th name below.
submission_targets = ('target_carbon_monoxide', 'target_benzene', 'target_nitrogen_oxides')
for result_idx, submission_col in enumerate(submission_targets):
    forecast_values = predict_result[result_idx]['yhat'].values
    df_submision[submission_col] = np.maximum(forecast_values, 0)


In [None]:
# Display the filled-in submission frame as the cell output.
df_submision

In [None]:
# Summary statistics of the submission frame.
df_submision.describe()

In [None]:
# Save the submission without the row index column.
df_submision.to_csv('prophet_3.csv', index=False)