In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [None]:
dataset=pd.read_csv("../input/tabular-playground-series-jul-2021/train.csv")
dataset.head()

In [None]:
dataset.info()

In [None]:
dataset.date_time=pd.to_datetime(dataset.date_time)
dataset.info()

In [None]:
carbon_monoxide= pd.DataFrame(columns=['target'])
carbon_monoxide.head()

In [None]:
carbon_monoxide.target=dataset['target_carbon_monoxide']
carbon_monoxide.head()

In [None]:
benzene= pd.DataFrame(columns=['target'])
benzene.head()

In [None]:
benzene.target=dataset['target_benzene']
benzene.head()

In [None]:
nitrogen_oxides= pd.DataFrame(columns=['target'])
nitrogen_oxides.head()

In [None]:
nitrogen_oxides.target=dataset['target_nitrogen_oxides']
nitrogen_oxides.head()

In [None]:
dataset.columns

In [None]:
dataset=dataset.drop(['target_carbon_monoxide', 'target_benzene', 'target_nitrogen_oxides'], axis = 1)
dataset.columns

In [None]:
# Place the carbon monoxide target next to the remaining `dataset` columns
# (column-wise concat, aligned on the row index).
# Only the 'target' column is taken from the left frame so the cell is
# idempotent: re-running it would otherwise append a second copy of every
# `dataset` column and leave duplicate column names.
carbon_monoxide = pd.concat([carbon_monoxide[["target"]], dataset], axis=1)
carbon_monoxide.head()

In [None]:
# Place the benzene target next to the remaining `dataset` columns
# (column-wise concat, aligned on the row index).
# Only the 'target' column is taken from the left frame so the cell is
# idempotent: re-running it would otherwise append a second copy of every
# `dataset` column and leave duplicate column names.
benzene = pd.concat([benzene[["target"]], dataset], axis=1)
benzene.head()

In [None]:
# Place the nitrogen oxides target next to the remaining `dataset` columns
# (column-wise concat, aligned on the row index).
# Only the 'target' column is taken from the left frame so the cell is
# idempotent: re-running it would otherwise append a second copy of every
# `dataset` column and leave duplicate column names.
nitrogen_oxides = pd.concat([nitrogen_oxides[["target"]], dataset], axis=1)
nitrogen_oxides.head()

In [None]:
!pip install pandas-profiling

In [None]:
from pandas_profiling import ProfileReport
ProfileReport(carbon_monoxide, title="EDA Report")

In [None]:
from pandas_profiling import ProfileReport
ProfileReport(benzene, title="EDA Report")

In [None]:
from pandas_profiling import ProfileReport
ProfileReport(nitrogen_oxides, title="EDA Report")

In [None]:
!pip install pycaret

In [None]:
from pycaret.regression import *

In [None]:
# Configure the PyCaret regression experiment for the carbon monoxide target.
# The keyword arguments follow the PyCaret 2.x `setup` signature.
reg_experiment = setup(
    carbon_monoxide,
    target="target",
    session_id=42,  # fixed session id so repeated runs use the same seed
    train_size=0.8,  # training over 80% of available data
    # Skip the interactive data-type confirmation prompt; without this the
    # cell waits for keyboard input and "Restart & Run All" never completes.
    silent=True,
    # --- column typing and imputation ---
    date_features=["date_time"],
    numeric_imputation="median",
    categorical_imputation="mode",
    handle_unknown_categorical=True,
    unknown_categorical_method="most_frequent",
    # --- scaling / transformation switches ---
    normalize=True,
    transformation=True,
    transform_target=True,
    # --- feature pruning ---
    remove_multicollinearity=True,  # drop one of the two features that are highly correlated with each other
    ignore_low_variance=True,  # categorical features with statistically insignificant variances are removed
    combine_rare_levels=True,  # categorical levels below rare_level_threshold are combined into a single level
)

In [None]:
# Compare PyCaret's candidate regressors on the experiment configured by the
# preceding setup() call; the result is displayed as the cell output.
compare_models()

In [None]:
# Build a CatBoost model within the current experiment.
catboost = create_model('catboost')

In [None]:
# Show the model object's text representation.
print(catboost)

In [None]:
# Tune the CatBoost model, using RMSE as the metric to optimise.
tuned_catboost = tune_model(catboost,optimize = 'RMSE')

In [None]:
# Open PyCaret's evaluation view for the tuned model.
evaluate_model(tuned_catboost)

In [None]:
# No `data` argument is given, so predictions are made on the data held by the
# experiment itself (presumably the hold-out split — confirm in the PyCaret docs).
predict_model(tuned_catboost)

In [None]:
# Read the competition test split; it is reused for all three targets below.
# NOTE(review): unlike the training frame, date_time is not passed through
# pd.to_datetime here — confirm the PyCaret pipeline parses it on its own.
test=pd.read_csv('../input/tabular-playground-series-jul-2021/test.csv')
test.head()

In [None]:
# Score the test rows with the tuned carbon monoxide model. The predictions
# are read from the 'Label' column of the returned frame further down.
# NOTE(review): the tuned model is used as-is; consider whether it should be
# refit on all training rows (finalize_model) before predicting — TODO confirm.
sample_submission = predict_model(tuned_catboost, data=test)
sample_submission.head()

In [None]:
submission= pd.DataFrame(columns=['date_time'])
submission.head()

In [None]:
submission.date_time=sample_submission.date_time
submission.head()

In [None]:
submission['target_carbon_monoxide']=sample_submission['Label']
submission.head()

In [None]:
# Configure the PyCaret regression experiment for the benzene target.
# The keyword arguments follow the PyCaret 2.x `setup` signature.
reg_experiment = setup(
    benzene,
    target="target",
    session_id=42,  # fixed session id so repeated runs use the same seed
    train_size=0.8,  # training over 80% of available data
    # Skip the interactive data-type confirmation prompt; without this the
    # cell waits for keyboard input and "Restart & Run All" never completes.
    silent=True,
    # --- column typing and imputation ---
    date_features=["date_time"],
    numeric_imputation="median",
    categorical_imputation="mode",
    handle_unknown_categorical=True,
    unknown_categorical_method="most_frequent",
    # --- scaling / transformation switches ---
    normalize=True,
    transformation=True,
    transform_target=True,
    # --- feature pruning ---
    remove_multicollinearity=True,  # drop one of the two features that are highly correlated with each other
    ignore_low_variance=True,  # categorical features with statistically insignificant variances are removed
    combine_rare_levels=True,  # categorical levels below rare_level_threshold are combined into a single level
)

In [None]:
# Compare PyCaret's candidate regressors on the benzene experiment configured
# by the preceding setup() call.
compare_models()

In [None]:
# Build a CatBoost model within the current experiment. This rebinds
# `catboost`, replacing the carbon monoxide model of the same name.
catboost = create_model('catboost')

In [None]:
# Show the model object's text representation.
print(catboost)

In [None]:
# Tune the CatBoost model, using RMSE as the metric to optimise. This rebinds
# `tuned_catboost`; the carbon monoxide predictions were already taken above.
tuned_catboost = tune_model(catboost,optimize = 'RMSE')

In [None]:
# Open PyCaret's evaluation view for the tuned model.
evaluate_model(tuned_catboost)

In [None]:
# No `data` argument is given, so predictions are made on the data held by the
# experiment itself (presumably the hold-out split — confirm in the PyCaret docs).
predict_model(tuned_catboost)

In [None]:
# Score the test rows with the tuned benzene model; `sample_submission` is
# overwritten with the new prediction frame.
sample_submission = predict_model(tuned_catboost, data=test)
sample_submission.head()

In [None]:
# Add the benzene predictions ('Label' column) to the submission frame.
submission['target_benzene']=sample_submission['Label']
submission.head()

In [None]:
# Configure the PyCaret regression experiment for the nitrogen oxides target.
# The keyword arguments follow the PyCaret 2.x `setup` signature.
reg_experiment = setup(
    nitrogen_oxides,
    target="target",
    session_id=42,  # fixed session id so repeated runs use the same seed
    train_size=0.8,  # training over 80% of available data
    # Skip the interactive data-type confirmation prompt; without this the
    # cell waits for keyboard input and "Restart & Run All" never completes.
    silent=True,
    # --- column typing and imputation ---
    date_features=["date_time"],
    numeric_imputation="median",
    categorical_imputation="mode",
    handle_unknown_categorical=True,
    unknown_categorical_method="most_frequent",
    # --- scaling / transformation switches ---
    normalize=True,
    transformation=True,
    transform_target=True,
    # --- feature pruning ---
    remove_multicollinearity=True,  # drop one of the two features that are highly correlated with each other
    ignore_low_variance=True,  # categorical features with statistically insignificant variances are removed
    combine_rare_levels=True,  # categorical levels below rare_level_threshold are combined into a single level
)

In [None]:
# Compare PyCaret's candidate regressors on the nitrogen oxides experiment
# configured by the preceding setup() call.
compare_models()

In [None]:
# Build the model registered under the 'et' id within the current experiment
# (a different estimator from the CatBoost used for the other two targets).
et = create_model('et')

In [None]:
# Show the model object's text representation.
print(et)

In [None]:
# Tune the model, using RMSE as the metric to optimise.
tuned_et = tune_model(et,optimize = 'RMSE')

In [None]:
# Open PyCaret's evaluation view for the tuned model.
evaluate_model(tuned_et)

In [None]:
# No `data` argument is given, so predictions are made on the data held by the
# experiment itself (presumably the hold-out split — confirm in the PyCaret docs).
predict_model(tuned_et)

In [None]:
# Score the test rows with the tuned nitrogen oxides model; `sample_submission`
# is overwritten with the new prediction frame.
sample_submission = predict_model(tuned_et, data=test)
sample_submission.head()

In [None]:
# Add the nitrogen oxides predictions ('Label' column) to the submission frame.
submission['target_nitrogen_oxides']=sample_submission['Label']
submission.head()

In [None]:
# Write the submission (date_time plus the three target columns added above)
# to final.csv in the working directory, without the row index.
submission.to_csv('final.csv', index=False)