In [None]:
import pandas as pd

In [None]:
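# Install PyCaret (uncomment to run).
# NOTE(review): the cells below appear to target the PyCaret 2.x API
# (setup(silent=...), pycaret.utils.check_metric, the "Label" prediction column);
# pin a 2.x release when installing, e.g. pycaret==2.3.10.
#!pip install pycaret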

In [None]:
#---- pyCaret ----
#PyCaret regressor library
from pycaret.regression import *

#PyCaret metric function
from pycaret.utils import check_metric

In [None]:
# Load the competition train and test sets.
# The dataset directory is named once so both paths stay in sync and can be
# repointed in a single place when the notebook runs outside Kaggle.
INPUT_DIR = '/kaggle/input/tabular-playground-series-jul-2021'
df = pd.read_csv(INPUT_DIR + '/train.csv')
df_test = pd.read_csv(INPUT_DIR + '/test.csv')

In [None]:
# Preview the first rows of the training data.
df.head()

In [None]:
# (rows, columns) of the training data.
df.shape

In [None]:
# Data-quality overview: dimensions, repeated timestamps, and a per-column
# summary of dtype, missing values and number of distinct values.
df_nb = df.shape[0]

# Report how many rows and columns df holds.
print("Le df contient {} lignes et {} colonnes".format(*df.shape))

# Row count minus distinct date_time values = number of repeated timestamps.
n_distinct_dates = df["date_time"].nunique()
print("Nombre de doublons sur le champ date_time = ", df_nb - n_distinct_dates)

# One row per column of df: dtype, null count, null percentage, distinct values.
null_counts = df.isnull().sum()
df_missing_data = pd.concat(
    [df.dtypes, null_counts, (null_counts / df_nb) * 100, df.nunique()],
    axis=1,
)
df_missing_data.columns = ["Type", "Is null", "Pourcentage de Null", "Nb Unique"]

# Display the summary ordered by dtype, then by distinct-value count (descending).
df_missing_data.sort_values(by=["Type", "Nb Unique"], ascending=False)

In [None]:
# Summary statistics of the training data.
df.describe()

In [None]:
# Preview the first rows of the competition test data.
df_test.head()

In [None]:
# Summary statistics of the competition test data.
df_test.describe()

In [None]:
# Split the labelled data: 80% (fixed seed) goes to modeling, and the
# remaining 20% is held back as "unseen" rows for a final check.
_modeling_rows = df.sample(frac=0.8, random_state=10)
data_unseen = df.drop(_modeling_rows.index).reset_index(drop=True)
data_train = _modeling_rows.reset_index(drop=True)

print('Data for Modeling: ' + str(data_train.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))

# I - Experimentation target : target_carbon_monoxide

In [None]:
# Configure the PyCaret experiment for target_carbon_monoxide.
# The two other targets and date_time are excluded from the features.
setup_target_carbon_monoxide = setup(
    data=data_train,
    target='target_carbon_monoxide',
    ignore_features=['target_benzene', 'target_nitrogen_oxides', 'date_time'],
    normalize=True,
    transformation=True,
    transform_target=True,
    session_id=24,
    silent=True,
)

In [None]:
# Compare the available regressors, ranked by RMSLE, and keep the top three.
best_target_carbon_monoxide = compare_models(n_select=3, sort='RMSLE')

In [None]:
# Blend the selected models into a single ensemble estimator.
model_target_carbon_monoxide = blend_models(estimator_list=best_target_carbon_monoxide)

In [None]:
# Show the blended estimator's printed representation.
print(model_target_carbon_monoxide )

In [None]:
# Tuned model: run hyperparameter optimization on the blended estimator
# (tune_model with its default settings).
Tuned_model_target_carbon_monoxide = tune_model(estimator=model_target_carbon_monoxide)

In [None]:
# Show the tuned estimator's printed representation.
print(Tuned_model_target_carbon_monoxide)

In [None]:
# Residual plot of the tuned carbon-monoxide model (no `plot` argument is passed).
plot_model(Tuned_model_target_carbon_monoxide)

In [None]:
# Score the tuned model on the hold-out split created by setup(), then draw the
# prediction-error plot.
# Author's note: test (30%), Transformed Test Set (1707, 8).
predict_model(Tuned_model_target_carbon_monoxide)
plot_model(Tuned_model_target_carbon_monoxide, plot='error')

In [None]:
# Hold-out predictions of the tuned model, shown as the cell output.
# Author's note: test (30%), Transformed Test Set (1707, 8).
predict_model(Tuned_model_target_carbon_monoxide)

In [None]:
# Finalize the tuned carbon-monoxide model for deployment.
# Despite the "et" in the variable name, the estimator passed in is the tuned blend.
Final_et_target_carbon_monoxide = finalize_model(Tuned_model_target_carbon_monoxide)

In [None]:
# Show the finalized estimator's printed representation.
print(Final_et_target_carbon_monoxide)

In [None]:
# Apply the finalized model to the rows held back in data_unseen.
unseen_predictions_carbon_monoxide = predict_model(
    estimator=Final_et_target_carbon_monoxide,
    data=data_unseen,
)
unseen_predictions_carbon_monoxide.head()

In [None]:
# R2 of the carbon-monoxide predictions on the unseen rows.
check_metric(
    unseen_predictions_carbon_monoxide['target_carbon_monoxide'],
    unseen_predictions_carbon_monoxide['Label'],
    'R2',
)

In [None]:
# RMSLE of the carbon-monoxide predictions on the unseen rows.
check_metric(
    unseen_predictions_carbon_monoxide['target_carbon_monoxide'],
    unseen_predictions_carbon_monoxide['Label'],
    'RMSLE',
)

In [None]:
# Score the competition test set with the finalized carbon-monoxide model.
test_predictions_carbon_monoxide = predict_model(
    estimator=Final_et_target_carbon_monoxide,
    data=df_test,
)
test_predictions_carbon_monoxide.head()

In [None]:
# Rename the prediction column "Label" to target_carbon_monoxide (test data).
# The renamed frame is rebound to the same name rather than mutated in place.
test_predictions_carbon_monoxide = test_predictions_carbon_monoxide.rename(
    columns={"Label": "target_carbon_monoxide"}
)
test_predictions_carbon_monoxide.head()

# II - Experimentation target : target_benzene

In [None]:
# Configure the PyCaret experiment for target_benzene.
# target_carbon_monoxide, target_nitrogen_oxides and date_time are excluded
# from the features.
setup_target_benzene = setup(
    data=data_train,
    target='target_benzene',
    ignore_features=['target_carbon_monoxide', 'target_nitrogen_oxides', 'date_time'],
    normalize=True,
    transformation=True,
    transform_target=True,
    session_id=24,
    silent=True,
)

In [None]:
# Compare the available regressors, ranked by RMSLE, and keep the top three.
best_target_benzene = compare_models(n_select=3, sort='RMSLE')

In [None]:
# Blend the selected models into a single ensemble estimator.
model_target_benzene = blend_models(estimator_list=best_target_benzene)

In [None]:
# Show the blended estimator's printed representation.
print(model_target_benzene)

In [None]:
# Tuned model: run hyperparameter optimization on the blended estimator
# (tune_model with its default settings).
Tuned_model_target_benzene = tune_model(estimator=model_target_benzene)

In [None]:
# Residual plot of the tuned benzene model (no `plot` argument is passed).
plot_model(Tuned_model_target_benzene)

In [None]:
# Score the tuned model on the hold-out split created by setup(), then draw the
# prediction-error plot.
# Author's note: test (30%), Transformed Test Set (1707, 8).
predict_model(Tuned_model_target_benzene)
plot_model(Tuned_model_target_benzene, plot='error')

In [None]:
# Hold-out predictions of the tuned model, shown as the cell output.
# Author's note: test (30%), Transformed Test Set (1707, 8).
predict_model(Tuned_model_target_benzene)

In [None]:
# Finalize the tuned benzene model for deployment.
Finalize_Tuned_model_target_benzene = finalize_model(Tuned_model_target_benzene)

In [None]:
# Show the finalized estimator's printed representation.
print(Finalize_Tuned_model_target_benzene)

In [None]:
# Apply the finalized model to the rows held back in data_unseen.
unseen_predictions_benzene = predict_model(
    estimator=Finalize_Tuned_model_target_benzene,
    data=data_unseen,
)
unseen_predictions_benzene.head()

In [None]:
# R2 of the benzene predictions on the unseen rows.
check_metric(
    unseen_predictions_benzene['target_benzene'],
    unseen_predictions_benzene['Label'],
    'R2',
)

In [None]:
# RMSLE of the benzene predictions on the unseen rows.
check_metric(
    unseen_predictions_benzene['target_benzene'],
    unseen_predictions_benzene['Label'],
    'RMSLE',
)

In [None]:
# Score the competition test set with the finalized benzene model.
test_predictions_benzene = predict_model(
    estimator=Finalize_Tuned_model_target_benzene,
    data=df_test,
)
test_predictions_benzene.head()

In [None]:
# Rename the prediction column "Label" to target_benzene (test data).
# The renamed frame is rebound to the same name rather than mutated in place.
test_predictions_benzene = test_predictions_benzene.rename(
    columns={"Label": "target_benzene"}
)
test_predictions_benzene.head()

# III - Experimentation target : target_nitrogen_oxides

In [None]:
# Configure the PyCaret experiment for target_nitrogen_oxides.
# target_carbon_monoxide, target_benzene and date_time are excluded from the
# features.
# Unlike the two previous experiments, transformation and transform_target are
# not enabled here; the author left them (and train_size = 0.8) commented out.
setup_target_nitrogen_oxides = setup(
    data=data_train,
    target='target_nitrogen_oxides',
    ignore_features=['target_carbon_monoxide', 'target_benzene', 'date_time'],
    normalize=True,
    session_id=24,
    silent=True,
)

In [None]:
# Compare the available regressors, ranked by RMSLE, and keep the top five.
best_target_nitrogen_oxides = compare_models(n_select=5, sort='RMSLE')

In [None]:
# Blend the selected models into a single ensemble estimator.
model_target_nitrogen_oxides = blend_models(estimator_list=best_target_nitrogen_oxides)

In [None]:
# Show the blended estimator's printed representation.
print(model_target_nitrogen_oxides)

In [None]:
# Tuned model: run hyperparameter optimization on the blended estimator
# (tune_model with its default settings).
Tuned_model_target_nitrogen_oxides = tune_model(estimator=model_target_nitrogen_oxides)

In [None]:
# Residual plot of the tuned nitrogen-oxides model (no `plot` argument is passed).
plot_model(Tuned_model_target_nitrogen_oxides)

In [None]:
# Score the tuned model on the hold-out split created by setup(), then draw the
# prediction-error plot.
# Author's note: test (30%), Transformed Test Set (1707, 8).
predict_model(Tuned_model_target_nitrogen_oxides)
plot_model(Tuned_model_target_nitrogen_oxides, plot='error')

In [None]:
# Hold-out predictions of the tuned model, shown as the cell output.
# Author's note: test (30%), Transformed Test Set (1707, 8).
predict_model(Tuned_model_target_nitrogen_oxides)

In [None]:
# Finalize the tuned nitrogen-oxides model for deployment.
Finalize_Tuned_model_target_nitrogen_oxides = finalize_model(Tuned_model_target_nitrogen_oxides)

In [None]:
# Show the finalized estimator's printed representation.
print(Finalize_Tuned_model_target_nitrogen_oxides)

In [None]:
# Apply the finalized model to the rows held back in data_unseen.
unseen_predictions_nitrogen_oxides = predict_model(
    estimator=Finalize_Tuned_model_target_nitrogen_oxides,
    data=data_unseen,
)
unseen_predictions_nitrogen_oxides.head()

In [None]:
# R2 of the nitrogen-oxides predictions on the unseen rows.
check_metric(
    unseen_predictions_nitrogen_oxides['target_nitrogen_oxides'],
    unseen_predictions_nitrogen_oxides['Label'],
    'R2',
)

In [None]:
# RMSLE of the nitrogen-oxides predictions on the unseen rows.
check_metric(
    unseen_predictions_nitrogen_oxides['target_nitrogen_oxides'],
    unseen_predictions_nitrogen_oxides['Label'],
    'RMSLE',
)

In [None]:
# Score the competition test set with the finalized nitrogen-oxides model.
test_predictions_nitrogen_oxides = predict_model(
    estimator=Finalize_Tuned_model_target_nitrogen_oxides,
    data=df_test,
)
test_predictions_nitrogen_oxides.head()

In [None]:
# Rename the prediction column "Label" to target_nitrogen_oxides (test data).
# The renamed frame is rebound to the same name rather than mutated in place.
test_predictions_nitrogen_oxides = test_predictions_nitrogen_oxides.rename(
    columns={"Label": "target_nitrogen_oxides"}
)
test_predictions_nitrogen_oxides.head()

# IV - Submission file

In [None]:
# Keep only the timestamp and the carbon-monoxide prediction for the submission.
test_carbon_monoxide = test_predictions_carbon_monoxide.loc[
    :, ['date_time', 'target_carbon_monoxide']
]
# Preview the first rows, rounded to one decimal.
test_carbon_monoxide.head().round(1)

In [None]:
# Keep only the timestamp and the benzene prediction for the submission.
test_benzene = test_predictions_benzene.loc[:, ['date_time', 'target_benzene']]
# Preview the first rows, rounded to one decimal.
test_benzene.head().round(1)

In [None]:
# Keep only the timestamp and the nitrogen-oxides prediction for the submission.
test_nitrogen_oxides = test_predictions_nitrogen_oxides.loc[
    :, ['date_time', 'target_nitrogen_oxides']
]
# Preview the first rows, rounded to one decimal.
test_nitrogen_oxides.head().round(1)

In [None]:
# Assemble the submission frame: one row per date_time carrying
# target_carbon_monoxide, target_benzene and target_nitrogen_oxides, each
# rounded to one decimal.
# validate='one_to_one' makes pandas raise a MergeError if date_time is
# duplicated on either side, instead of silently multiplying rows.
result = pd.merge(
    left=test_carbon_monoxide.round(1),
    right=test_benzene.round(1),
    on='date_time',
    validate='one_to_one',
)
result = pd.merge(
    left=result,
    right=test_nitrogen_oxides.round(1),
    on='date_time',
    validate='one_to_one',
)

In [None]:
# Submission file: the export below is commented out, so running this cell
# does not write submission.csv.
#result.to_csv('./submission.csv', index=False)

# Show the first twenty rows of result
result.head(20)