# Low-Code AutoML with PyCaret

In [None]:
# IPython shell escape: print the version of the `python` executable found on PATH
!python --version

In [None]:
# show the installed PyCaret version (the trailing expression is displayed by the notebook)
import pycaret
pycaret.__version__

## Binary Classification

In [None]:
# loading sample dataset from pycaret dataset module
from pycaret.datasets import get_data
data = get_data('diabetes')

In [None]:
# summary of columns, dtypes and non-null counts (assumes `data` is a pandas DataFrame)
data.info()

In [None]:
# class balance of the target column
data['Class variable'].value_counts()

In [None]:
# import pycaret classification and init setup
# the star import puts setup, compare_models, predict_model, plot_model, ... into the notebook namespace
# session_id presumably pins the random seed for reproducibility -- confirm in the PyCaret docs
from pycaret.classification import *
s = setup(data, target = 'Class variable', session_id = 123)

In [None]:
# compare baseline models
best = compare_models()

In [None]:
# display the model object returned by compare_models
best

In [None]:
# predict on test set
holdout_pred = predict_model(best)

In [None]:
# show predictions df
holdout_pred

In [None]:
# evaluation UI for the selected model (presumably an interactive widget -- confirm)
evaluate_model(best)

In [None]:
# plot confusion matrix
plot_model(best, plot = 'confusion_matrix')

In [None]:
# plot AUC
plot_model(best, plot = 'auc')

In [None]:
# NOTE(review): same call as the evaluate_model cell above; looks redundant
evaluate_model(best)

In [None]:
# plot feature importance
plot_model(best, plot = 'feature')

### Saving and Loading a model

In [None]:
# save pipeline
save_model(best, 'diabetes_model')

In [None]:
# load the saved pipeline back under the same name and display it
loaded_model_pipeline = load_model('diabetes_model')
loaded_model_pipeline

In [None]:
# train a single model selected by its ID ('knn' is presumably K-Nearest Neighbors -- confirm)
knn = create_model('knn')

In [None]:
# confusion matrix for the knn model
plot_model(knn, plot = 'confusion_matrix')

In [None]:
# hold-out features stored on the setup object (presumably before preprocessing -- confirm)
s.X_test

In [None]:
# hold-out features after the preprocessing transformations, going by the attribute name
s.X_test_transformed

In [None]:
# keep the transformed hold-out features for the direct estimator call below
new_data = s.X_test_transformed

In [None]:
# call the estimator's own predict() on the already-transformed features
knn.predict(new_data)

In [None]:
# no data argument: presumably scores the hold-out split created by setup -- confirm
predict_model(knn)

In [None]:
# predict_model applies the preprocessing pipeline to `data` itself, so pass the
# raw hold-out features; passing the already-transformed frame (s.X_test_transformed)
# would run the preprocessing a second time.
predict_model(knn, data = s.X_test)

### Deploying

In [None]:
# gradio is presumably what create_app builds on -- show its installed version
import gradio
gradio.__version__

In [None]:
# build a demo app around the knn model (presumably a Gradio UI -- confirm)
create_app(knn)


In [None]:
# generate an API for the knn model under the name 'knn_api' (TODO confirm which files this writes)
create_api(knn, 'knn_api')

In [None]:
# create_docker(knn)

## Multiclass Classification

In [None]:
# loading sample dataset from pycaret dataset module
from pycaret.datasets import get_data
data = get_data('iris')

In [None]:
# class balance of the target column
data['species'].value_counts()

In [None]:
# re-run setup on the iris data; `s`, `data` and (below) `best` from the binary section are overwritten
# setup here is whichever setup the most recent pycaret star import bound
s = setup(data, target = 'species', session_id = 123)

In [None]:
# IPython help syntax: show the documentation of compare_models
?compare_models

In [None]:
# compare baseline models
# sort='AUC' presumably ranks the comparison by the AUC metric -- confirm
best = compare_models(sort='AUC')

In [None]:
# predict on test set
holdout_pred = predict_model(best)
holdout_pred.head()

In [None]:
# print the plot_model documentation (e.g. to look up the accepted `plot` values)
help(plot_model)

In [None]:
# plot confusion matrix
plot_model(best, plot = 'confusion_matrix')

In [None]:
# plot feature importance
plot_model(best, plot = 'feature')

In [None]:
# save pipeline
save_model(best, 'my_iris_prediction')

In [None]:
# load the saved pipeline back under the same name and display it
loaded_model_pipeline = load_model('my_iris_prediction')
loaded_model_pipeline

## Regression

In [None]:
# NOTE: this star import rebinds names such as setup, compare_models, predict_model and
# plot_model that the classification star import bound earlier; from here on they are
# the regression versions
from pycaret.regression import *

# get_data is not imported in this cell; it relies on the earlier
# `from pycaret.datasets import get_data` cell having been run
data = get_data('insurance')
s = setup(data, target = 'charges', session_id = 123)

In [None]:
# IPython help syntax: show the documentation of compare_models
?compare_models

In [None]:
# compare baseline models
best = compare_models()

In [None]:
# best = create_model('catboost')

In [None]:
# predict on test set
holdout_pred = predict_model(best)
# show predictions df
holdout_pred.head()

In [None]:
# plot residuals
plot_model(best, plot = 'residuals')

In [None]:
# plot error
plot_model(best, plot = 'error')

In [None]:
# plot feature importance
plot_model(best, plot = 'feature')

In [None]:
# save pipeline
save_model(best, 'regression_model')
# load pipeline
loaded_model_pipeline = load_model('regression_model')
loaded_model_pipeline

In [None]:
# build a demo app around the reloaded pipeline (presumably a Gradio UI -- confirm)
create_app(loaded_model_pipeline)

## Clustering

In [None]:
# NOTE: this star import rebinds setup, create_model, plot_model, save_model, load_model, ...
# to the clustering versions for every cell that runs after it
from pycaret.clustering import *

# get_data relies on the earlier `from pycaret.datasets import get_data` cell
data = get_data('jewellery')

In [None]:
# no `target` argument is passed here, unlike the supervised sections
s = setup(data, session_id = 123)

In [None]:
# presumably lists the clustering models available to create_model -- confirm
models()

In [None]:
# train kmeans model
kmeans = create_model('kmeans')

In [None]:
# train a second clustering model; the variable is named after the model ID
# actually requested ('birch') so it cannot be mistaken for a DBSCAN model
birch = create_model('birch')

In [None]:
# table returned by assign_model for the kmeans model; it has a 'Cluster' column (used below)
kmeans_cluster = assign_model(kmeans)

In [None]:
# display the labelled table
kmeans_cluster

In [None]:
# number of rows assigned to each cluster
kmeans_cluster['Cluster'].value_counts()

In [None]:
# plot clusters
plot_model(kmeans, plot = 'cluster')

In [None]:
# plot elbow
plot_model(kmeans, plot = 'elbow')

In [None]:
# plot silhouette
plot_model(kmeans, plot = 'silhouette')

In [None]:
# save pipeline
save_model(kmeans, 'kmeans_pipeline')

In [None]:
# load pipeline
kmeans_pipeline = load_model('kmeans_pipeline')


In [None]:
# same cluster plot as above, this time drawn from the reloaded pipeline object
plot_model(kmeans_pipeline,  plot = 'cluster')

## Student Activity
* Load `credit_data_train.csv`
* Train a model to predict `credit_risk`
* Once the model is trained, load `test_set.csv`
* Remove the `credit_risk` column from that dataset and keep it aside: it holds the actual values, so you can later check how well the model did
* Predict on the test set to get the model's estimated `credit_risk`, and compare it against the actuals

In [None]:
import pandas as pd

# The clustering star import earlier in this notebook rebound setup(), create_model(),
# plot_model(), predict_model(), ... to their clustering versions. Re-import the
# classification API so the calls in this activity train a classifier for `credit_risk`.
from pycaret.classification import *

In [None]:
# Exercise placeholder: this cell does not run until the right-hand side is filled in
# with code that loads credit_data_train.csv (e.g. via pandas, imported above as pd).
df = # YOUR CODE 
df.head()

In [None]:
# Check shape of the set

In [None]:
# Configure your model 


In [None]:
# Train a lightgbm model

In [None]:
# Check the transformation made on the input data set

In [None]:
# Plot the confusion matrix 

In [None]:
# Read the test set 

In [None]:
# Predict using the model and compare results

## Time Series

In [None]:
# load sample dataset
from pycaret.datasets import get_data
data = get_data('airline')


In [None]:
# quick visual check of the raw series
data.plot()

In [None]:
# NOTE: this star import rebinds setup, compare_models, plot_model, predict_model, ...
# to the time-series versions
# fh = 3 is presumably the forecast horizon (periods held out / predicted) and
# fold = 5 the number of cross-validation folds -- confirm in the PyCaret docs
from pycaret.time_series import *
s = setup(data, fh = 3, fold = 5, session_id = 123)


In [None]:
# IPython help syntax: show the documentation of compare_models
?compare_models

In [None]:

# compare baseline models
best = compare_models()

In [None]:

# forecast plot; data_kwargs asks for a horizon of 24 instead of the fh given to setup
plot_model(best, plot = 'forecast', data_kwargs = {'fh' : 24})

In [None]:

# diagnostics plot for the selected model
plot_model(best, plot = 'diagnostics')

In [None]:

# in-sample plot for the selected model (presumably fitted values vs. actuals -- confirm)
plot_model(best, plot = 'insample')

In [None]:

final_best = finalize_model(best)
predict_model(best, fh = 24)

In [None]:
final_best