## TPOT AutoML Sample Code

In [1]:
import pandas as pd
import numpy as np
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
%matplotlib inline

In [2]:
!pip install tpot

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## Classification

#### Load Data
TPOT does not work with the raw Titanic CSV file; it requires the dataset to be preprocessed first. For this reason, I use the processed Titanic dataset instead of the unprocessed one.

In [3]:
# The raw Titanic CSV is not usable by TPOT, so the preprocessed
# export (titanic_processed.csv) is loaded instead.
titanic = pd.read_csv(filepath_or_buffer='titanic_processed.csv')

In [4]:
# Separate the target column ('Survived') from the predictor columns.
titanic_train_label = titanic.loc[:, 'Survived']
titanic_train_nolabel = titanic.drop(columns=['Survived'])

#### Split the data into training and test sets

In [5]:
# Hold out 20% of the rows as a test set; random_state pins the shuffle so the
# split is the same on every run.
# train_test_split is already imported in the notebook's first cell, so it is
# not re-imported here.
X_train, X_test, y_train, y_test = train_test_split(
    titanic_train_nolabel,
    titanic_train_label,
    test_size=0.2,
    random_state=42,
)

In [6]:
from tpot import TPOTClassifier

# Classifier search capped at 10 minutes (max_time_mins) with a fixed seed.
tpot = TPOTClassifier(
    max_time_mins=10,
    random_state=66,
    verbosity=2,
)

# Raising `generations` and `population_size` gives TPOT a larger search
# budget and therefore makes it more potent, for example:
# tpot_titanic = TPOTClassifier(
#     generations=100,
#     population_size=100,
#     verbosity=2,
#     scoring="accuracy",
#     max_time_mins=30,
#     random_state=42,
#     n_jobs=-1,
# )

#### Training

In [None]:
# Run the pipeline search using only the training split.
tpot.fit(features=X_train, target=y_train)

Optimization Progress:   0%|          | 0/100 [00:00<?, ?pipeline/s]


Generation 1 - Current best internal CV score: 0.8244361272530286

Generation 2 - Current best internal CV score: 0.8272333300502315


#### Evaluating

In [None]:
# Score the fitted estimator on the held-out split and on the training split.
# The original wrapped each score in {...}, which builds a one-element set and
# printed e.g. "Test {0.83}"; keep the plain score value instead.
testsc = tpot.score(X_test, y_test)
print(f'Test {testsc}')
trainsc = tpot.score(X_train, y_train)
print(f'Train {trainsc}')

#### Exporting the model

In [None]:
# Write the selected pipeline out as a standalone Python script.
tpot.export(output_file_name='tpot_exported_pipeline.py')

## Regression



#### Load Data

In [None]:
# Load the preprocessed cars table and discard every row that has a missing value.
cars = pd.read_csv('cars_processed.csv').dropna()

In [None]:
# Target is the 'price_usd' column; every other column is a predictor.
y = cars.loc[:, 'price_usd']
X = cars.drop(columns=['price_usd'])

#### Train

In [None]:
from tpot import TPOTRegressor

# Regressor search capped at 10 minutes (max_time_mins) with a fixed seed.
# Note: this rebinds the notebook-level name `tpot` to the regressor.
tpot = TPOTRegressor(
    max_time_mins=10,
    random_state=1502,
    verbosity=2,
)

# Higher `generations` and `population_size` values can be expected to give
# better results, for example:
# tpot_cars = TPOTRegressor(
#     generations=100,
#     population_size=100,
#     verbosity=2,
#     scoring="neg_mean_absolute_error",
#     max_time_mins=60,
#     random_state=42,
#     n_jobs=-1,
# )


In [None]:
# Hold out 20% of the cars rows for testing (fixed seed for a repeatable
# split), then run the TPOT search on the training portion.
# train_test_split is already imported in the notebook's first cell, so it is
# not re-imported here.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
tpot.fit(X_train, y_train)

#### Evaluating

In [None]:
# Score the fitted regressor on the held-out split and on the training split.
# Two fixes relative to the original cell:
#   * the scores are no longer wrapped in {...}, which built one-element sets
#     and printed e.g. "Test {-1234.5}";
#   * the training score is labelled "Train" (it was printed as "Test").
testsc = tpot.score(X_test, y_test)
print(f'Test {testsc}')
trainsc = tpot.score(X_train, y_train)
print(f'Train {trainsc}')

#### Export the model

In [None]:
# Export the regression pipeline under its own file name. The classification
# section already writes 'tpot_exported_pipeline.py'; reusing that path here
# would silently overwrite the exported classifier pipeline.
tpot.export('tpot_exported_regression_pipeline.py')