# TPOT - Credit Card Fraud

> ### Install TPOT

In [None]:
# Installs TPOT libraries.
!pip install tpot

> ### Import Libraries


In [2]:
# import libraries
import numpy as np
import pandas as pd
from tpot import TPOTClassifier
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

> ### Import Dataset

In [3]:
# Upload the dataset from your local drive through the Colab file picker.
# Select:  ~/How-To--Machine-Learning/01 Credit Card/AutoML/data/TPOT.csv
from google.colab import files
uploaded = files.upload()
# Report the name and byte size of each uploaded file.
for fn, payload in uploaded.items():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(payload)))

Saving TPOT.csv to TPOT.csv
User uploaded file "TPOT.csv" with length 276497 bytes


In [4]:
# Load the TPOT.csv dataset: semicolon-separated, no header row.
dataset = pd.read_csv('TPOT.csv', sep=';', header=None)
# Features are every column except the last; the label is the last column.
# Both are indexed relative to the end of the frame so they always agree.
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [5]:
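# backup option: pull the dataset straight from the GitHub repository.
# Use the raw-content URL; the github.com/.../blob/... address returns an HTML page, not the CSV.
# url = 'https://raw.githubusercontent.com/jporeilly/How-To--Machine-Learning/main/data/TPOT.csv'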
# dataset = pd.read_csv(url, sep= ';', header=None)
# x = dataset.iloc[:, :-1].values
# y = dataset.iloc[:, 8].values

> #### Data Exploration

In [None]:
# preview the first rows of the loaded dataset (it was read with header=None, so columns are unnamed)
dataset.head()

In [None]:
# independent variables: every column of `dataset` except the last
print(x)

In [None]:
# dependent variable: the last column of `dataset`
print(y)

> ### Add Column Headers

In [9]:
# Name the nine columns; the last one is the label column.
dataset.columns = [
    'first_time_customer',
    'order_dollar_amount',
    'num_items',
    'age',
    'web_order',
    'total_transactions_to_date',
    'hour_of_day',
    'billing_shipping_zip_equal',
    'reported_as_fraud_historic',
]

> #### Check Dataset

In [None]:
# check that the column names assigned above now appear as the header row
dataset.head()

> ### Convert Dataset to NumPy Arrays and Min-Max Scale Features

In [11]:
# Rebuild the feature matrix from every column but the last, min-max scale it,
# and keep the label (last column) as a NumPy array.
# This cell defines `X` and `y`, which the train/test split consumes.
# NOTE(review): the scaler is fitted on all rows before the train/test split, so
# test rows influence the scaling. Consider fitting on the training split only.
min_max_scaler = preprocessing.MinMaxScaler()
x = dataset.iloc[:, :-1].values
x_scaled = min_max_scaler.fit_transform(x)
X = np.asarray(x_scaled)
y = np.asarray(dataset.iloc[:, -1])

> #### Check Dataset

In [None]:
# NOTE: the scaling cell does not modify `dataset`, so this still shows the unscaled values;
# inspect X[:5] to see the scaled features.
dataset.head()

> ### Splitting the Dataset: Train / Test

In [13]:
# Split the dataset: 75% is held out for testing, the remaining 25% is used for training.
# A fixed random_state makes the split identical on every run.
# NOTE(review): 75% test / 25% train is unusually test-heavy; confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.75, random_state=42)

> ### TPOT Classifier

In [None]:
# TPOT Classifier: search the 'TPOT light' configuration for one generation with a
# population of 100, scoring candidates by accuracy, with n_jobs=-1.
# random_state is fixed so the search can be repeated.
# NOTE(review): accuracy can look good on imbalanced fraud labels. Confirm the metric.
tpot = TPOTClassifier(generations=1, verbosity=2, population_size=100, scoring='accuracy',
                      n_jobs=-1, config_dict='TPOT light', random_state=42)
tpot.fit(X_train, y_train)
# Score the fitted pipeline on the held-out split and display the result.
output_score = str(tpot.score(X_test, y_test))
print('Test score (accuracy): ' + output_score)
print(tpot.fitted_pipeline_)

> #### Export Pipeline as Python script

In [15]:
# Write the fitted pipeline out as a Python script, then download it through Colab.
export_path = 'tpot_exported_credit_card_pipeline.py'
tpot.export(export_path)
from google.colab import files
files.download(export_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

> #### TPOT Evaluated Pipelines

In [None]:
# print TPOT's `evaluated_individuals_` attribute
# (this is the plain Python repr of the object, not serialized JSON)
print(tpot.evaluated_individuals_)