# TPOT - Credit Card Fraud

> ### Install TPOT

In [0]:
# Installs TPOT libraries.
!pip install tpot

> ### Import Libraries


In [0]:
import numpy as np
import pandas as pd
from tpot import TPOTClassifier
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

> ### Import Dataset

In [0]:
# Read the semicolon-separated CSV; header=None makes pandas assign integer
# column labels (named headers are attached in a later cell).
url = 'https://raw.githubusercontent.com/jporeilly/Machine--Learning/master/Lab_01_AutoML/data/TPOT.csv'
dataset = pd.read_csv(url, sep=';', header=None)

# Features are every column except the last; the label is the last column.
# Selecting the label with -1 (instead of a hard-coded 8) keeps it in step
# with the feature slice if the number of columns ever changes.
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

> #### Check Dataset

In [0]:
# Preview the first five rows as loaded (integer column labels at this point).
dataset.head(5)

In [0]:
# Show the feature matrix extracted from the data set.
print(x)

In [0]:
# Show the label vector extracted from the data set.
print(y)

> ### Add Column Headers

In [0]:
# Attach descriptive names to the nine columns; the last one is the label.
dataset.columns = [
    'first_time_customer',
    'order_dollar_amount',
    'num_items',
    'age',
    'web_order',
    'total_transactions_to_date',
    'hour_of_day',
    'billing_shipping_zip_equal',
    'reported_as_fraud_historic',
]

> #### Check Dataset

In [0]:
# Preview the first five rows again to confirm the column names were applied.
dataset.head(5)

> ### Convert Dataset to NumPy Arrays and Scale Features (optional)

In [0]:
# Split the frame into feature columns (all but the last) and the label column.
feature_frame = dataset.iloc[:, :-1]
label_series = dataset.iloc[:, -1]

# Rescale each feature column with a min-max scaler.
# NOTE(review): the scaler is fitted on the full data set before the
# train/test split performed later in the notebook, so test rows influence
# the scaling — confirm whether that is acceptable for this lab.
x = feature_frame.values
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit(x).transform(x)

# Final model inputs as NumPy arrays.
X = np.asarray(x_scaled)
y = np.asarray(label_series)

> #### Check Dataset

In [0]:
# The scaling cell writes its result to X and leaves `dataset` untouched, so
# preview the scaled feature matrix (with the feature column names) rather
# than re-displaying the unchanged frame.
pd.DataFrame(X, columns=dataset.columns[:-1]).head()

> ### Splitting the Dataset: Train / Test

In [0]:
# Hold out part of the data for evaluating the fitted pipeline.
# A fixed seed makes the split — and every score derived from it — repeatable
# across "Restart & Run All".
# NOTE(review): test_size=0.75 trains on only 25% of the rows; if a 75/25
# train/test split was intended, this should be test_size=0.25 — confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.75, random_state=42
)

> ### TPOT Classifier

In [0]:
# Configure a short TPOT search: one generation of 100 candidate pipelines
# drawn from the 'TPOT light' configuration, scored by accuracy, using all
# available cores. random_state seeds the search so runs can be repeated.
tpot = TPOTClassifier(
    generations=1,
    population_size=100,
    scoring='accuracy',
    config_dict='TPOT light',
    n_jobs=-1,
    verbosity=2,
    random_state=42,
)
tpot.fit(X_train, y_train)

# Score the fitted pipeline on the held-out rows and actually show the result
# (previously it was computed but never displayed).
output_score = str(tpot.score(X_test, y_test))
print('Hold-out score:', output_score)
print(tpot.fitted_pipeline_)

> #### Export Pipeline as Python script

In [0]:
# Write the fitted TPOT pipeline out as a Python script.
export_path = 'tpot_exported_credit_card_pipeline.py'
tpot.export(export_path)

# In Google Colab, offer the script as a browser download. Outside Colab the
# google.colab package cannot be imported, so report where the file was
# written instead of failing after a successful export.
try:
    from google.colab import files
except ImportError:
    print('Pipeline exported to', export_path)
else:
    files.download(export_path)

> #### TPOT Evaluated Pipelines

In [139]:
# Dump the dictionary of every pipeline TPOT evaluated, with its metadata.
evaluated_pipelines = tpot.evaluated_individuals_
print(evaluated_pipelines)

{'GaussianNB(input_matrix)': {'generation': 0, 'mutation_count': 0, 'crossover_count': 0, 'predecessor': ('ROOT',), 'operator_count': 1, 'internal_cv_score': 0.8201643643643644}, 'KNeighborsClassifier(VarianceThreshold(input_matrix, VarianceThreshold__threshold=0.2), KNeighborsClassifier__n_neighbors=61, KNeighborsClassifier__p=2, KNeighborsClassifier__weights=uniform)': {'generation': 0, 'mutation_count': 0, 'crossover_count': 0, 'predecessor': ('ROOT',), 'operator_count': 2, 'internal_cv_score': 0.7735567567567567}, 'BernoulliNB(input_matrix, BernoulliNB__alpha=0.001, BernoulliNB__fit_prior=False)': {'generation': 0, 'mutation_count': 0, 'crossover_count': 0, 'predecessor': ('ROOT',), 'operator_count': 1, 'internal_cv_score': 0.7133441441441442}, 'KNeighborsClassifier(SelectFwe(input_matrix, SelectFwe__alpha=0.028), KNeighborsClassifier__n_neighbors=43, KNeighborsClassifier__p=2, KNeighborsClassifier__weights=uniform)': {'generation': 0, 'mutation_count': 0, 'crossover_count': 0, 'pr