# Adaptive Boosting Method

Determinar se um cliente realizou uma compra a partir de uma propaganda, utilizando o método Adaptive Boosting (AdaBoost).

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the dataset from the CSV file located in the working directory.
dados = pd.read_csv(filepath_or_buffer='Social_Network_Ads.csv')

In [3]:
# Preview the first five rows of the freshly loaded frame.
dados.head(n=5)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


A coluna User ID não é necessária para o modelo, pois é apenas um identificador do usuário.

In [4]:
# List the column labels to confirm which fields the CSV provided.
dados.columns

Index(['User ID', 'Gender', 'Age', 'EstimatedSalary', 'Purchased'], dtype='object')

In [5]:
# 'User ID' is only a row identifier, so it is removed from the feature set.
# errors='ignore' keeps this cell re-runnable: once the column is gone, a
# second execution is a no-op instead of raising KeyError.
dados = dados.drop(columns='User ID', errors='ignore')

In [6]:
# Check the first five rows after removing the identifier column.
dados.head(n=5)

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0


Convertendo a variável categórica Gender para numérica

In [7]:
from sklearn.preprocessing import LabelEncoder

In [8]:
# Encoder that maps each distinct label to an integer code.
# NOTE: the name is spelled `enconder`; the next cell refers to it under this
# exact spelling, so it is kept as is.
enconder = LabelEncoder()

In [9]:
# Replace the textual Gender labels by their integer codes
# (the preview below shows Female -> 0 and Male -> 1).
genero_codificado = enconder.fit_transform(dados['Gender'])
dados['Gender'] = genero_codificado

In [10]:
# Confirm that Gender now holds integer codes.
dados.head(n=5)

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0


Padronizando as colunas Age e EstimatedSalary (média 0 e desvio padrão 1)

In [11]:
# Continuous columns that will be rescaled.
cols = ['Age', 'EstimatedSalary']

In [12]:
from sklearn.preprocessing import StandardScaler

In [13]:
# Scaler that standardizes each column to zero mean and unit variance.
sc = StandardScaler()

In [14]:
# Fit the scaler on the selected columns and overwrite them with the scaled values.
# NOTE(review): the scaler is fitted on the full dataset, before the train/test
# split performed later in the notebook, so test-set statistics influence the
# scaling. Consider fitting on the training rows only - TODO confirm whether
# this matters for the intended evaluation.
dados[cols] = sc.fit_transform(dados[cols])

In [15]:
# Inspect the first five rows after rescaling Age and EstimatedSalary.
dados.head(n=5)

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,-1.781797,-1.490046,0
1,1,-0.253587,-1.460681,0
2,0,-1.113206,-0.78529,0
3,0,-1.017692,-0.374182,0
4,1,-1.781797,0.183751,0


Determinando as variáveis X e Y

In [16]:
# Feature matrix: every column except the target, as a NumPy array.
X = dados.drop(columns='Purchased').to_numpy()
# Target vector: the Purchased flag, as a NumPy array.
Y = dados['Purchased'].to_numpy()

Separando em amostras de treino e teste

In [17]:
from sklearn.model_selection import train_test_split

In [18]:
# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable.
X_treino, X_teste, Y_treino, Y_teste = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=0,
)

Aplicando modelo AdaBoost

In [19]:
from sklearn.ensemble import AdaBoostClassifier

In [20]:
# AdaBoost classifier with default hyperparameters.
# The seed is fixed so the fitted ensemble, and every score computed from it,
# is reproducible across kernel restarts; 0 matches the seed used for the
# train/test split.
modelo = AdaBoostClassifier(random_state=0)

In [21]:
# Train the ensemble on the training partition.
modelo.fit(X=X_treino, y=Y_treino)

AdaBoostClassifier()

In [22]:
# Mean accuracy on the data the model was trained on.
modelo.score(X=X_treino, y=Y_treino)

0.94

In [23]:
# Predicted class for every row of the held-out test partition.
Y_previsto = modelo.predict(X=X_teste)

In [24]:
# Mean accuracy on the held-out test partition.
modelo.score(X=X_teste, y=Y_teste)

0.92

In [25]:
from sklearn.metrics import confusion_matrix

In [26]:
# Cross-tabulate the true test labels against the model's predictions.
cm = confusion_matrix(y_true=Y_teste, y_pred=Y_previsto)

In [27]:
# Display the confusion matrix. In scikit-learn's layout, rows are the true
# classes and columns are the predicted classes.
cm

array([[64,  4],
       [ 4, 28]])

In [28]:
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score

In [29]:
# Fraction of test rows whose predicted class equals the true class.
acc = accuracy_score(y_true=Y_teste, y_pred=Y_previsto)

In [30]:
# Report accuracy as a percentage with two decimal places.
acc_pct = acc * 100
print("Acuracia = {:0.2f}%".format(acc_pct))

Acuracia = 92.00%


In [31]:
# Precision of the positive class on the test partition.
prec = precision_score(y_true=Y_teste, y_pred=Y_previsto)

In [32]:
# Report precision as a percentage with two decimal places.
prec_pct = prec * 100
print("Precisao = {:0.2f}%".format(prec_pct))

Precisao = 87.50%


In [33]:
# Recall of the positive class on the test partition.
rec = recall_score(y_true=Y_teste, y_pred=Y_previsto)

In [34]:
# Report recall as a percentage with two decimal places.
rec_pct = rec * 100
print("Recall = {:0.2f}%".format(rec_pct))

Recall = 87.50%


In [35]:
# F1 score (harmonic mean of precision and recall) for the positive class.
f1 = f1_score(y_true=Y_teste, y_pred=Y_previsto)

In [36]:
# Report the F1 score as a percentage with two decimal places.
f1_pct = f1 * 100
print("F1 = {:0.2f}%".format(f1_pct))

F1 = 87.50%
