## Exemplo de Features e Labels

In [7]:
import os
from datetime import datetime as dt

import graphviz
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sqlalchemy import create_engine

# Connection settings for the PostgreSQL instance. Each value can be overridden
# through an environment variable so credentials do not have to live in the
# notebook; the fallbacks are the local development defaults used so far.
pg_host = os.environ.get('PG_HOST', 'localhost')
pg_port = int(os.environ.get('PG_PORT', '15432'))
pg_user = os.environ.get('PG_USER', 'postgres')
pg_pass = os.environ.get('PG_PASS', 'postgres')

# Seed NumPy's global RNG once, up front, so later cells that rely on it
# produce the same results on a fresh "Restart & Run All".
SEED = 20
np.random.seed(SEED)

engine = create_engine(f'postgresql://{pg_user}:{pg_pass}@{pg_host}:{pg_port}/postgres')

# Explore Dataframe: load the whole 'vendas_carros' table and preview the
# first rows (the bare last expression is what the cell displays).
df_data = pd.read_sql_table('vendas_carros', engine)
df_data.head()


Unnamed: 0,preco,vendido,idade_do_modelo,km_por_ano,modelo
0,30941.02,0,24,35085.22134,31
1,40557.96,0,26,12622.05362,33
2,89627.5,1,18,11440.79806,22
3,95276.14,1,9,43167.32682,14
4,117384.68,0,10,12770.1129,16


## Modelo Linear SVC

In [3]:
from sklearn.svm import LinearSVC, SVC
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.preprocessing import StandardScaler
from sklearn.dummy import DummyClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Feature matrix (three numeric columns) and the "vendido" label column.
df_X = df_data[["preco", "idade_do_modelo", "km_por_ano"]]
df_Y = df_data["vendido"]

# Hold out 25% of the rows for testing; stratify on the label so both splits
# keep the same class proportions.
train_x, test_x, train_y, test_y = train_test_split(df_X, df_Y, test_size=0.25, stratify=df_Y)

# Baseline: a dummy classifier that ignores the features. Any real model
# should beat this number.
# NOTE(review): DummyClassifier() uses the library's default strategy, which
# may not be "stratified" despite the variable name — confirm for the
# installed scikit-learn version.
dummy_stratified = DummyClassifier()
dummy_stratified.fit(train_x, train_y)
dummy_accuracy = dummy_stratified.score(test_x, test_y) * 100
print('Accuracy Dummy: %.2f%%' % dummy_accuracy)

# Linear SVC trained on the raw (unscaled) features.
modelo_linear_svc = LinearSVC(dual=True)
modelo_linear_svc.fit(train_x, train_y)
predictions = modelo_linear_svc.predict(test_x)
accuracy = accuracy_score(test_y, predictions) * 100
# Report the Linear SVC accuracy itself (previously the dummy accuracy was
# printed here by mistake).
print('Accuracy Linear SVC: %.2f%%' % accuracy)

# Kernel SVC: the scaler is fitted on the training split only and then applied
# to both splits, so no test-set statistics leak into training.
scaler = StandardScaler()
scaler.fit(train_x)
train_x_scaled = scaler.transform(train_x)
test_x_scaled = scaler.transform(test_x)
modelo_svc = SVC()
# Train and evaluate on the scaled features (previously the unscaled frames
# were used, leaving the scaled arrays unused).
modelo_svc.fit(train_x_scaled, train_y)
predictions = modelo_svc.predict(test_x_scaled)
accuracy_svc = accuracy_score(test_y, predictions) * 100
print('Accuracy SVC: %.2f%%' % accuracy_svc)

# Shallow decision tree on the raw features; max_depth=2 keeps the tree small
# enough to be read when it is plotted.
model_dec_tree = DecisionTreeClassifier(max_depth=2)
model_dec_tree.fit(train_x, train_y)
predictions = model_dec_tree.predict(test_x)
accuracy_dec_tree = accuracy_score(test_y, predictions) * 100
print('Accuracy Decision Tree: %.2f%%' % accuracy_dec_tree)

Training with 7500 elements and testing with 2500 elements
Accuracy: 56.96%
Dummy Accuracy: 58.00%




## Analisando Árvore de Decisão

In [4]:
# Render the fitted decision tree as a Graphviz diagram.
features = df_X.columns
# export_graphviz only accepts a decision tree estimator, so pass
# model_dec_tree (the SVC was passed here by mistake). feature_names is given
# as a plain list, and class_names follow the ascending order of the labels.
dot_data = export_graphviz(model_dec_tree, out_file=None, feature_names=list(features), filled=True, rounded=True, class_names=["não", "sim"])
grafico = graphviz.Source(dot_data)

# Bare last expression so the notebook displays the rendered graph.
grafico

Accuracy: 76.60%
