In [80]:
import pandas as pd 
from datetime import datetime
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.dummy import DummyClassifier


# Load the car listing data used throughout the notebook and preview it.
caminho_dados = 'data/precos.csv'
df = pd.read_csv(caminho_dados)

df.head()

Unnamed: 0,milhas_por_ano,ano_do_modelo,preco,vendido
0,21801,2000,30941.02,1
1,7843,1998,40557.96,1
2,7109,2006,89627.5,0
3,26823,2015,95276.14,0
4,7935,2014,117384.68,1


In [81]:
# Convert the yearly mileage from miles to kilometres (1 mile = 1.60934 km)
# and store it as a new column next to the original one.
KM_POR_MILHA = 1.60934
df["km_por_ano"] = df["milhas_por_ano"] * KM_POR_MILHA

df.head()

Unnamed: 0,milhas_por_ano,ano_do_modelo,preco,vendido,km_por_ano
0,21801,2000,30941.02,1,35085.22134
1,7843,1998,40557.96,1,12622.05362
2,7109,2006,89627.5,0,11440.79806
3,26823,2015,95276.14,0,43167.32682
4,7935,2014,117384.68,1,12770.1129


In [82]:
# Age of each car in years, derived from its model year.
#
# The reference year is pinned instead of read from the system clock:
# with `datetime.today().year` the column (and every output displayed below)
# silently changes depending on when the notebook is executed. 2024 is the
# year implied by the recorded output (model year 2000 -> age 24).
ANO_REFERENCIA = 2024
df["idade_carro"] = ANO_REFERENCIA - df["ano_do_modelo"]

df.head()

Unnamed: 0,milhas_por_ano,ano_do_modelo,preco,vendido,km_por_ano,idade_carro
0,21801,2000,30941.02,1,35085.22134,24
1,7843,1998,40557.96,1,12622.05362,26
2,7109,2006,89627.5,0,11440.79806,18
3,26823,2015,95276.14,0,43167.32682,9
4,7935,2014,117384.68,1,12770.1129,10


In [83]:
# Remove the source columns now that `km_por_ano` and `idade_carro` replace them.
#
# The frame is rebound instead of mutated with `inplace=True`, and
# `errors="ignore"` makes the cell safe to re-run: executing it a second time
# is a no-op rather than a KeyError for the already-removed columns.
colunas_substituidas = ["milhas_por_ano", "ano_do_modelo"]
df = df.drop(columns=colunas_substituidas, errors="ignore")
df.head()

Unnamed: 0,preco,vendido,km_por_ano,idade_carro
0,30941.02,1,35085.22134,24
1,40557.96,1,12622.05362,26
2,89627.5,0,11440.79806,18
3,95276.14,0,43167.32682,9
4,117384.68,1,12770.1129,10


In [84]:
# Feature matrix and target: predict `vendido` from price, age and yearly km.
colunas_features = ['preco', 'idade_carro', 'km_por_ano']
x = df[colunas_features]
y = df['vendido']

# Fixed seed so the train/test partition is the same on every run.
seed = 2043

# Stratified split: both partitions keep the class proportions of `y`.
raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(
    x,
    y,
    random_state=seed,
    stratify=y,
)

# Fit the scaler on the training rows only, then apply the same
# transformation to both partitions so no test statistics leak into training.
scaler = StandardScaler().fit(raw_treino_x)
treino_x = scaler.transform(raw_treino_x)
teste_x = scaler.transform(raw_teste_x)

# Train the SVM on the scaled features and evaluate on the held-out set.
model = SVC(gamma='auto').fit(treino_x, treino_y)
prevision = model.predict(teste_x)

# Accuracy expressed as a percentage.
accuracy = 100 * accuracy_score(teste_y, prevision)

print(f'Accuracy: {accuracy:.2f}%')

Accuracy: 76.68%


# Using DummyClassifier


In [85]:
# Baseline: a DummyClassifier ignores the feature values, so its accuracy is
# the floor any real model has to beat.
seed = 2043

# NOTE: this rebinds treino_x / teste_x to the *unscaled* split, replacing the
# scaled arrays of the same name produced for the SVC model. The dummy model
# does not use the features, so scaling is irrelevant here.
treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, random_state=seed, stratify=y)

# strategy='stratified' draws each prediction at random following the training
# class distribution. Without random_state the reported accuracy changes on
# every run; seeding it keeps the baseline reproducible.
classificador = DummyClassifier(strategy='stratified', random_state=seed)
classificador.fit(treino_x, treino_y)

preview = classificador.predict(teste_x)

# Accuracy expressed as a percentage.
accuracy = accuracy_score(teste_y, preview) * 100

print(f'Accuracy: {accuracy:.2f}%')

Accuracy: 51.68%
