In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

In [2]:
data = pd.read_csv('ecommerce_consumers.csv')
data.head()

Unnamed: 0,ratio,time,label
0,0.54,17.2,female
1,0.93,18.2,male
2,0.84,13.6,female
3,0.19,6.0,male
4,0.89,13.2,female


In [3]:
y = data.pop('label')
X = data

In [4]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

logreg = LogisticRegression(random_state=42)
param_grid = {'C': [0.01, 0.1, 1, 10, 100]}

model_logreg = GridSearchCV(estimator=logreg,
                           param_grid=param_grid,
                           cv=3,
                           verbose=1,
                           n_jobs=-1)
model_logreg.fit(X,y)

dec_tree = DecisionTreeClassifier(random_state=42)
params = {
    "max_depth": [2,3,5,10,20],
    "min_samples_leaf": [5,10,20,50,100,500]
}

model_dec_tree= GridSearchCV(estimator=dec_tree,
                           param_grid=params,
                           cv=3,
                           verbose=1,
                            n_jobs=-1)
model_dec_tree.fit(X,y)

svm = SVC(random_state=42)
hyper_params = [ {'kernel':['linear','rbf','poly'],
                  'gamma': [1e-1, 1e-2, 1e-3, 1e-4],
                     'C': [1, 10, 100, 1000]},
               ]

model_svm = GridSearchCV(estimator=svm,
                           param_grid=hyper_params,
                           cv=3,
                           verbose=1,
                        n_jobs=-1)
model_svm.fit(X,y)

Fitting 3 folds for each of 5 candidates, totalling 15 fits
Fitting 3 folds for each of 30 candidates, totalling 90 fits
Fitting 3 folds for each of 48 candidates, totalling 144 fits


GridSearchCV(cv=3, estimator=SVC(random_state=42), n_jobs=-1,
             param_grid=[{'C': [1, 10, 100, 1000],
                          'gamma': [0.1, 0.01, 0.001, 0.0001],
                          'kernel': ['linear', 'rbf', 'poly']}],
             verbose=1)

In [5]:
#predictions for logistic
best_log = model_logreg.best_estimator_
y_pred = best_log.predict(X)

accuracy_score(y_true=y, y_pred=y_pred)

0.675

In [6]:
#predictions for dec tree
best_tree = model_dec_tree.best_estimator_
y_pred = best_tree.predict(X)

accuracy_score(y_true=y, y_pred=y_pred)

0.975

In [7]:
#predictions for svm
best_svm = model_svm.best_estimator_
y_pred = best_svm.predict(X)

accuracy_score(y_true=y, y_pred=y_pred)

0.985

In [9]:
model_svm.best_params_

{'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}