# Classification

## Generate data

In [1]:
import numpy as np
from numpy.random import binomial, normal
from scipy.stats import bernoulli, binom

# Seed NumPy's global generator so every draw below is reproducible.
np.random.seed(37)

# Number of samples to simulate.
n = 10000

# Design matrix: ten independent standard-normal features. Each feature is
# drawn as its own length-n vector, shaped into a column, and the columns are
# stacked side by side to give an (n, 10) array.
X = np.hstack([normal(0.0, 1.0, n).reshape(n, 1) for _ in range(10)])

# Linear predictor: the j-th feature (0-based) carries weight j + 1, i.e. the
# weights run 1.0, 2.0, ..., 10.0; Gaussian noise with standard deviation 0.2
# is added on top.
z = X.dot(np.arange(1.0, 11.0)) + normal(0.0, 0.2, n)

# Logistic (sigmoid) transform of z: the per-sample probability of label 1.
p = 1.0 / (1.0 + np.exp(-z))

# One single-trial binomial draw per sample turns the probabilities into
# 0/1 class labels.
y = binom.rvs(1, p)

## Types of classifiers

### Logistic regression

In [2]:
from sklearn.linear_model import LogisticRegression

# L1-penalised logistic regression fitted with the liblinear solver. The
# intercept is disabled: the simulated linear predictor z has no constant term.
lr = LogisticRegression(
    fit_intercept=False,
    penalty='l1',
    solver='liblinear',
)
# Kept as the cell's last expression so the fitted estimator is displayed.
lr.fit(X=X, y=y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=False,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l1',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)

### Gaussian Naive Bayes

In [3]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes with every hyper-parameter left at the library default.
nb = GaussianNB()
# Kept as the cell's last expression so the fitted estimator is displayed.
nb.fit(X=X, y=y)

GaussianNB(priors=None, var_smoothing=1e-09)

### Neural network, Multi-Layer Perceptron (MLP)

In [4]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron; only the regularisation term (alpha) and the
# iteration cap are overridden, everything else stays at the defaults.
mlp = MLPClassifier(
    alpha=0.01,
    max_iter=1000,
)
# Kept as the cell's last expression so the fitted estimator is displayed.
mlp.fit(X=X, y=y)

MLPClassifier(activation='relu', alpha=0.01, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_iter=1000, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

### Decision tree

In [5]:
from sklearn.tree import DecisionTreeClassifier

# Single decision tree with default settings (the recorded run shows the gini
# criterion and no depth limit).
dt = DecisionTreeClassifier()
# Kept as the cell's last expression so the fitted estimator is displayed.
dt.fit(X=X, y=y)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best')

### Linear Support Vector Machine (SVM)

In [6]:
from sklearn.svm import LinearSVC

# Linear support vector classifier with default settings (the recorded run
# shows squared-hinge loss with an L2 penalty).
svm = LinearSVC()
# Kept as the cell's last expression so the fitted estimator is displayed.
svm.fit(X=X, y=y)

LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
          verbose=0)

### Stochastic gradient descent (SGD)

In [7]:
from sklearn.linear_model import SGDClassifier

# Linear classifier trained by stochastic gradient descent with default
# settings (the recorded run shows hinge loss with an L2 penalty).
sgd = SGDClassifier()
# Kept as the cell's last expression so the fitted estimator is displayed.
sgd.fit(X=X, y=y)

SGDClassifier(alpha=0.0001, average=False, class_weight=None,
              early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
              l1_ratio=0.15, learning_rate='optimal', loss='hinge',
              max_iter=1000, n_iter_no_change=5, n_jobs=None, penalty='l2',
              power_t=0.5, random_state=None, shuffle=True, tol=0.001,
              validation_fraction=0.1, verbose=0, warm_start=False)

### Random Forest

In [8]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 100 trees; the ensemble size is the only setting that is
# given explicitly, everything else stays at the defaults.
rf = RandomForestClassifier(
    n_estimators=100,
)
# Kept as the cell's last expression so the fitted estimator is displayed.
rf.fit(X=X, y=y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

### AdaBoost

In [9]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost ensemble with default settings (the recorded run shows 50
# estimators and a learning rate of 1.0).
ab = AdaBoostClassifier()
# Kept as the cell's last expression so the fitted estimator is displayed.
ab.fit(X=X, y=y)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1.0,
                   n_estimators=50, random_state=None)

## Performance