In [None]:
from sklearn.datasets import make_gaussian_quantiles
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np

## Helper functions

In [None]:
def draw(X, y):
    """Scatter-plot 2-D points coloured by their binary class label.

    The points are plotted in a random order so that neither class is
    systematically painted on top of the other.

    Parameters
    ----------
    X : array-like, shape (n_samples, >= 2)
        Point coordinates; only the first two columns are plotted.
    y : array-like, shape (n_samples,)
        Binary labels encoded either as {0, 1} or as {-1, +1}.
        Label 0 / -1 is drawn in cyan ('c'), label 1 / +1 in red ('r').
    """
    color = ['c', 'r']
    # Accept plain lists as well as arrays: fancy indexing below needs ndarrays.
    X = np.asarray(X)
    y = np.asarray(y)
    p = np.random.permutation(len(y))
    # `y.size` guards np.min, which raises on an empty array.
    if y.size and np.min(y) < 0:
        # Map {-1, +1} -> {0, 1}. Floor division keeps the result an integer;
        # true division would produce a float, which cannot index a list.
        c = [color[(int(t) + 1) // 2] for t in y[p]]
    else:
        c = [color[int(t)] for t in y[p]]
    plt.scatter(X[p, 0], X[p, 1], color=c, alpha=0.1)
    
    
def draw2(X1, X2):
    """Plot two point sets together, X1 as class 0 and X2 as class 1.

    Both sets are stacked row-wise and handed to `draw` with one
    label per row.
    """
    points = np.concatenate([X1, X2], axis=0)
    # One label per row: zeros for X1's rows, ones for X2's rows.
    labels = np.concatenate([[0] * len(X1), [1] * len(X2)])

    draw(points, labels)
    


## Two Gaussians

In [None]:
# Construct dataset: two isotropic, unit-variance Gaussian clusters in 2-D.
# Each cluster is generated with its own seed. Reusing a single seed would
# make the second cluster an exact translated copy of the first instead of an
# independent sample.

# Gaussian 1: no mean given, so make_gaussian_quantiles uses its default centre.
X1, y1 = make_gaussian_quantiles(cov=1.,
                                 n_samples=5000, n_features=2,
                                 n_classes=1, random_state=1)

# Gaussian 2: centred at (2, 2).
X2, y2 = make_gaussian_quantiles(cov=1., mean=(2,2),
                                 n_samples=5000, n_features=2,
                                 n_classes=1, random_state=2)

draw2(X1, X2)

# With n_classes=1 the generator labels every sample 0, so adding 1 to y2
# turns the second cluster into class 1.
X = np.concatenate([X1,X2], axis=0)
y = np.concatenate([y1, y2 + 1])


In [None]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=101
)

# Logistic regression on the raw 2-D features (fit returns the fitted estimator).
logmodel = LogisticRegression().fit(X_train, y_train)
predictions = logmodel.predict(X_test)

# Show the held-out points coloured by their predicted class.
draw(X_test, predictions)

In [None]:
# Per-class precision / recall / F1 of the predictions on the held-out split.
print(classification_report(y_true=y_test, y_pred=predictions))

## XOR

In [None]:
# XOR layout: four Gaussian clusters, one per quadrant, with diagonally
# opposite clusters sharing a class.
# Every cluster gets its own seed (1..4). With a shared seed all four clusters
# would be exact translated copies of one another rather than independent samples.
xor_centers = [(-1, -1), (-1, 1), (1, 1), (1, -1)]
(X1, y1), (X2, y2), (X3, y3), (X4, y4) = [
    make_gaussian_quantiles(cov=0.5, mean=center,
                            n_samples=5000, n_features=2,
                            n_classes=1, random_state=seed)
    for seed, center in enumerate(xor_centers, start=1)
]

# Clusters 1 and 3 form one class, clusters 2 and 4 the other.
Xpos = np.concatenate([X1,X3], axis=0)
Xneg = np.concatenate([X2,X4], axis=0)
X = np.concatenate([X1, X2, X3, X4], axis = 0)
# With n_classes=1 the generator labels every sample 0; "+ 1" marks clusters 2 and 4 as class 1.
y = np.concatenate([y1, y2+1, y3, y4+ 1], axis = 0)
draw(X,y)

In [None]:
# 80/20 split of the current X, y with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=101
)

# Logistic regression on the raw features only (no feature expansion).
logmodel = LogisticRegression().fit(X_train, y_train)
predictions = logmodel.predict(X_test)

# Text report first, then the held-out points coloured by predicted class.
print(classification_report(y_true=y_test, y_pred=predictions))
draw(X_test, predictions)

## Poly(2) features

In [None]:
# Expand the raw features into polynomial terms up to degree 2.
poly = PolynomialFeatures(2)

X_train_poly = poly.fit_transform(X_train)
# Reuse the transformer fitted on the training split; preprocessing must never
# be refitted on held-out data.
X_test_poly = poly.transform(X_test)
logmodel = LogisticRegression()
logmodel.fit(X_train_poly,y_train)
predictions = logmodel.predict(X_test_poly)
print(classification_report(y_test, predictions))
# Plot in the original 2-D feature space, coloured by predicted class.
draw(X_test,predictions)

## Poly(>>2) features

In [None]:
# Same pipeline with a much higher polynomial degree (25).
poly = PolynomialFeatures(25)

X_train_poly = poly.fit_transform(X_train)
# Reuse the transformer fitted on the training split; preprocessing must never
# be refitted on held-out data.
X_test_poly = poly.transform(X_test)
logmodel = LogisticRegression()
logmodel.fit(X_train_poly,y_train)
predictions = logmodel.predict(X_test_poly)
print(classification_report(y_test, predictions))
# Plot in the original 2-D feature space, coloured by predicted class.
draw(X_test,predictions)

## Regularization

In [None]:
# Refit on the polynomial features currently held in X_train_poly, this time
# with a very small C (C is the inverse of the regularization strength).
logmodel = LogisticRegression(C=1e-22).fit(X_train_poly, y_train)
predictions = logmodel.predict(X_test_poly)

print(classification_report(y_true=y_test, y_pred=predictions))
draw(X_test, predictions)

## Adding offset

In [None]:
# XOR layout (four clusters, diagonally opposite ones sharing a class) with the
# second feature shifted by a constant offset of +100.
# Every cluster gets its own seed (1..4). With a shared seed all four clusters
# would be exact translated copies of one another rather than independent samples.
y_offset = 100
xor_centers = [(-1, -1 + y_offset), (-1, 1 + y_offset),
               (1, 1 + y_offset), (1, -1 + y_offset)]
(X1, y1), (X2, y2), (X3, y3), (X4, y4) = [
    make_gaussian_quantiles(cov=0.5, mean=center,
                            n_samples=5000, n_features=2,
                            n_classes=1, random_state=seed)
    for seed, center in enumerate(xor_centers, start=1)
]

# Clusters 1 and 3 form one class, clusters 2 and 4 the other.
Xpos = np.concatenate([X1,X3], axis=0)
Xneg = np.concatenate([X2,X4], axis=0)
X = np.concatenate([X1, X2, X3, X4], axis = 0)
# With n_classes=1 the generator labels every sample 0; "+ 1" marks clusters 2 and 4 as class 1.
y = np.concatenate([y1, y2+1, y3, y4+ 1], axis = 0)
draw(X,y)

In [None]:
# Degree-2 polynomial features on the offset data, without any rescaling.
poly = PolynomialFeatures(2)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=101)
X_train_poly = poly.fit_transform(X_train)
# Reuse the transformer fitted on the training split; preprocessing must never
# be refitted on held-out data.
X_test_poly = poly.transform(X_test)
logmodel = LogisticRegression()
logmodel.fit(X_train_poly,y_train)
predictions = logmodel.predict(X_test_poly)
print(classification_report(y_test, predictions))
# Plot in the original 2-D feature space, coloured by predicted class.
draw(X_test,predictions)

## min-max normalization

In [None]:
# Rescale each feature to [0, 1] before the degree-2 polynomial expansion.
# The transformers are created here so the cell does not depend on a `poly`
# object left over from another cell.
poly = PolynomialFeatures(2)
scaler = MinMaxScaler(feature_range = (0,1))

# Start from a fresh split of the raw data. Same seed as the other cells, so
# the partition is identical, but the cell can now be re-run on its own
# without scaling data that has already been scaled.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=101)

# Learn the scaling from the training split only, then apply it to both splits.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X_train_poly = poly.fit_transform(X_train)
# Reuse the transformer fitted on the training split; never refit on held-out data.
X_test_poly = poly.transform(X_test)
logmodel = LogisticRegression()
logmodel.fit(X_train_poly,y_train)
predictions = logmodel.predict(X_test_poly)
print(classification_report(y_test, predictions))
# X_test now holds the scaled coordinates, so the plot is in scaled space.
draw(X_test,predictions)

## standard normalization

In [None]:
# Standardize each feature before the degree-2 polynomial expansion.
# The transformers are created here so the cell does not depend on a `poly`
# object left over from another cell.
poly = PolynomialFeatures(2)
scaler = StandardScaler()
# Fresh split of the raw data (same seed as the other cells), so that scaling
# is applied to unscaled features.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=101)

# Learn the scaling from the training split only, then apply it to both splits.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X_train_poly = poly.fit_transform(X_train)
# Reuse the transformer fitted on the training split; never refit on held-out data.
X_test_poly = poly.transform(X_test)
logmodel = LogisticRegression()
logmodel.fit(X_train_poly,y_train)
predictions = logmodel.predict(X_test_poly)
print(classification_report(y_test, predictions))
# X_test now holds the standardized coordinates, so the plot is in scaled space.
draw(X_test,predictions)