# Learning scikit-learn

## Getting the Iris data

In [258]:
from sklearn import datasets
import numpy as np
# Columns of the Iris feature matrix used here: 2 = petal length, 3 = petal width.
PETAL_COLUMNS = [2, 3]

iris = datasets.load_iris()
X, y = iris.data[:, PETAL_COLUMNS], iris.target

## Dividing data into test and training sets

In [259]:
# `train_test_split` lives in `sklearn.model_selection`; the old
# `sklearn.cross_validation` module is only kept as a fallback for
# installations that predate `model_selection`.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Hold out 30% of the samples for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

## Standardizing the features

In [260]:
from sklearn.preprocessing import StandardScaler
# Estimate the scaling parameters (mu and std) from the training split only,
# then apply that same transformation to both splits, so the test data is
# normalized with the values obtained from X_train.
sc = StandardScaler().fit(X_train)
X_train_std, X_test_std = (sc.transform(split) for split in (X_train, X_test))

## Making combined and standardized Feature matrix

In [261]:
# Stack the standardized training rows first and the test rows after them,
# keeping the targets in the same order.
X_combined_std = np.concatenate((X_train_std, X_test_std), axis=0)
y_combined = np.concatenate((y_train, y_test))

## Plotting function for decision boundaries, test and training samples

In [262]:
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt

def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    """Draw a classifier's decision regions over a two-feature data set.

    The classifier is evaluated on a regular grid covering the data range
    (padded by 1 on every side). The predicted class of each grid point is
    drawn as a filled contour and the samples are scattered on top, one
    marker/colour per class. Everything is drawn on the current pyplot
    figure; the caller is responsible for ``plt.show()``.

    Parameters
    ----------
    X : ndarray, shape (n_samples, 2)
        Feature matrix; column 0 goes on the x-axis, column 1 on the y-axis.
    y : ndarray, shape (n_samples,)
        Class label of every row of ``X``. Only five markers/colours are
        defined, so at most five distinct labels can be drawn.
    classifier : object
        Object exposing ``predict`` for an ``(n_points, 2)`` array.
    test_idx : sequence or ndarray of row indices, optional
        Rows of ``X`` to highlight with hollow circles. ``None`` or an empty
        sequence disables the highlight.
    resolution : float, optional
        Spacing of the evaluation grid along both axes.
    """
    # Marker and colour tables, indexed by class position in np.unique(y)
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # Evaluation grid spanning the data, padded by 1 on each side
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)

    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(x1_min, x1_max)
    plt.ylim(x2_min, x2_max)

    # Plot all samples. A named colour is passed instead of an RGBA tuple:
    # a bare 4-tuple in `c` is ambiguous (one colour vs. four values to map).
    for idx, cl in enumerate(classes):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, c=colors[idx], marker=markers[idx], label=cl)

    # Highlight test samples. Compare against None and use len() rather than
    # truthiness, which raises for a multi-element NumPy index array.
    if test_idx is not None and len(test_idx) > 0:
        X_test = X[test_idx, :]
        # Hollow circles need an explicit 'none' face colour plus a visible
        # edge colour; an empty string is not a valid colour spec. The rings
        # are drawn slightly larger than the default so they surround the
        # class markers.
        plt.scatter(X_test[:, 0], X_test[:, 1], alpha=1.0,
                    facecolors='none', edgecolors='black', linewidths=1,
                    marker='o', s=55, label='test sample')

## Learning data using Perceptron

In [263]:
from sklearn.linear_model import Perceptron
# NOTE(review): newer scikit-learn releases name the epoch count `max_iter`
# instead of `n_iter` - confirm against the installed version.
ppn = Perceptron(n_iter = 40, eta0 = 0.1, random_state=0)
# Fit on the training split only. The test split is used afterwards to count
# misclassifications, so training on it would leak the evaluation data into
# the model and make that count meaningless.
ppn.fit(X_train_std, y_train)

Perceptron(alpha=0.0001, class_weight=None, eta0=0.1, fit_intercept=True,
      n_iter=40, n_jobs=1, penalty=None, random_state=0, shuffle=True,
      verbose=0, warm_start=False)

## Predicting with Perceptron and comparison with actual test targets

In [264]:
# Predict the held-out samples and count how many labels disagree.
y_pred = ppn.predict(X_test_std)
n_misclassified = (y_test != y_pred).sum()
print('Misclassified test samples for Perceptron: %d' % n_misclassified)

Misclassified test samples for Perceptron: 4


## Plotting the samples and decision boundary for Perceptron

In [265]:
#plot_decision_regions(X_combined_std, y_combined, classifier=ppn, test_idx=range(105, 150))  
#plt.show()

## Learning data using Logistic Regression

In [266]:
from sklearn.linear_model import LogisticRegression
# Logistic regression fitted on the standardized training split.
lr = LogisticRegression(
    C=1000.0,
    random_state=0,
)
lr.fit(X=X_train_std, y=y_train)

LogisticRegression(C=1000.0, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l2', random_state=0,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)

## Predicting with Logistic Regression and comparison with actual test targets

In [267]:
# Predict the held-out samples and count how many labels disagree.
y_pred_lr = lr.predict(X_test_std)
n_misclassified_lr = (y_test != y_pred_lr).sum()
print('Misclassiified test samples for Logistic Regression: %d' % n_misclassified_lr)

Misclassiified test samples for Logistic Regression: 1


## Plotting samples and decision boundary for Logistic Regression

In [268]:
#plot_decision_regions(X_combined_std, y_combined, classifier=lr, test_idx=range(105, 150)) 
#plt.show()

## Learning data with SVM

In [269]:
from sklearn.svm import SVC
# Support vector classifier with a linear kernel, fitted on the standardized
# training split.
svm = SVC(
    kernel='linear',
    C=1.0,
    random_state=0,
)
svm.fit(X=X_train_std, y=y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=0, shrinking=True,
  tol=0.001, verbose=False)

## Predicting with SVM and comparison with actual test targets

In [270]:
# Predict the held-out samples and count how many labels disagree.
y_pred_svm = svm.predict(X_test_std)
n_misclassified_svm = (y_test != y_pred_svm).sum()
print('Misclassified test samples for SVM: %d' % n_misclassified_svm)

Misclassified test samples for SVM: 1


## Plotting samples and decision boundary for SVM

In [271]:
#plot_decision_regions(X_combined_std, y_combined, svm, test_idx=range(105, 150))
#plt.show()

## Nonlinear boundaries using SVM

In [299]:
# Synthetic XOR data: standard-normal points labelled +1 when exactly one
# of the two coordinates is positive, and -1 otherwise.
n_samples, n_features = 200, 2
np.random.seed(0)
X_xor = np.random.randn(n_samples, n_features)
y_xor = np.where(np.logical_xor(X_xor[:, 0] > 0, X_xor[:, 1] > 0), 1, -1)

# Raw samples: class +1 in blue, class -1 in red.
for label, colour in ((1, 'b'), (-1, 'r')):
    members = X_xor[y_xor == label]
    plt.scatter(members[:, 0], members[:, 1], c=colour)

# An RBF-kernel SVM can separate the two classes with a nonlinear boundary.
svm_xor = SVC(kernel='rbf', C=10.0, gamma=0.1, random_state=0)
svm_xor.fit(X_xor, y_xor)
plot_decision_regions(X_xor, y_xor, classifier=svm_xor)
plt.show()