# Section 2.1
# Gradient Descent

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def gradientDescent(x, y, theta, alpha, m, numIterations):
    """Fit linear-regression weights with batch gradient descent.

    Each iteration steps ``theta`` against the averaged squared-error
    gradient ``x^T . (x . theta - y) / m``.

    Args:
        x: Design matrix, one row per sample. A 1-D array is accepted and
            used as-is in the dot products.
        y: Targets. 1-D arrays, row/column vectors and ``np.matrix`` are
            accepted when ``theta`` is a single weight vector.
        theta: Initial weights (list, array or matrix).
        alpha: Learning rate (step size).
        m: Number of samples the gradient is averaged over.
        numIterations: Number of update steps to perform.

    Returns:
        numpy.ndarray: The updated weights as floats, shaped like the
        ``theta`` that was passed in (also when ``numIterations`` is 0).
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    theta = np.asarray(theta, dtype=float)
    theta_shape = theta.shape

    # A weight *vector* (at most one non-singleton axis) means single-output
    # regression. Flatten theta and a vector-like y so that a column vector
    # or np.matrix cannot broadcast the residual into an (m, m) matrix.
    # A genuine (n_features, n_outputs) theta is left untouched.
    single_output = theta.ndim <= 1 or theta.size == max(theta.shape)
    if single_output:
        if theta.ndim > 1:
            theta = theta.ravel()
        if y.ndim > 1 and y.size == max(y.shape):
            y = y.ravel()

    xTrans = x.transpose()
    for _ in range(numIterations):
        # residual of the current fit
        loss = np.dot(x, theta) - y
        # avg gradient per example
        gradient = np.dot(xTrans, loss) / m
        # update
        theta = theta - alpha * gradient
    # Hand the weights back in the caller's original layout.
    return np.reshape(theta, theta_shape)

# Toy data: the two points (1, 3) and (2, 5).
x = np.array([1, 2])
y = np.array([3, 5]).transpose()  # transposing a 1-D array leaves it unchanged

# Draw the samples as a red line.
plt.plot(x, y, "r")
plt.show()

# Run 100 small update steps starting from theta = [1, 1].
# NOTE(review): x is 1-D here, so np.dot(x, theta) inside gradientDescent is a
# single scalar; confirm whether a 2-D design matrix was intended.
theta = [1, 1]
theta_grad = gradientDescent(x, y, theta, alpha=0.001, m=2, numIterations=100)

print("The computed value of Theta:", theta_grad)

# Dot the learned weights with a probe vector.
checker = np.array([1, 2])
basisVector = np.dot(theta_grad, checker.T)
print(basisVector)

### Multivariate Gradient Descent

In [None]:
# Two samples with two features each. Plain ndarrays are used instead of
# np.matrix: NumPy no longer recommends that class, and because a matrix is
# always 2-D the residual `hypothesis - y` inside gradientDescent broadcast
# to a 2x2 matrix, so the returned theta was 2x2 instead of a weight vector.
x = np.array([[1, 2], [3, 4]])
y = np.array([3, 5])  # one target per row of x

plt.plot(x, y, "r")
plt.show()

# Start from theta = [1, 1] and take 100 small gradient steps.
theta = [1, 1]
theta_grad = gradientDescent(x, y, theta, alpha=0.001, m=2, numIterations=100)

print("The computed value of Theta:", theta_grad)

# Dot the learned weights with a probe vector.
checker = np.array([1, 2])
basisVector = np.dot(theta_grad, checker.transpose())
print(basisVector)

### Stochastic Gradient Descent

For more details: http://scikit-learn.org/stable/modules/sgd.html

# Section 2.2
# Section 2.2.1
# Single Variable Linear Regression

### Import Necessary Libraries:

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

### Load the diabetes dataset

In [None]:
# Load the diabetes data set through the sklearn `datasets` module imported above.
diabetes = datasets.load_diabetes()

### Let's take a look at the dataset:

In [None]:
# Print the whole object returned by load_diabetes() for a first inspection.
print(diabetes)

### Use only one feature

In [None]:
# Keep only feature column 2, as a two-dimensional (n_samples, 1) slice.
diabetes_X = diabetes.data[:, 2:3]

### Split the data into training/testing sets

In [None]:
# The last 20 rows are held out for testing; everything before them is training data.
diabetes_X_train, diabetes_X_test = diabetes_X[:-20], diabetes_X[-20:]

print(diabetes_X_train)

### Split the targets into training/testing sets

In [None]:
# Split the targets at the same position as the features: last 20 values are the test set.
diabetes_y_train, diabetes_y_test = diabetes.target[:-20], diabetes.target[-20:]

print(diabetes_y_train)

### Create linear regression object

In [None]:
# Create the sklearn LinearRegression estimator with its default arguments.
regr = linear_model.LinearRegression()

### Train the model using the training sets

In [None]:
# Fit the estimator on the training feature column and training targets.
regr.fit(diabetes_X_train, diabetes_y_train)

### Make predictions using the testing set

In [None]:
# Predict targets for the held-out feature rows with the fitted model.
diabetes_y_pred = regr.predict(diabetes_X_test)
# Uncomment to inspect the test inputs, the predictions and the true targets:
#print(diabetes_X_test)
#print(diabetes_y_pred)
#print(diabetes_y_test)

### The coefficients

In [None]:
# Print the fitted model's coef_ attribute (a single feature column was selected above).
print('Coefficients: \n', regr.coef_)

### The mean squared error

In [None]:
# Mean squared error between the true test targets and the predictions.
mse = mean_squared_error(diabetes_y_test, diabetes_y_pred)
print("Mean squared error: %.2f" % mse)

### Coefficient of determination (R² score): 1 is perfect prediction

In [None]:
# r2_score of the predictions against the true test targets.
r2 = r2_score(diabetes_y_test, diabetes_y_pred)
print('Variance score: %.2f' % r2)

### Plot outputs

In [None]:
# Work on the current axes explicitly instead of through the pyplot shortcuts.
ax = plt.gca()

# Black dots: actual test targets. Blue line: the model's predictions.
ax.scatter(diabetes_X_test, diabetes_y_test, color='black')
ax.plot(diabetes_X_test, diabetes_y_pred, color='blue', linewidth=3)

# Hide the tick marks on both axes.
ax.set_xticks(())
ax.set_yticks(())

plt.show()

# Section 2.2.2
# Multivariate Linear Regression

### Importing necessary Libraries

In [None]:
import pandas as pd
from sklearn import linear_model

### Loading Dataset from CSV Files

In [None]:
# Read the training and test tables from CSV files addressed by relative path.
dataTrain = pd.read_csv("dataTrain.csv")
dataTest = pd.read_csv("dataTest.csv")

### Let's Take a Look at the Training Set:

In [None]:
# Bare expression: the notebook renders the training DataFrame as this cell's output.
dataTrain

### The Test Set:

In [None]:
# Bare expression: the notebook renders the test DataFrame as this cell's output.
dataTest

### Defining the Train & Test Set

In [None]:
# Column names shared by the training and test frames.
feature_cols = ['Temperature(K)', 'Pressure(ATM)']
target_col = 'CompressibilityFactor(Z)'

# Inputs are the two feature columns; the output is the compressibility factor.
x_train, y_train = dataTrain[feature_cols], dataTrain[target_col]
x_test, y_test = dataTest[feature_cols], dataTest[target_col]

### Defining and Training The Model

In [None]:
# Create a LinearRegression estimator and fit it on the two training feature columns.
clf_linear = linear_model.LinearRegression()
# NOTE(review): sklearn's fit() conventionally returns the estimator itself, so `model`
# is presumably the same object as `clf_linear`. Confirm.
model = clf_linear.fit(x_train, y_train)

### Predicting Values for the Test Set

In [None]:
# Print the fitted model's predictions for the test-set feature columns.
print (model.predict(x_test))

# Section 2.3
# Support Vector Machines

Reference to the SVM Algorithm: https://www.youtube.com/watch?v=1NxnPkZM9bc

## Section 2.3.1
## The Iris Dataset:

In [None]:

# Code source: Gaël Varoquaux
# Modified for documentation by Jaques Grobler
# License: BSD 3 clause

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- kept: importing it registers the "3d" projection on older matplotlib
from sklearn import datasets
from sklearn.decomposition import PCA

# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
y = iris.target

# Axis limits: data range padded by 0.5 on each side.
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5

plt.figure(2, figsize=(8, 6))
plt.clf()

# Plot the training points
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Set1,
            edgecolor='k')
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')

plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.xticks(())
plt.yticks(())

# To get a better understanding of the interaction of the dimensions,
# plot the first three PCA dimensions
fig = plt.figure(1, figsize=(8, 6))
# Create the 3-D axes through the figure: constructing Axes3D(fig, ...) directly
# no longer attaches the axes to the figure in current matplotlib, which leaves
# the plot empty.
ax = fig.add_subplot(111, projection="3d", elev=-150, azim=110)
X_reduced = PCA(n_components=3).fit_transform(iris.data)
ax.scatter(X_reduced[:, 0], X_reduced[:, 1], X_reduced[:, 2], c=y,
           cmap=plt.cm.Set1, edgecolor='k', s=40)
ax.set_title("First three PCA directions")
# The w_xaxis / w_yaxis / w_zaxis aliases were removed from matplotlib; the
# plain xaxis / yaxis / zaxis attributes refer to the same axis objects.
ax.set_xlabel("1st eigenvector")
ax.xaxis.set_ticklabels([])
ax.set_ylabel("2nd eigenvector")
ax.yaxis.set_ticklabels([])
ax.set_zlabel("3rd eigenvector")
ax.zaxis.set_ticklabels([])

plt.show()

## Section 2.3.2
## Effect of Different Kernels:

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, svm

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Drop class 0 and keep the first two features: a two-class, two-dimensional problem.
X = X[y != 0, :2]
y = y[y != 0]

n_sample = len(X)

# Shuffle with a fixed seed so the 90/10 split below is reproducible.
np.random.seed(0)
order = np.random.permutation(n_sample)
X = X[order]
# The np.float alias was removed in NumPy 1.24 (AttributeError); the builtin
# float selects the same float64 dtype.
y = y[order].astype(float)

# First 90% of the shuffled samples train the model, the rest are the test set.
X_train = X[:int(.9 * n_sample)]
y_train = y[:int(.9 * n_sample)]
X_test = X[int(.9 * n_sample):]
y_test = y[int(.9 * n_sample):]

# fit the model
for fig_num, kernel in enumerate(('linear', 'rbf', 'poly')):
    clf = svm.SVC(kernel=kernel, gamma=10)
    clf.fit(X_train, y_train)

    plt.figure(fig_num)
    plt.clf()
    plt.scatter(X[:, 0], X[:, 1], c=y, zorder=10, cmap=plt.cm.Paired,
                edgecolor='k', s=20)

    # Circle out the test data
    plt.scatter(X_test[:, 0], X_test[:, 1], s=80, facecolors='none',
                zorder=10, edgecolor='k')

    plt.axis('tight')
    x_min = X[:, 0].min()
    x_max = X[:, 0].max()
    y_min = X[:, 1].min()
    y_max = X[:, 1].max()

    # 200 x 200 grid over the data range; evaluate the decision function on every grid point.
    XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
    Z = clf.decision_function(np.c_[XX.ravel(), YY.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(XX.shape)
    plt.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired)
    # Solid line: decision boundary (level 0); dashed lines: levels -0.5 and +0.5.
    plt.contour(XX, YY, Z, colors=['k', 'k', 'k'],
                linestyles=['--', '-', '--'], levels=[-.5, 0, .5])

    plt.title(kernel)
plt.show()

## Section 2.3.3
## Support Vector Machine Classification

### Importing necessary Functions

In [None]:
from sklearn import datasets
# sklearn.cross_validation was removed in scikit-learn 0.20. model_selection
# provides the same train_test_split, so it is imported under the old name to
# keep existing `cross_validation.train_test_split(...)` calls working.
from sklearn import model_selection as cross_validation
from sklearn import svm

### Loading the Iris Dataset

In [None]:
# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
y = iris.target

### Let's take a look at the data

In [None]:
# Print the two-column feature array, then the class labels.
print(X)
print(y)

### Splitting the Data into Train and Test Set

In [None]:
# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split
# is provided by sklearn.model_selection.
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

### Defining the Classifier

In [None]:
# Support-vector classifier with a polynomial kernel; every other setting is left at its default.
clf = svm.SVC(kernel = 'poly')

### Training the Classifier and Evaluating It on the Test Set:

In [None]:
# Train on the training split, then score the classifier on the held-out split.
clf.fit(X_train, y_train)
accuracy = clf.score(X_test, y_test)
print("The accuracy of the network is:", accuracy)

# Show the predicted labels followed by the true labels for a side-by-side comparison.
test_predictions = clf.predict(X_test)
print("Prediction for the Test set is:")
print(test_predictions)

print("Lets Compare it to the original target test set:")
print(y_test)

## Section 2.3.4
## Support Vector Machine Regression:

In [None]:
from sklearn import datasets
from sklearn import svm
# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split
# is provided by sklearn.model_selection.
from sklearn.model_selection import train_test_split

# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
y = iris.target

# Hold out 30% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Support-vector *regression*: the integer class ids are fitted as a numeric target.
clf = svm.SVR(kernel = 'poly')

clf.fit(X_train, y_train)

# For a regressor, score() returns the coefficient of determination (R^2),
# not a classification accuracy, so the message says so.
accuracy= clf.score(X_test, y_test)
print("The R^2 score of the model is:", accuracy)
print("Prediction for the Test set is:")
print( clf.predict(X_test) )
print("Lets Compare it to the original target test set:")
print(y_test)

# Section 2.4
# Using Different Classification Methods
## Section 2.4.1
## Linear Regression:

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn import datasets
# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split
# is provided by sklearn.model_selection.
from sklearn.model_selection import train_test_split

# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
y = iris.target

# Hold out 30% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Linear regression fitted on the integer class ids as a numeric target.
clf = LinearRegression()

clf.fit(X_train, y_train)

# For a regressor, score() returns the coefficient of determination (R^2),
# not a classification accuracy, so the message says so.
accuracy= clf.score(X_test, y_test)
print("The R^2 score of the model is:", accuracy)
print("Prediction for the Test set is:")
print( clf.predict(X_test) )
print("Lets Compare it to the original target test set:")
print(y_test)

In [None]:
# Import every estimator referenced below: elsewhere in the notebook these names
# are only imported in later cells, so on a fresh kernel (Restart & Run All)
# this cell raised NameError.
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# The classification methods visited in this section, with the settings used
# for the side-by-side comparison in Section 2.6.
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()]

## Section 2.4.2
## Random Forest Classification:

reference: Random Forest: https://www.youtube.com/watch?v=loNcrMjYh64


reference: ensemble methods: http://scikit-learn.org/stable/modules/ensemble.html

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn import datasets
# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split
# is provided by sklearn.model_selection.
from sklearn.model_selection import train_test_split

# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
y = iris.target

# Hold out 30% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# 10 trees, each at most 5 levels deep, considering 1 feature per split.
clf = RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)
clf.fit(X_train, y_train)

# Score on the held-out split, then show predictions next to the true labels.
accuracy= clf.score(X_test, y_test)
print("The accuracy of the network is:", accuracy)
print("Prediction for the Test set is:")
print( clf.predict(X_test) )
print("Lets Compare it to the original target test set:")
print(y_test)

Random forest Parameters:

The main parameters to adjust when using these methods are n_estimators and max_features. The former is the number of trees in the forest. The larger the better, but also the longer it will take to compute. In addition, note that results will stop getting significantly better beyond a critical number of trees. The latter is the size of the random subsets of features to consider when splitting a node. The lower the greater the reduction of variance, but also the greater the increase in bias. Empirical good default values are max_features=n_features for regression problems, and max_features=sqrt(n_features) for classification tasks (where n_features is the number of features in the data). Good results are often achieved when setting max_depth=None in combination with min_samples_split=2 (i.e., when fully developing the trees). Bear in mind though that these values are usually not optimal, and might result in models that consume a lot of RAM.


## Section 2.4.3
## Ada Boost Classifier

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn import datasets
# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split
# is provided by sklearn.model_selection.
from sklearn.model_selection import train_test_split

# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
y = iris.target

# Hold out 30% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# AdaBoost classifier with its default arguments.
clf = AdaBoostClassifier()
clf.fit(X_train, y_train)

# Score on the held-out split, then show predictions next to the true labels.
accuracy= clf.score(X_test, y_test)
print("The accuracy of the network is:", accuracy)
print("Prediction for the Test set is:")
print( clf.predict(X_test) )
print("Lets Compare it to the original target test set:")
print(y_test)

## 2.4.4
## Multi-Layer Perceptrons / MLP / ANN (Artificial Neural Network)

Reference: ANN: https://www.youtube.com/watch?v=LCzufhtIFnY

Reference: Scikit-learn : http://scikit-learn.org/stable/modules/neural_networks_supervised.html#multi-layer-perceptron

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn import datasets
# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split
# is provided by sklearn.model_selection.
from sklearn.model_selection import train_test_split

# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
y = iris.target

# Hold out 30% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Alternative configuration left for experimentation:
#clf = MLPClassifier(alpha=1)

# Two hidden layers (5 and 2 units), L-BFGS solver, fixed random_state for repeatable initialisation.
clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)

clf.fit(X_train, y_train)

# Score on the held-out split, then show predictions next to the true labels.
accuracy= clf.score(X_test, y_test)
print("The accuracy of the network is:", accuracy)
print("Prediction for the Test set is:")
print( clf.predict(X_test) )
print("Lets Compare it to the original target test set:")
print(y_test)

# Section 2.5: Visiting an Example
## K-Nearest Neighbours implementation using the Pima Indians Diabetes dataset

Reference : Pima indian dataset:  https://github.com/LamaHamadeh/Pima-Indians-Diabetes-DataSet-UCI

### Import necessary libraries

In [None]:
import numpy as np
import pandas as pd
# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split is
# provided by sklearn.model_selection (the same import Section 2.6 already uses).
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

### loading the dataframe

In [None]:

# Load the Pima data file, addressed by relative path.
# NOTE(review): read_csv treats the first line as the header by default, and the
# column names are overwritten in a later cell. If the file has no header row,
# its first record is being consumed as column names. Confirm, and pass
# header=None if so.
df = pd.read_csv('pima_indians_diabetes.txt')

### defining the columns

In [None]:
 
# Name the nine columns; 'Class' is the column later used as the label.
df.columns =['No_pregnant', 'Plasma_glucose', 'Blood_pres', 'Skin_thick', 
             'Serum_insu', 'BMI', 'Diabetes_func', 'Age', 'Class']

### checking the dataframe

In [None]:
# Print, in order: the first rows, the column dtypes, and the (rows, columns) shape.
for summary in (df.head(), df.dtypes, df.shape):
    print(summary)

### identify nans ( missing data) 

In [None]:

def num_missing(x):
    """Return the number of missing (null/NaN) entries in ``x``.

    Args:
        x: A pandas Series, e.g. one column passed in by ``DataFrame.apply``.

    Returns:
        The count of entries for which ``isnull()`` is true.
    """
    # Vectorized sum of the boolean mask instead of a Python-level loop over it.
    return x.isnull().sum()
# Apply num_missing to each column (axis=0) and print the per-column counts.
print ("Missing values per column:")
print (df.apply(num_missing, axis=0),'\n') #no nans

## Apply the K nearest neighbour classifier

Reference: kNN Algorithm: https://www.youtube.com/watch?v=UqYde-LULfs

Reference: kNN Sklearn : http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html

### split the data into training and testing datasets

In [None]:
# Feature matrix: every column except the 'Class' label.
X = np.array(df.drop(['Class'], axis = 1))
# Label vector taken from the 'Class' column.
y = np.array(df['Class'])
# Hold out half of the rows for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y , test_size =0.5,random_state = 7)

### apply the knn method:

This is a binary classifier: for each sample in the test data it predicts whether or not the patient has diabetes.

In [None]:
# k-nearest-neighbours classifier configured with n_neighbors=2.
Knn = KNeighborsClassifier(n_neighbors = 2)

### train the data

In [None]:
# Fit the classifier on the training half of the data.
Knn.fit(X_train,y_train)

### test the data

In [None]:
# Score the fitted classifier on the held-out half, i.e. how accurately it
# labels the diabetes class as either 1 or 0.
accuracy = Knn.score(X_test, y_test)
print('accuracy of the model is: ', accuracy) # original author's recorded result: 0.73

## Plotting and visualisation (focus on only two features from the dataset)

In [None]:
X1 = np.array(df[['Plasma_glucose','Age']]) #choose only two features
Y = np.array(df['Class']) #the label of the dataset
# NOTE(review): the cells that follow pass lowercase `y` (built from the same
# 'Class' column in an earlier cell) rather than this `Y`.

h = .02  # step size in the mesh
 
# Create color maps using hex_colors
# Light shades colour the predicted regions, bold shades colour the data points.
cmap_light = ListedColormap(['#FFAAAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#0000FF'])

### apply Neighbours Classifier and fit the data.

In [None]:

# NOTE(review): this split produces two-feature train/test sets, but the
# classifier below is fitted on the full X1/y, so X_train, X_test, Y_train and
# Y_test are not used in this plotting section. Confirm that is intended.
X_train, X_test, Y_train, Y_test = train_test_split (X1, y, test_size=0.5, random_state = 7)
Knn = KNeighborsClassifier(n_neighbors = 15)
# Fit on every sample; the plot that follows draws all of X1 over the decision regions.
Knn.fit(X1, y)

### Plot the decision boundary. For that, we will assign a color to each point in the mesh [x_min, x_max] x [y_min, y_max] (this portion might take a while to run, please be patient)

In [None]:

# Classify every point in the mesh [x_min, x_max] x [y_min, y_max], where the
# limits are the range of each feature padded by 1 on both sides.
# NOTE(review): the mesh spacing `h` comes from an earlier cell (0.02 there); on
# unscaled features this may create a very large grid, which is presumably why
# this cell is slow. Confirm, and consider a coarser step.
x_min, x_max = X1[:, 0].min() - 1, X1[:, 0].max() + 1
y_min, y_max = X1[:, 1].min() - 1, X1[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = Knn.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure()
plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

# Plot also the training points
plt.scatter(X1[:, 0], X1[:, 1], c=y, cmap=cmap_bold)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xlabel('Plasma glucose concentration a 2 hours in an oral glucose tolerance test')
plt.ylabel('Age')
plt.title('K = 15')

plt.show()

### The part of the code we actually need, in a nutshell:

In [None]:
#import necessary libraries
import numpy as np
import pandas as pd
# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split is
# provided by sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
#-------------------

#loading the dataframe
df = pd.read_csv('pima_indians_diabetes.txt')

#-------------------

#defining the columns 
df.columns =['No_pregnant', 'Plasma_glucose', 'Blood_pres', 'Skin_thick', 
             'Serum_insu', 'BMI', 'Diabetes_func', 'Age', 'Class']

# features are every column except the 'Class' label
X = np.array(df.drop(['Class'], axis = 1))
y = np.array(df['Class'])
# hold out half of the rows for testing; random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(X, y , test_size =0.5, 
                                                    random_state = 7)
Knn = KNeighborsClassifier(n_neighbors = 2)

#train the data
Knn.fit(X_train,y_train)

# score on the held-out half: how accurately the diabetes class (1 or 0) is predicted
accuracy = Knn.score(X_test, y_test)
print('accuracy of the model is: ', accuracy) # original author's recorded result: 0.73

# show the predicted labels next to the true ones
predictions = Knn.predict(X_test)
print('predictions generated by the knn:')
print(predictions)
print('the actual diabetes ground truth:')
print(y_test)

# Section 2.6
# Visualizing effects of different classifiers:

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Compare ten classifiers on three synthetic 2-D data sets. The figure has one
# row per data set: the first panel shows the raw points, and each following
# panel shows one classifier's decision surface with its test score in the corner.
h = .02  # step size in the mesh

# Panel titles, in the same order as `classifiers` below (the two are zipped together).
names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
         "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
         "Naive Bayes", "QDA"]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()]

# Third data set: a two-feature classification problem with uniform noise added
# from a seeded generator.
X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           random_state=1, n_clusters_per_class=1)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)

# NOTE: this rebinds the name `datasets`, which earlier cells import as the
# sklearn module; re-running those cells' code after this one needs the import again.
datasets = [make_moons(noise=0.3, random_state=0),
            make_circles(noise=0.2, factor=0.5, random_state=1),
            linearly_separable
            ]

figure = plt.figure(figsize=(27, 9))
# Running 1-based subplot index across the whole grid.
i = 1
# iterate over datasets
for ds_cnt, ds in enumerate(datasets):
    # preprocess dataset, split into training and test part
    X, y = ds
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=.4, random_state=42)

    # Mesh over the scaled data range, padded by 0.5 on every side.
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # just plot the dataset first
    cm = plt.cm.RdBu
    cm_bright = ListedColormap(['#FF0000', '#0000FF'])
    ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
    # Column titles are only drawn on the first row.
    if ds_cnt == 0:
        ax.set_title("Input data")
    # Plot the training points
    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,
               edgecolors='k')
    # and testing points
    ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6,
               edgecolors='k')
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xticks(())
    ax.set_yticks(())
    i += 1

    # iterate over classifiers
    for name, clf in zip(names, classifiers):
        ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
        # The same estimator object is refitted for every data set.
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)

        # Plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, x_max]x[y_min, y_max].
        # Use decision_function when the estimator has one, otherwise the
        # predicted probability of the second class.
        if hasattr(clf, "decision_function"):
            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
        else:
            Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

        # Plot also the training points
        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,
                   edgecolors='k')
        # and testing points
        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                   edgecolors='k', alpha=0.6)

        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())
        if ds_cnt == 0:
            ax.set_title(name)
        # Test score in the lower-right corner, without the leading zero (e.g. ".93").
        ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),
                size=15, horizontalalignment='right')
        i += 1

plt.tight_layout()
plt.show()