<a href="https://colab.research.google.com/github/sy178sy/sy178sy.github.io/blob/master/Implementing_Algorithms.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np

# Generate a 5-dimensional training set: 100 samples for each of three classes,
# every class drawn from its own Gaussian (same covariance, different mean).

# Fix the seed of NumPy's global generator so the drawn samples are reproducible.
np.random.seed(42)

mean1 = [2,8,4,3,5]
mean2 = [2,8,4,9,5]
mean3 = [8,8,4,9,5]

cov = np.array([[2, 1, 1, 1, 1],
                [1, 2, 0, 1, 0],
                [0, 0, 2, 0, 0],
                [1, 1, 0, 2, 0],
                [0, 0, 0, 0, 2]], dtype=float)

# The matrix as written is not symmetric (entries (0,2)/(2,0) and (0,4)/(4,0) differ),
# but multivariate_normal requires a symmetric positive-semidefinite covariance.
# Replace it with its symmetric part and verify the requirement before sampling.
cov = (cov + cov.T) / 2
assert np.allclose(cov, cov.T), "The covariance matrix is not symmetric"
assert np.all(np.linalg.eigvalsh(cov) >= 0), "The covariance matrix is not positive semi-definite"

# Each sample array holds one sample per row: shape (n_samples, n_features) = (100, 5).
class1_sample = np.random.multivariate_normal(mean1, cov, size=100)
assert class1_sample.shape == (100,5), "The matrix does not have the shape (100, 5)"

class2_sample = np.random.multivariate_normal(mean2, cov, size=100)
assert class2_sample.shape == (100,5), "The matrix does not have the shape (100, 5)"

class3_sample = np.random.multivariate_normal(mean3, cov, size=100)
assert class3_sample.shape == (100,5), "The matrix does not have the shape (100, 5)"

In [0]:
#Plot the data for any three dimension (Optional)
%pylab inline
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d

fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(111, projection='3d')
plt.rcParams['legend.fontsize'] = 10   
ax.plot(class1_sample[0,:], class1_sample[1,:], class1_sample[2,:], 'o', markersize=8, color='blue', alpha=0.5, label='class1')
ax.plot(class2_sample[0,:], class2_sample[1,:], class2_sample[2,:], '^', markersize=8, alpha=0.5, color='red', label='class2')
ax.plot(class3_sample[0,:], class3_sample[1,:], class3_sample[2,:], '^', markersize=8, alpha=0.5, color='red', label='class3')

plt.title('Samples for class 1, class2 and class 3')
ax.legend(loc='upper right')

plt.show()

In [0]:
import pandas as pd

# Wrap each class's sample array in a DataFrame whose rows are labelled 1..100 and
# whose feature columns are labelled 1..5, then tag every row with its class id
# (1, 2 or 3) in a trailing 'class' column.
df1, df2, df3 = (
    pd.DataFrame(
        data=class_samples,
        index=np.arange(1, 101),
        columns=np.arange(1, 6),
    ).assign(**{'class': class_id})
    for class_id, class_samples in enumerate(
        (class1_sample, class2_sample, class3_sample), start=1
    )
)

In [0]:
# Stack the three per-class frames into one training frame with a fresh 0..n-1 index.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
df_final = pd.concat([df1, df2, df3], ignore_index=True)

In [0]:
# Split the training frame into the feature columns and the class-label column.
# `axis` is passed by keyword: pandas 2.0 no longer accepts it positionally in drop().
X_train = df_final.drop('class', axis=1)
y_train = df_final['class']

In [0]:
# Generate the held-out test set: 20 samples per class, using the same class means
# and covariance definition as the training data.
mean1 = [2,8,4,3,5]
mean2 = [2,8,4,9,5]
mean3 = [8,8,4,9,5]

cov = np.array([[2, 1, 1, 1, 1],
                [1, 2, 0, 1, 0],
                [0, 0, 2, 0, 0],
                [1, 1, 0, 2, 0],
                [0, 0, 0, 0, 2]], dtype=float)

# The matrix as written is not symmetric (entries (0,2)/(2,0) and (0,4)/(4,0) differ),
# but multivariate_normal requires a symmetric positive-semidefinite covariance.
# Replace it with its symmetric part and verify the requirement before sampling.
cov = (cov + cov.T) / 2
assert np.allclose(cov, cov.T), "The covariance matrix is not symmetric"
assert np.all(np.linalg.eigvalsh(cov) >= 0), "The covariance matrix is not positive semi-definite"

# No seed is set in this cell: the draws continue NumPy's global random stream.
# Each sample array holds one sample per row: shape (n_samples, n_features) = (20, 5).
class1_sample_test = np.random.multivariate_normal(mean1, cov, size=20)
assert class1_sample_test.shape == (20,5), "The matrix does not have the shape (20, 5)"

class2_sample_test = np.random.multivariate_normal(mean2, cov, size=20)
assert class2_sample_test.shape == (20,5), "The matrix does not have the shape (20, 5)"

class3_sample_test = np.random.multivariate_normal(mean3, cov, size=20)
assert class3_sample_test.shape == (20,5), "The matrix does not have the shape (20, 5)"

In [0]:
# Build one DataFrame per class from the test samples: rows labelled 1..20, feature
# columns labelled 1..5, plus a trailing 'class' column holding the class id.
test_row_labels = np.arange(1, 21)
test_feature_labels = np.arange(1, 6)

df1_test = pd.DataFrame(
    data=class1_sample_test, index=test_row_labels, columns=test_feature_labels
).assign(**{'class': 1})

df2_test = pd.DataFrame(
    data=class2_sample_test, index=test_row_labels, columns=test_feature_labels
).assign(**{'class': 2})

df3_test = pd.DataFrame(
    data=class3_sample_test, index=test_row_labels, columns=test_feature_labels
).assign(**{'class': 3})

In [0]:
# Stack the three per-class test frames into one frame with a fresh 0..n-1 index.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
df_final_test = pd.concat([df1_test, df2_test, df3_test], ignore_index=True)

In [0]:
# Split the test frame into the feature columns and the class-label column.
# `axis` is passed by keyword: pandas 2.0 no longer accepts it positionally in drop().
X_test = df_final_test.drop('class', axis=1)
y_test = df_final_test['class']

# Decision Tree (original 5-D features)

In [0]:
#1 Classify the data using decision tree

from sklearn.tree import DecisionTreeClassifier
# random_state pins the estimator's internal randomness so that re-running the
# notebook on the same data yields the same tree.
clf = DecisionTreeClassifier(max_depth=5, random_state=0)
clf.fit(X_train, y_train)

In [0]:
# Predict a class label for every row of X_test with the fitted decision tree `clf`.
y_pred_dtc = clf.predict(X_test)

In [0]:
from sklearn import metrics

# Share of test samples whose decision-tree prediction matches the true label,
# shown as a percentage (the cell's last expression is its displayed output).
100 * metrics.accuracy_score(y_true=y_test, y_pred=y_pred_dtc)

# KNN (original 5-D features)

In [0]:
#1 Classify the data using KNN
from sklearn.neighbors import KNeighborsClassifier

# Hyper-parameters of the classifier: vote among the 11 nearest training samples,
# weighting each vote by 'distance', with the 'brute' neighbour search.
knn_settings = {
    'n_neighbors': 11,
    'weights': 'distance',
    'algorithm': 'brute',
    'leaf_size': 100,
}
neigh = KNeighborsClassifier(**knn_settings)
neigh.fit(X_train, y_train)

In [0]:
# Predict a class label for every row of X_test with the fitted KNN classifier `neigh`.
y_pred = neigh.predict(X_test)

In [0]:
from sklearn import metrics
# Share of test samples whose KNN prediction matches the true label, as a percentage.
100 * metrics.accuracy_score(y_true=y_test, y_pred=y_pred)

# PCA: 5-D to 2-D

In [0]:
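#1. Take the whole dataset, ignoring the class labels.
# Note: X_train (the training frame without its 'class' column, one sample per row)
# already is the unlabelled dataset, and the following PCA cells use it directly,
# so the template lines below are left unfilled.

#all_samples = 
#assert all_samples.shape == (5,300), 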



In [0]:
#2. Computing the d-dimensional mean vector
# Average down the rows (axis 0), which yields one mean per feature column of X_train.
mean_vector = X_train.mean(axis=0)
print('Mean Vector:\n', mean_vector)

In [0]:
#3.Computing the Covariance Matrix 
# np.cov treats each ROW of its input as one variable by default, so the frame is
# transposed first to put the features in rows and the samples in columns.
y = X_train.transpose()

cov_mat = np.cov(y)
print('Covariance Matrix:\n', cov_mat)

In [0]:
#4. Compute eigenvectors and corresponding eigenvalues
from scipy import linalg as LA

# A covariance matrix is real and symmetric, so the symmetric solver eigh is used:
# it returns real eigenvalues (LA.eig returns a complex-typed array) together with
# orthonormal eigenvectors. Column i of eig_vec_cov belongs to eig_val_cov[i].
eig_val_cov, eig_vec_cov = LA.eigh(cov_mat)

# eigh reports eigenvalues in ascending order; reverse both outputs together so the
# largest eigenvalue (and its eigenvector column) comes first.
eig_val_cov = eig_val_cov[::-1]
eig_vec_cov = eig_vec_cov[:, ::-1]


In [0]:
# Decomposes cov_mat again and stores the raw (eigenvalues, eigenvectors) tuple.
# NOTE(review): the next cell rebinds eig_pairs to a fresh list before it is read,
# so this assignment appears to be redundant.
eig_pairs = LA.eig(cov_mat)

In [0]:
#5 Sort the eigenvectors by decreasing eigenvalues

# Make a list of (eigenvalue, eigenvector) tuples.
# The solver returns the eigenvectors as the COLUMNS of eig_vec_cov, so eigenvalue i
# is paired with column [:, i] (row i is not an eigenvector).
# np.real drops the zero imaginary part that LA.eig attaches to the eigenvalues; the
# covariance matrix is symmetric, so its eigenvalues are real.
eig_pairs = [
    (np.real(eig_val_cov[idx]), np.real(eig_vec_cov[:, idx]))
    for idx in range(len(eig_val_cov))
]

# Sort the (eigenvalue, eigenvector) tuples from high to low.
# Only the eigenvalue is used as the key because arrays cannot be ordered.
eig_pairs.sort(key=lambda pair: pair[0], reverse=True)

# Visually confirm that the list is correctly sorted by decreasing eigenvalues
for pair in eig_pairs:
    print(pair[0])

In [0]:
# Collect the vector stored in each of the first two eig_pairs entries (converted to
# a plain list) as the rows of the projection matrix, then show it as an array.
matrix_w = [eig_pairs[idx][1].tolist() for idx in range(2)]

mat_w = np.array(matrix_w)
print(mat_w)

In [0]:
#6 Choosing 2 eigenvectors with the largest eigenvalues
# k is the number of dimensions to keep.
# NOTE(review): k is not read when matrix_w is built; the previous cell hard-codes
# range(2), so the two values have to be kept in sync by hand.
k=2
#matrix_w = 
# matrix_w was already assembled in the previous cell; this cell only prints it.
print('Matrix W:\n', matrix_w)

In [0]:
#7 Transforming the samples onto the new subspace

# Project every training sample with matrix_w: (2 x 5) @ (5 x n) -> (2 x n).
# .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
transformed = np.matmul(matrix_w, X_train.values.T)
assert transformed.shape == (len(matrix_w), len(X_train))

# Draw one scatter series per class. Columns are selected by the true label rather
# than by fixed position, and each class gets its own marker, colour and legend entry.
class_styles = [(1, 'o', 'blue'), (2, '^', 'red'), (3, '*', 'green')]
for class_id, marker, color in class_styles:
    in_class = (y_train == class_id).values
    plt.plot(transformed[0, in_class], transformed[1, in_class], marker,
             markersize=7, color=color, alpha=0.5, label='class' + str(class_id))

#plt.xlim([-4,4])
#plt.ylim([-4,4])
plt.xlabel('x_values')
plt.ylabel('y_values')
plt.legend()
plt.title('Transformed samples with class labels')

plt.show()

# Decision Tree (2-D PCA features)

In [0]:
#1 Classify the data using decision tree
# The projections are stored as (2, n_samples); the classifier expects one sample per
# row, so both the training and the test projections are transposed before use.
transformed_T = transformed.T
# .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
transformed_test = np.matmul(matrix_w, X_test.values.T)
transformed_test_T = transformed_test.T
print(transformed_test.shape)

# random_state pins the estimator's internal randomness so that re-running the
# notebook on the same data yields the same tree.
clf = DecisionTreeClassifier(max_depth=2, random_state=0)
clf = clf.fit(transformed_T,y_train)
y_pred = clf.predict(transformed_test_T)

In [0]:
# Percentage of test samples whose predicted label (y_pred) equals the true label.
100 * metrics.accuracy_score(y_true=y_test, y_pred=y_pred)

# KNN (2-D PCA features)

In [0]:
#1 Classify the data using KNN

# Try every neighbourhood size from 1 to 9 on the 2-D projected data and report
# the test accuracy (in percent) for each.
k_array=range(1,10)

# Compare predictions against a plain array so the match is positional and does not
# depend on how the y_test Series happens to be indexed.
y_test_values = np.asarray(y_test)

for k in k_array:
    KNN = KNeighborsClassifier(n_neighbors = k)
    KNN.fit(transformed_T,y_train)

    y_pred = KNN.predict(transformed_test_T)

    # Count hits and misses in one vectorised pass; the accuracy is then computed
    # once per k instead of once per test sample.
    success = int(np.sum(y_pred == y_test_values))
    fail = len(y_pred) - success
    accuracy = success*100/(success+fail)
    print("Accuracy for k="+str(k)+" is:"+str(accuracy))