# Inteligencia Artificial Proyecto #2: Clasificación
## Integrantes
    - Luis Berrospi
    - Pedro Dominguez
    - Carlos Esteban Guerrero Robles

### Lectura de datos

In [43]:
import pandas as pd
import numpy as np
import os

#Librerías de reducción de dimensiones
import pywt
import pywt.data

#Librerías para lectura/edición de imágenes
from skimage.io import imread, imshow
from skimage.transform import resize
from skimage.color import rgb2gray

#Librerías para gráficas
import matplotlib.pyplot as plt
from tabulate import tabulate

#Librerías de modelos de clasificación
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import tree
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score # k fold cross validation
from sklearn import metrics # k fold cross validation

In [11]:
train_csv_path = "../data/sign_mnist_train.csv"

df_train = pd.read_csv(train_csv_path)
df_train_x = df_train.loc[:, "pixel1":"pixel784"]
df_train_y = df_train.label

df_train = df_train.to_numpy()

test_csv_path = "../data/sign_mnist_test.csv"

df_test = pd.read_csv(test_csv_path)
df_test_x = df_test.loc[:, "pixel1":"pixel784"]
df_test_y = df_test.label

df_test = df_test.to_numpy()

### Reducción de dimensionalidad

In [17]:
def reduce_dimension(letter, cuts, wavelet):
  for i in range(cuts):
    (letter, cD) = pywt.dwt(letter, wavelet)
  return letter

def vectorizar(matrix):
  return matrix.flatten()

def proccess_letters(dataset, wavelet):
  
  data_X = []
  data_Y = []

  for letter_features in dataset:
      
      letter = letter_features[0]
      data_Y.append(letter)

      letter_features = reduce_dimension(letter_features[1:], 2, wavelet)
      letter_features = vectorizar(letter_features)
      data_X.append(letter_features)

  return data_X, data_Y

#### Wavelet transform

In [18]:
discrete_wavelets = ['haar', 'bior', 'coif', 'rbio', 'sym', 'db', 'dmey']
discrete_wavelets = [ wavelet for wavelet in pywt.wavelist(kind='discrete')]

#### PCA

In [5]:
#print(df_train.shape)
#print(df_test.shape)
random_state = 0
pca = make_pipeline(StandardScaler(), PCA(n_components=2, random_state=random_state))

### SVM

In [6]:
def SVM_classification(df_train_x, df_train_y, df_test_x):
    _svm = svm.SVC(kernel='linear')
    _svm.fit(df_train_x,df_train_y)
    svm_predicted = _svm.predict(df_test_x)

    return svm_predicted

#### Estimación del error

In [7]:
def get_SVM_error(svm_predicted):
    svm_success = 0.0
    for i, val in enumerate(svm_predicted):
        if val == df_test_y[i]:
            svm_success += 1
            
    return svm_success/len(svm_predicted)*100

#### Tabla de resumen

In [None]:
svm_wavelets = list()
svm_acurracies = list()


for wavelet in discrete_wavelets:
    df_train_x, df_train_y = proccess_letters(df_train, wavelet)
    df_test_x, df_test_y = proccess_letters(df_test, wavelet)

    svm_predicted = SVM_classification(df_train_x, df_train_y, df_test_x)
    svm_accuracy = get_SVM_error(svm_predicted)

    svm_wavelets.append(wavelet)
    svm_acurracies.append(svm_accuracy)

In [45]:
svm_experiment = [[svm_wavelets[i][:], accuracy] for i, accuracy in enumerate(svm_acurracies) if accuracy > 83]
svm_experiment = sorted(svm_experiment, key = lambda item: item[1], reverse = True)

In [47]:
fig = plt.figure(figsize = (10, 5))
 
print(tabulate(svm_experiment))
""" # creating the bar plot
plt.bar(svm_experiment.keys(), svm_experiment.values())
 
plt.xlabel("Wavelets")
plt.ylabel("Accuracies")
plt.title("SVM")
plt.show() """

-------  -------
db4      83.6168
coif5    83.589
coif8    83.589
coif2    83.575
sym5     83.575
rbio2.2  83.4495
coif7    83.3659
coif6    83.2962
coif3    83.2125
coif4    83.1149
sym9     83.1149
coif10   83.0312
db10     83.0312
-------  -------


' # creating the bar plot\nplt.bar(svm_experiment.keys(), svm_experiment.values())\n \nplt.xlabel("Wavelets")\nplt.ylabel("Accuracies")\nplt.title("SVM")\nplt.show() '

<Figure size 1000x500 with 0 Axes>

### KNN

In [7]:
knn = KNeighborsClassifier(n_neighbors=24)
knn.fit(df_train_x,df_train_y)
knn_predicted = knn.predict(df_test_x)

: 

: 

#### Estimación del error

In [12]:
knn_success = 0.0
for i, val in enumerate(knn_predicted):
    if val == df_test_y[i]:
        knn_success += 1
print(knn_success/len(knn_predicted)*100)

76.26882320133855


#### Tabla de resumen

### Decision tree

In [14]:
dt = tree.DecisionTreeClassifier()
dt.fit(df_train_x,df_train_y)
dt_predicted = dt.predict(df_test_x)

#### Estimación del error

In [15]:
dt_success = 0.0
for i, val in enumerate(dt_predicted):
    if val == df_test_y[i]:
        dt_success += 1
print(dt_success/len(dt_predicted)*100)

46.9882877858338


#### Tabla de resumen

### K Fold Cross Validation

In [12]:
def sklearn_k_fold_cross_validation (model, x_train, y_train, k)
    scores = cross_val_score(model, x_train, y_, cv = k, scoring = ‘accuracy’)
    errors = 1 - scores
    return scores, errors

SyntaxError: invalid syntax (3753608538.py, line 1)