In [0]:
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib import image as mpimg
from PIL import Image
from sklearn import linear_model as lm
from sklearn import model_selection as ms
from sklearn import preprocessing as prep
from sklearn import metrics as met
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn import svm
import pickle

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use matplotlib's 'dark_background' style sheet for every plot in this notebook.
plt.style.use ('dark_background')

# The idea is as follows: the data are crops of the original images, each 64x64 in size. The model is trained on them.
# For each image from the training set, first cut it into pieces of this kind and then look for Waldo in those pieces.
# If he is found in any of the pieces, he is present in the whole image; otherwise he is not.
# Alongside each crop, the coordinates of that piece within the original image can also be stored, so it is easy to show where Waldo is, if he is present.


In [0]:
# Load the image data with pandas.

# Directory with the CSV files relative to the repository layout (not read below).
csv_putanja = os.path.join('..', 'data', 'CSV', '')
# Directory with the CSV files that is actually read below
# (presumably the Google Colab layout, judging by the name -- confirm).
gcolab_csv_putanja = os.path.join('CSV', '')

df = pd.read_csv(os.path.join(gcolab_csv_putanja, 'allwaldo64.csv'))

# The 'Waldo' column is the target; every other column is a feature.
y = df['Waldo']
X = df.drop(columns='Waldo')


In [0]:
# Split the data into training, validation and test sets.

# Step 1: hold out 33% of all samples as the test set, stratified on the label.
x_trening_valid, x_test, y_trening_valid, y_test = ms.train_test_split(
    X,
    y,
    test_size=0.33,
    train_size=0.67,
    random_state=3,
    stratify=y,
)

# Step 2: split the remaining samples 70/30 into training and validation sets,
# again stratified on the label.
x_trening, x_valid, y_trening, y_valid = ms.train_test_split(
    x_trening_valid,
    y_trening_valid,
    test_size=0.3,
    train_size=0.7,
    random_state=3,
    stratify=y_trening_valid,
)


In [0]:
# Image preprocessing by standardization.
# The code below is currently disabled: it is kept inside a string literal and
# therefore does not run. Each scaler is fitted on its own training portion and
# must be applied only to the data that belongs with it:
#   skaler_tv -> training / validation sets
#   skaler_tt -> (training + validation) / test sets

'''

skaler_tv = prep.StandardScaler()
skaler_tv.fit(x_trening)
x_trening = skaler_tv.transform(x_trening)
x_valid = skaler_tv.transform(x_valid)

skaler_tt = prep.StandardScaler()
skaler_tt.fit(x_trening_valid)
x_trening_valid = skaler_tt.transform(x_trening_valid)
x_test = skaler_tt.transform(x_test)

'''


  return self.partial_fit(X, y)
  """
  
  return self.partial_fit(X, y)
  # Remove the CWD from sys.path while we load stuff.
  # This is added back by InteractiveShellApp.init_path()


In [0]:
# Apply principal component analysis (PCA) to reduce the dimensionality of the data.
# The number of components (500) was chosen arbitrarily.

# Projection fitted on the training set only, then applied to the training
# and validation sets.
agk_tv = PCA(n_components=500, random_state=3).fit(x_trening)
x_trening, x_valid = (agk_tv.transform(skup) for skup in (x_trening, x_valid))

# Separate projection fitted on training + validation data, then applied to
# that data and to the test set.
agk_tt = PCA(n_components=500, random_state=3).fit(x_trening_valid)
x_trening_valid, x_test = (agk_tt.transform(skup) for skup in (x_trening_valid, x_test))


In [0]:
# Find the best support-vector-machine model using the built-in grid search,
# which relies on cross-validation.

mpv_model = svm.SVC()

# C and gamma are both searched over the powers of ten 10**-5 .. 10**4.
parametri_mpv = {
    hiperparametar: [10 ** eksponent for eksponent in range(-5, 5)]
    for hiperparametar in ('C', 'gamma')
}

# 5-fold cross-validation scored by accuracy, using all available cores.
mpv_najb = ms.GridSearchCV(
    mpv_model,
    param_grid=parametri_mpv,
    cv=5,
    scoring='accuracy',
    return_train_score=True,
    n_jobs=-1,
)

mpv_najb.fit(x_trening_valid, y_trening_valid)


KeyboardInterrupt: 

In [0]:
# Save the fitted grid-search object for the SVM model to disk.

with open('MPV_model.pkl', 'wb') as fid:
    pickle.dump(mpv_najb, fid)

# Disabled snippet (kept inside a string literal) for loading the model back.
# It uses the `pickle` module imported at the top of this notebook.
# NOTE: only unpickle files you created yourself -- unpickling untrusted data
# can execute arbitrary code.
'''
# Ucitavanje modela
with open('MPV_model.pkl', 'rb') as fid:
    mpv_najb = pickle.load(fid)
'''


In [0]:
# Same procedure for the k-nearest-neighbours (KNN) model.

kns_model = KNeighborsClassifier(n_jobs=-1)

# Candidate neighbour counts: every integer from 3 to 10 inclusive.
parametri_kns = {'n_neighbors': list(range(3, 11))}

# 5-fold cross-validation scored by accuracy, using all available cores.
kns_najb = ms.GridSearchCV(
    kns_model,
    param_grid=parametri_kns,
    cv=5,
    scoring='accuracy',
    return_train_score=True,
    n_jobs=-1,
)

kns_najb.fit(x_trening_valid, y_trening_valid)


In [0]:
# Save the fitted grid-search object for the KNN model to disk.

with open('KNS_model.pkl', 'wb') as fid:
    pickle.dump(kns_najb, fid)

# Disabled snippet (kept inside a string literal) for loading the model back.
# It uses the `pickle` module imported at the top of this notebook.
# NOTE: only unpickle files you created yourself -- unpickling untrusted data
# can execute arbitrary code.
'''
# Ucitavanje modela
with open('KNS_model.pkl', 'rb') as fid:
    kns_najb = pickle.load(fid)
'''


In [0]:
# Create the logistic regression model with default settings and train it on
# the combined training + validation data.

lr_model = lm.LogisticRegression()
lr_model.fit(X=x_trening_valid, y=y_trening_valid)


In [0]:
# Save the fitted logistic regression model to disk.

with open('LR_model.pkl', 'wb') as fid:
    pickle.dump(lr_model, fid)

# Disabled snippet (kept inside a string literal) for loading the model back.
# It uses the `pickle` module imported at the top of this notebook.
# NOTE: only unpickle files you created yourself -- unpickling untrusted data
# can execute arbitrary code.
'''
# Ucitavanje modela
with open('LR_model.pkl', 'rb') as fid:
    lr_model = pickle.load(fid)
'''


In [0]:
# Evaluate the results: accuracy of each model on the test set.

# Support vector machine
y_procena_mpv = mpv_najb.predict(x_test)
print(
    'Tacnost modela potpornih vektora je:',
    met.accuracy_score(y_true=y_test, y_pred=y_procena_mpv),
)

# K nearest neighbours
y_procena_kns = kns_najb.predict(x_test)
print(
    'Tacnost modela K najblizih suseda je:',
    met.accuracy_score(y_true=y_test, y_pred=y_procena_kns),
)

# Logistic regression
y_procena_lr = lr_model.predict(x_test)
print(
    'Tacnost modela logisticke regresije je:',
    met.accuracy_score(y_true=y_test, y_pred=y_procena_lr),
)
