In [1]:
# Imports de base
import os
import pandas as pd
import numpy as np

# Classification sans features
from sklearn.svm import LinearSVC
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression

# Mesures : accuracy, rappel, précision, f1 score
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score

# Skimage 
from skimage import io
from skimage import transform
from skimage.util import img_as_ubyte
from skimage.util import img_as_int

# MultiThreading
from joblib import Parallel, delayed

In [2]:
# Run configuration: joblib worker count and the dataset split to load.
threads = 6
subset = 'train'

# Root of the resized image data. The trailing "" component keeps the final
# path separator, so the value can be used as a plain string prefix below.
folder = os.path.join("..", "data", "resized2", "")
file = os.path.join(subset, subset + ".csv")

# Metadata table of the split; later cells read its 'mediaid' and 'classid' columns.
train_raw_df = pd.read_csv(folder + file, sep=",")

# Directory prefix (ending with a separator) that imgToVector prepends to file names.
loadpath = os.path.join(folder, subset, "")

In [3]:
# Candidate linear classifiers, stored as (display name, estimator) pairs.
# Each estimator is given a fixed random_state so that restarting the kernel
# and re-running the notebook fits the same models again.
models_classifiers = [
    ('LinearSVC', LinearSVC(random_state=0, tol=1e-5)),
    # SGD shuffles the training samples between epochs, so it needs a seed
    # to be reproducible.
    # NOTE(review): max_iter=5 is very small and will probably stop before
    # convergence -- confirm this is intentional.
    ('SGDClassifier', SGDClassifier(loss="hinge", penalty="l2", max_iter=5, random_state=0)),
    # random_state is only consulted by some LogisticRegression solvers;
    # it is set here for consistency with the other estimators.
    ('LR', LogisticRegression(random_state=0)),
]

In [4]:
def imgToVector(fname, directory=None):
    """Load one image file and flatten it into a 1-D vector.

    Parameters
    ----------
    fname : str
        Image file name; it is appended directly to the directory prefix.
    directory : str, optional
        Path prefix, including its trailing separator, to read the image
        from. When omitted, the module-level ``loadpath`` is used, which
        keeps existing single-argument calls working. Passing it explicitly
        makes the call independent of later rebinding of ``loadpath``.

    Returns
    -------
    The image converted with ``img_as_ubyte`` and reshaped to a flat vector
    of 150*150*3 values.

    Raises
    ------
    ValueError
        Raised by ``reshape`` when the image does not hold exactly
        150*150*3 values (for example a different size or channel count).
    """
    prefix = loadpath if directory is None else directory
    image = img_as_ubyte(io.imread(prefix + fname))
    # The fixed target length doubles as a sanity check on the input:
    # presumably every file is a 150x150 image with 3 channels -- confirm.
    return image.reshape(150 * 150 * 3)

In [5]:
# Turn every training image into a flat vector, distributing the calls to
# imgToVector over `threads` joblib workers. Results keep the row order of
# train_raw_df['mediaid'].
liste_image = Parallel(n_jobs=threads)(
    delayed(imgToVector)(str(media_id) + '.jpg')
    for media_id in train_raw_df['mediaid']
)

In [6]:
# Features are the flattened training images; labels come from the
# 'classid' column of the training metadata.
X_train = liste_image
y_train = train_raw_df['classid']

# Fit each candidate in place; the fitted estimators remain available
# through models_classifiers for the prediction step.
for _name, model in models_classifiers:
    model.fit(X_train, y_train)



In [7]:
# Load the test split.
# NOTE: this rebinds `subset`, `file` and `loadpath`. imgToVector reads the
# module-level `loadpath`, so after this cell it resolves test images;
# re-run the configuration cell before reloading the training images.
subset = 'test'
file = subset + os.sep + subset + ".csv"

test_raw_df = pd.read_csv(folder + file, sep=",")

loadpath = folder + subset + os.sep

# Preview the test metadata. It has to be the last expression of the cell,
# otherwise the notebook discards the result without displaying it.
test_raw_df.head()

In [8]:
# Turn every test image into a flat vector, distributing the calls to
# imgToVector over `threads` joblib workers. Results keep the row order of
# test_raw_df['mediaid'].
liste_image_test = Parallel(n_jobs=threads)(
    delayed(imgToVector)(str(media_id) + '.jpg')
    for media_id in test_raw_df['mediaid']
)

In [9]:
# Test features are the flattened test images; ground truth comes from the
# 'classid' column of the test metadata.
X_test = liste_image_test
y_true = test_raw_df['classid']

# One prediction array per classifier, in the same order as models_classifiers.
y_predicts = [model.predict(X_test) for _name, model in models_classifiers]

In [10]:
# Print the micro-averaged scores of each classifier, in the order of y_predicts.
for y_predict in y_predicts:
    print("Accuracy: " + str(accuracy_score(y_true, y_predict)))
    print("Rappel: " + str(recall_score(y_true, y_predict, average='micro')))
    print("Precision: " + str(precision_score(y_true, y_predict, average='micro')))
    # The F-measure has to come from f1_score; this line previously called
    # precision_score and therefore just repeated the precision value.
    print("F-Measure: " + str(f1_score(y_true, y_predict, average='micro')))
    print('------------')

Accuracy: 0.20287769784172663
Rappel: 0.20287769784172663
Precision: 0.20287769784172663
F-Measure: 0.20287769784172663
------------
Accuracy: 0.14676258992805755
Rappel: 0.14676258992805755
Precision: 0.14676258992805755
F-Measure: 0.14676258992805755
------------
Accuracy: 0.20575539568345325
Rappel: 0.20575539568345325
Precision: 0.20575539568345325
F-Measure: 0.20575539568345325
------------
