In [1]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from glob import glob
import pandas as pd
import numpy as np
import os.path as path

In [2]:
def read_feret_data(imgs_folder='feret', extension='jpg', train=True, only_labels=False):
    """ Read FERET image file names and extract labels from them.

    File names are parsed as
    ``<PersonID>_<digits>_<Position>[_][<a-c>].<extension>``,
    for example ``00001_930831_fa_a.jpg``.

    Parameters
    ----------
    imgs_folder: str
        Path, where the images are located. This should be the name of the folder
        and there should be 2 different folders inside this folder named as
        train and test.
    extension: str
        Image file extension (without the leading dot). Default is jpg
    train: bool
        Whether to select train or test files. If false, it means choose
        test files from imgs_folder/test/FILES...
    only_labels: bool
        If this is True, then only label values will be returned as a numpy array

    Returns
    -------
    df or y_true: DataFrame or np.ndarray
        A dataframe containing the file names, person ids and positions.
        Column names are : Names, PersonID, Position, Optional.
        ``PersonID`` is an integer column; ``Optional`` holds the trailing
        ``a``-``c`` letter of the file name, or NaN when there is none.

        If only_labels was set to True then just a ndarray of the PersonID values.

        Rows follow the order returned by ``glob`` (they are NOT sorted), so
        callers pairing the labels with precomputed features must make sure
        those features were produced in the same order.

    Raises
    ------
    ValueError
        If at least one globbed file does not follow the naming pattern above.
    """
    # Local import: `re` is not among the notebook's top-level imports.
    import re

    subset = 'train' if train else 'test'
    files = glob(path.join(imgs_folder, subset, '*.{}'.format(extension)))

    # The trailing '' makes join() end the prefix with the platform's own
    # separator, so the pattern works with both '\\' (Windows) and '/' (POSIX).
    prefix = path.join(imgs_folder, subset, '')

    # Folder and extension are user input: escape them so characters such as
    # '.', '+' or '\\' are matched literally instead of being read as regex syntax.
    pattern = (r'(?P<Names>{prefix}(?P<PersonID>\d+)_\d+_(?P<Position>\w\w)'
               r'_?(?P<Optional>[a-c]?)\.{ext})'
               .format(prefix=re.escape(prefix), ext=re.escape(extension)))

    df = pd.DataFrame(files, columns=['Names'])
    df = df.Names.str.extract(pattern)

    # Rows that did not match are all-NaN; converting them to int would fail
    # with an unhelpful message, so report the offending files explicitly.
    unmatched = df['PersonID'].isna()
    if unmatched.any():
        bad_files = [name for name, missing in zip(files, unmatched) if missing]
        raise ValueError(
            '{} file(s) under {!r} do not match the expected FERET naming pattern, e.g. {!r}'
            .format(len(bad_files), prefix, bad_files[0]))

    df['PersonID'] = df['PersonID'].astype('int')

    if only_labels:
        return df.PersonID.values

    # Map empty strings to NaN
    df['Optional'] = df['Optional'].where(df.Optional != '', np.nan)
    return df


In [3]:
# Load the precomputed FERET embeddings for the train and test splits.
# Path components are passed separately so join() inserts the platform's
# separator (a single-argument join() is a no-op).
# NOTE(review): presumably one row per image, in the same order as the files
# globbed by read_feret_data -- confirm against the script that wrote them.
X_train = np.load(path.join('embeddings', 'feret_train_embeddings.npy'))
X_test = np.load(path.join('embeddings', 'feret_test_embeddings.npy'))

In [4]:
# Person IDs parsed from the image file names: train images use the default
# 'jpg' extension, test images are read as 'png'.
y_train = read_feret_data(only_labels=True)
y_test = read_feret_data(extension='png', train=False, only_labels=True)

# Labels come from the file system while the embeddings come from .npy files,
# so fail early if the two disagree on the number of samples. This checks the
# count only -- it cannot detect a difference in ordering.
assert len(y_train) == len(X_train), 'train labels/embeddings size mismatch'
assert len(y_test) == len(X_test), 'test labels/embeddings size mismatch'



In [5]:
def print_results(y_true, y_pred):
    """ Print accuracy, F1 and precision for a set of predictions.

    F1 and precision are each reported three times, using scikit-learn's
    'macro', 'micro' and 'weighted' averaging over the classes.

    Parameters
    ----------
    y_true: array-like
        Ground-truth labels.
    y_pred: array-like
        Predicted labels, aligned with y_true.

    Returns
    -------
    None
        The scores are only printed, one per line.
    """
    # NOTE(review): scikit-learn may emit a warning here when some class never
    # appears in y_pred (precision is undefined for it) -- confirm for the
    # installed version.
    print('ACC Score : ', metrics.accuracy_score(y_true, y_pred))
    print('F1 Score with macro avg : ', metrics.f1_score(y_true, y_pred, average='macro'))
    print('F1 Scores with micro avg: ', metrics.f1_score(y_true, y_pred, average='micro'))
    # The two 'weighted' lines used to be labelled "micro weighted", which
    # described neither averaging mode; the labels now match the `average` used.
    print('F1 Scores with weighted avg: ', metrics.f1_score(y_true, y_pred, average='weighted'))
    print('Precision with macro avg : ', metrics.precision_score(y_true, y_pred, average='macro'))
    print('Precision with micro avg : ', metrics.precision_score(y_true, y_pred, average='micro'))
    print('Precision with weighted avg : ', metrics.precision_score(y_true, y_pred, average='weighted'))

In [7]:
# Logistic-regression classifier: L-BFGS solver, at most 500 iterations,
# n_jobs=-1 to use all available CPU cores.
log = LogisticRegression(n_jobs=-1, max_iter=500, solver='lbfgs')

In [8]:
# Train on the FERET training embeddings with the person IDs as targets.
log.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=500, multi_class='ovr', n_jobs=-1,
          penalty='l2', random_state=None, solver='lbfgs', tol=0.0001,
          verbose=0, warm_start=False)

In [9]:
# Predict a person ID for every test embedding.
y_pred = log.predict(X_test)

In [10]:
# Print accuracy plus F1/precision under several averaging modes for the
# current y_pred.
print_results(y_test, y_pred)

ACC Score :  0.983088235294
F1 Score with macro avg :  0.975154451813
F1 Scores with micro avg:  0.983088235294
F1 Scores with micro weighted:  0.979847041079
Precision with macro avg :  0.978814216188
Precision with micro avg :  0.983088235294
Precision with micro weighted :  0.981914951878


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [11]:
# Per-class precision / recall / f1-score / support table for the current y_pred.
print(metrics.classification_report(y_test, y_pred))

             precision    recall  f1-score   support

          1       1.00      0.50      0.67         2
          2       1.00      1.00      1.00        10
          3       0.89      1.00      0.94         8
          4       1.00      1.00      1.00         2
          5       1.00      1.00      1.00         4
          6       1.00      1.00      1.00         4
          7       1.00      1.00      1.00         2
          8       1.00      1.00      1.00         2
          9       1.00      1.00      1.00         2
         10       1.00      1.00      1.00         2
         12       1.00      1.00      1.00         4
         13       0.67      1.00      0.80         4
         14       1.00      1.00      1.00         2
         15       1.00      1.00      1.00         2
         16       1.00      1.00      1.00         2
         17       1.00      1.00      1.00         2
         18       1.00      1.00      1.00         2
         19       1.00      1.00      1.00   

  'precision', 'predicted', average, warn_for)


In [36]:
# Support vector classifier with a linear kernel; verbose=1 enables the
# solver's own log output during fit.
svm = SVC(kernel='linear', verbose=1)

In [37]:
# Train the SVM on the same training embeddings and person-ID labels.
svm.fit(X_train, y_train)

[LibSVM]

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=1)

In [38]:
# NOTE: rebinds y_pred, replacing the logistic-regression predictions with the
# SVM's; cells below report on whichever model predicted last.
y_pred = svm.predict(X_test)

In [39]:
# Print accuracy plus F1/precision under several averaging modes for the
# current y_pred.
print_results(y_test, y_pred)

ACC Score :  0.979779411765
F1 Score with macro avg :  0.969895944523
F1 Scores with micro avg:  0.979779411765
F1 Scores with micro weighted:  0.975089547731
Precision with macro avg :  0.972894872845
Precision with micro avg :  0.979779411765
Precision with micro weighted :  0.976280504626


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [40]:
# Per-class precision / recall / f1-score / support table for the current y_pred.
print(metrics.classification_report(y_test, y_pred))

             precision    recall  f1-score   support

          1       1.00      1.00      1.00         2
          2       1.00      1.00      1.00        10
          3       0.80      1.00      0.89         8
          4       1.00      1.00      1.00         2
          5       1.00      1.00      1.00         4
          6       1.00      1.00      1.00         4
          7       1.00      1.00      1.00         2
          8       0.67      1.00      0.80         2
          9       0.67      1.00      0.80         2
         10       1.00      1.00      1.00         2
         12       1.00      1.00      1.00         4
         13       1.00      1.00      1.00         4
         14       1.00      1.00      1.00         2
         15       1.00      1.00      1.00         2
         16       1.00      1.00      1.00         2
         17       1.00      1.00      1.00         2
         18       1.00      1.00      1.00         2
         19       1.00      1.00      1.00   

  'precision', 'predicted', average, warn_for)


In [None]:
from src import create_model