# Assignment 3 (Submitted by: SAHIL 2016UCS0008)

## 1. Import libraries

In [283]:
import numpy as np
import scipy.io
import scipy.stats
import cv2
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from os import listdir
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing
from sklearn import utils
from sklearn.preprocessing import StandardScaler 
from sklearn.neural_network import MLPClassifier 
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

## 2. Common part for all the models (One time feature extraction, image loading)

### Extracting the feature vectors of the image using its reference image by SVD

In [284]:
# exploiting the vectorization methods already included in the numpy library
def get_feature_vector(ref_image, pred_image):
    
    # cropping the images to maintain a fixed length format dataset
    
    #reference image
    u_r, s_r, v_r = np.linalg.svd(ref_image[:400, :400])
    #predicted/distorted image
    u_p, s_p, v_p = np.linalg.svd(pred_image[:400, :400])
    #feature vector
    feature_vec = np.add(np.absolute(np.sum(u_r.conj()*u_p, axis=0)), np.absolute(np.sum(v_r.conj()*v_p, axis=0)))
    return feature_vec

### Load the .mat files 

In [285]:
dmos_mat = scipy.io.loadmat('Live Database/databaserelease2/dmos.mat')
ref_names = scipy.io.loadmat('Live Database/databaserelease2/refnames_all.mat')

# extraxt the arrays from the loaded mat files
refnames_all = ref_names['refnames_all']
dmos = dmos_mat['dmos']

### Load the reference images

In [286]:
ref_images_names = listdir("Live Database/databaserelease2/refimgs/")
# print(ref_images_names)
ref_images = {} # load the images in this dictionary
ref_images_count = len(ref_images_names)
for i in range(ref_images_count):
    ref_images[ref_images_names[i]] = cv2.imread('Live Database/databaserelease2/refimgs/' + str(ref_images_names[i]), 0)
    


### Create the feature vector

In [287]:
X = []
folders = ['jp2k', 'jpeg', 'wn', 'gblur', 'fastfading']
images = [227, 233, 174, 174, 174]
done = 0

In [288]:
for f in range(len(folders)):
    for i in range(images[f]):
        path = 'Live Database/databaserelease2/'+ folders[f] + '/img' + str(i + 1) + '.bmp'
        img_p = cv2.imread(path, 0)
        ref = refnames_all[:, done + i]
        img = ref_images[ref[0][0]]
        feature_vector = get_feature_vector(img, img_p)
        X.append(feature_vector)
    done += images[f]

In [289]:
print(ref)
print(ref[0][0])
print(done)

[array(['sailing4.bmp'], dtype='<U12')]
sailing4.bmp
982


In [290]:
X = np.array(X)
Y = dmos[0] # Create the output DMOS matrix for the features

## 3. Support Vector Regression

### Split into training and testing data

In [291]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)

In [292]:
svr = SVR(C = 1.0, epsilon = 0.1)

### Create the SVR classifier, fit on train data, predict on test data

In [293]:
svr.fit(X_train, Y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [294]:
Y_pred = svr.predict(X_test)
svr.score(X_test, Y_test)

0.8733736870661665

### Compute the value of PLCC

In [295]:
plcc, pval = scipy.stats.pearsonr(Y_test, Y_pred)

print ("PLCC: {0:1.2f} PVAL: {1:1.2f}".format(plcc, pval) )

PLCC: 0.94 PVAL: 0.00


## 4. Logistic Regression

### Split into training and testing data

In [356]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)

### Convert continuous output values in y_train to discrete values using encoding

In [357]:
# clf = LogisticRegression(random_state=0, solver='lbfgs').fit(X_train, Y_train)

# divide the DMOS values for the training data by 20 to make 5 classes
Y_train[:] = Y_train[:]/20
Y_test[:] = Y_test[:]/20


In [358]:
Y_train = Y_train.astype('int')
Y_test = Y_test.astype('int')
print("Training labels:", Y_train)
print("Testing labels:", Y_test)

Training labels: [1 1 1 3 3 2 0 0 2 3 1 3 0 0 1 3 1 2 0 2 0 0 2 0 2 1 3 2 1 2 0 1 1 1 0 0 2
 3 1 2 2 2 1 1 2 3 1 0 0 0 2 2 1 2 1 2 1 2 3 3 2 1 2 2 2 0 2 3 0 1 2 2 2 2
 2 1 1 1 0 3 2 1 1 3 0 3 2 1 3 2 0 0 2 1 1 1 1 1 0 0 0 0 0 1 2 1 2 1 1 2 2
 0 3 2 2 3 2 1 1 2 0 3 1 2 3 0 2 2 1 0 1 3 2 1 1 1 1 0 1 2 0 3 1 3 1 2 2 2
 0 3 1 1 1 0 0 3 1 1 0 2 3 1 0 2 2 0 1 0 3 1 2 3 3 2 1 2 0 1 1 1 1 1 0 1 0
 0 0 1 2 3 2 2 3 2 3 2 3 2 0 0 2 2 1 2 2 2 2 3 2 0 3 2 2 1 2 2 3 0 3 0 2 2
 0 0 2 2 3 3 1 2 2 0 0 1 1 3 3 1 2 2 2 1 1 2 1 0 3 4 3 1 1 0 2 0 2 1 1 1 2
 3 1 0 0 0 2 1 2 0 2 1 1 0 2 0 3 2 0 1 0 2 0 2 3 3 1 1 2 2 3 2 2 2 1 1 0 2
 1 0 1 0 2 3 3 3 0 1 1 2 2 0 2 1 1 2 3 0 3 2 1 2 0 0 1 3 2 1 1 0 1 2 1 2 2
 0 3 1 3 1 2 2 1 2 0 0 3 2 2 2 2 3 3 3 1 0 0 3 2 3 2 2 2 3 0 3 1 3 2 3 0 2
 2 3 3 3 1 2 2 1 3 2 0 3 3 3 3 1 3 2 1 2 1 2 1 1 0 1 3 0 1 3 2 1 3 2 0 3 1
 0 2 1 2 1 2 0 1 0 0 2 2 2 2 0 2 1 0 0 3 1 0 0 0 1 3 3 0 2 2 1 0 3 1 2 1 0
 2 0 1 3 2 1 2 2 0 1 2 3 1 0 2 2 2 1 2 1 2 0 1 3 2 2 1 0 1 3 1 3 0 3 2 0 3
 1 2 2 2

### Create the logistic regression classifier and fit on training data

In [359]:
logistic = LogisticRegression(random_state=0, solver='lbfgs').fit(X_train, Y_train)

### Predict on test data

In [360]:
Y_pred = logistic.predict(X_test)
print("Predicted values for Y:\n", Y_pred)
print("\nAcutal values for Y:\n", Y_test)

Predicted values for Y:
 [1 1 3 2 0 0 1 2 1 2 1 2 1 2 2 2 1 2 2 0 1 1 1 0 2 2 1 3 2 1 1 2 1 1 2 0 1
 1 1 1 2 2 2 1 3 1 2 1 1 0 1 1 1 2 3 0 1 1 2 0 1 3 1 2 1 2 1 2 1 2 1 1 2 2
 3 0 0 2 1 1 2 2 2 0 2 0 1 1 2 2 2 1 2 0 1 3 2 2 0 0 1 0 1 3 2 0 3 2 2 3 0
 2 2 1 2 2 2 0 2 1 3 2 1 0 2 1 3 2 1 2 0 0 1 0 2 1 2 0 2 0 2 1 3 1 1 2 2 2
 1 2 2 2 0 0 0 0 0 1 1 0 1 2 1 0 3 1 2 2 3 2 3 2 1 1 2 0 0 1 0 2 3 1 3 1 1
 0 2 2 0 2 3 0 0 0 2 2 2]

Acutal values for Y:
 [1 1 2 3 0 0 0 1 1 2 1 2 1 2 2 2 1 2 2 0 1 2 1 0 3 3 2 2 3 1 1 2 1 1 1 0 1
 1 1 1 1 2 1 2 3 1 2 2 1 0 2 1 2 2 2 0 1 1 2 0 1 3 1 2 1 2 1 1 1 2 1 2 3 2
 3 0 0 2 1 1 1 2 2 0 2 0 1 1 2 3 2 1 3 0 1 3 2 2 0 0 2 0 1 3 2 0 4 1 3 3 0
 2 2 2 1 2 3 0 2 1 2 2 1 0 4 1 3 2 1 2 0 0 1 0 2 1 2 0 2 0 1 1 3 1 1 2 2 2
 1 1 2 2 0 0 0 0 0 1 1 0 1 2 1 0 3 1 3 1 2 3 3 2 1 1 3 0 0 1 0 3 3 1 3 1 1
 0 2 2 0 3 3 0 0 0 2 2 2]


In [361]:
# generate the class probabilities
probs = logistic.predict_proba(X_test)
print(probs.shape)

(197, 5)


In [362]:
# score of the classifier
logistic.score(X_test, Y_test)

# from sklearn.metrics import accuracy_score   
# print(accuracy_score(y_test_encoded, Y_pred))

0.7868020304568528

### Compute the value of PLCC

In [363]:
plcc, pval = scipy.stats.pearsonr(Y_test, Y_pred)

print("PLCC: {0:1.2f} PVAL: {1:1.2f}".format(plcc, pval))

PLCC: 0.88 PVAL: 0.00


## 5. Neural Network 

### Split into training and testing data

In [349]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)

# divide the DMOS values for the training data by 20 to make 5 classes
Y_train[:] = Y_train[:]/20
Y_test[:] = Y_test[:]/20

Y_train = Y_train.astype('int')
Y_test = Y_test.astype('int')

### Feature scaling

In [350]:
scaler = StandardScaler()  
# Fit only to the training data
scaler.fit(X_train)

# Now apply the transformations to the data:
X_train = scaler.transform(X_train)  
X_test = scaler.transform(X_test)  

### Training the model

In [352]:
# no of features per image
features = X_train.shape[1]
# print(features)

# The first parameter, hidden_layer_sizes, is used to set the size of the hidden layers 
# We will create one hidden layer having size equal to the no of features 
# The second parameter to MLPClassifier specifies the number of iterations, or the epochs,
# that the neural network executes. An epoch is a combination of one cycle of feed-forward and back propagation phase.
mlp = MLPClassifier(hidden_layer_sizes=(features), max_iter=1000)  
# By default the 'relu' activation function is used with 'adam' cost optimizer. 

mlp.fit(X_train, Y_train)  

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=400, learning_rate='constant',
       learning_rate_init=0.001, max_iter=1000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

### Predict on the test data and evaluating the performance

In [353]:
Y_pred = mlp.predict(X_test)  

In [354]:
# print(confusion_matrix(y_test_encoded, Y_pred))  
# print(classification_report(y_test_encoded, Y_pred))  
print(accuracy_score(Y_test, Y_pred))

0.766497461928934


### Compute the value of PLCC

In [355]:
plcc, pval = scipy.stats.pearsonr(Y_test, Y_pred)

print ("PLCC: {0:1.2f} PVAL: {1:1.2f}".format(plcc, pval) )

PLCC: 0.87 PVAL: 0.00


## 6. Comparing different models 

In [366]:
# print("Score of SVR classifier is: ", np.mean(cross_val_score(svr, X, Y, cv=5)))  

In [373]:
score_svr = 0
score_lr = 0
score_nn = 0
for i in range(10):
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)
    # use original Y_test for the SVR
    score_svr += svr.score(X_test, Y_test)
    
    # use classes in Y_test for Neural network and Logistic regression
    Y_train[:] = Y_train[:]/20
    Y_test[:] = Y_test[:]/20
    Y_train = Y_train.astype('int')
    Y_test = Y_test.astype('int')
    
    score_lr += logistic.score(X_test, Y_test)
    
    # transform the X_test for neural network classifier using feature scaling
    X_test = scaler.transform(X_test)
    score_nn += mlp.score(X_test, Y_test)
    
score_svr = score_svr/10.0
score_lr = score_lr/10.0
score_nn = score_nn/10.0
print("Score of SVR classifier is: {0:1.2f}".format(score_svr))
print("Score of Logistic regression classifier is: {0:1.2f}".format(score_lr))
print("Score of Neural network classifier is: {0:1.2f}".format(score_nn))

Score of SVR classifier is: 0.85
Score of Logistic regression classifier is: 0.87
Score of Neural network classifier is: 0.85
