# Assignment 3 (Submitted by: SAHIL 2016UCS0008)

## 1. Logistic Regression demo

### Import dataset

In [4]:
from sklearn import datasets

In [36]:
iris = sklearn.datasets.load_iris()

In [9]:
X = iris.data[:, :2]
y = (iris.target != 0)*1

In [13]:
print(X.shape, y.shape)

(150, 2) (150,)


### Use inbuilt logistic regression from sklearn library

In [18]:
from sklearn.linear_model import LogisticRegression

In [19]:
clf = LogisticRegression(random_state=0, solver='lbfgs').fit(X, y)

In [24]:
print(X[:1, :])

[[5.1 3.5]]


In [25]:
clf.predict(X[:1, :])

array([0])

In [26]:
clf.predict_proba(X[:1, :])

array([[0.89424588, 0.10575412]])

In [27]:
clf.score(X, y)

1.0

## 2. Import libraries

In [59]:
import numpy as np
import scipy.io
import scipy.stats
import cv2
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from os import listdir
from sklearn.linear_model import LogisticRegression

## 3. Common part for all the models 

### Extracting the feature vectors of the image using its reference image by SVD

In [30]:
# exploiting the vectorization methods already included in the numpy library
def get_feature_vector(ref_image, pred_image):
    
    # cropping the images to maintain a fixed length format dataset
    
    #reference image
    u_r, s_r, v_r = np.linalg.svd(ref_image[:400, :400])
    #predicted/distorted image
    u_p, s_p, v_p = np.linalg.svd(pred_image[:400, :400])
    #feature vector
    feature_vec = np.add(np.absolute(np.sum(u_r.conj()*u_p, axis=0)), np.absolute(np.sum(v_r.conj()*v_p, axis=0)))
    return feature_vec

### Load the .mat files 

In [31]:
dmos_mat = scipy.io.loadmat('Live Database/databaserelease2/dmos.mat')
ref_names = scipy.io.loadmat('Live Database/databaserelease2/refnames_all.mat')

# extraxt the arrays from the loaded mat files
refnames_all = ref_names['refnames_all']
orgs = dmos_mat['orgs']
dmos = dmos_mat['dmos']

### Load the reference images

In [32]:
ref_images_names = listdir("Live Database/databaserelease2/refimgs/")
# print(ref_images_names)
ref_images = {} # load the images in this dictionary
ref_images_count = len(ref_images_names)
for i in range(ref_images_count):
    ref_images[ref_images_names[i]] = cv2.imread('Live Database/databaserelease2/refimgs/' + str(ref_images_names[i]), 0)
    


### Create the feature vector

In [33]:
X = []
folders = ['jp2k', 'jpeg', 'wn', 'gblur', 'fastfading']
images = [227, 233, 174, 174, 174]
done = 0

In [35]:
for f in range(len(folders)):
    for i in range(images[f]):
        path = 'Live Database/databaserelease2/'+ folders[f] + '/img' + str(i + 1) + '.bmp'
        img_p = cv2.imread(path, 0)
        ref = refnames_all[:, done + i]
        img = ref_images[ref[0][0]]
        feature_vector = get_feature_vector(img, img_p)
        X.append(feature_vector)
    done += images[f]

In [37]:
print(ref)
print(ref[0][0])
print(done)

[array(['sailing4.bmp'], dtype='<U12')]
sailing4.bmp
982


In [46]:
X = np.array(X)
Y = dmos[0] # Create the output DMOS matrix for the features

## 4. Support Vector Regression

### Split into training and testing data

In [172]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)

In [173]:
clf = SVR(C = 1.0, epsilon = 0.1)

### Create the SVR classifier, fit on train data, predict on test data

In [174]:
clf.fit(X_train, Y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [175]:
Y_pred = clf.predict(X_test)
clf.score(X_test, Y_test)

0.8527260286599453

### Compute the value of PLCC

In [176]:
plcc, pval = scipy.stats.pearsonr(Y_test, Y_pred)

print ("PLCC: {0:1.2f} PVAL: {1:1.2f}".format(plcc, pval) )

PLCC: 0.93 PVAL: 0.00


## 5. Logistic Regression

### Split into training and testing data

In [177]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)

### Convert continuous output values in y_train to discrete values using encoding

In [178]:
# clf = LogisticRegression(random_state=0, solver='lbfgs').fit(X_train, Y_train)

# divide the DMOS values for the training data by 100 to make them in the 
Y_train[:] = Y_train[:]/100
Y_test[:] = Y_test[:]/100

In [179]:
from sklearn import preprocessing
from sklearn import utils

lab_enc = preprocessing.LabelEncoder()
y_train_encoded = lab_enc.fit_transform(Y_train)

In [180]:
# print(encoded)
print("Y_train:", utils.multiclass.type_of_target(Y_train))
print("Y_train as int:", utils.multiclass.type_of_target(Y_train.astype('int')))
print("Y_train encoded:", utils.multiclass.type_of_target(y_train_encoded))

print(X_train.shape)

Y_train: continuous
Y_train as int: binary
Y_train encoded: multiclass
(785, 400)


### Create the logistic regression classifier and fit on training data

In [181]:
clf = LogisticRegression(random_state=0, solver='lbfgs').fit(X_train, y_train_encoded)

### Predict on test data

In [182]:
y_test_encoded = lab_enc.fit_transform(Y_test)
Y_pred = clf.predict(X_test)
print("Predicted values for Y:\n", Y_pred)
print("\nAcutal values for Y:\n", y_test_encoded)

Predicted values for Y:
 [259 406   0 137   0 406 229 157 101 371   0   0   0 500 348 229 363   0
 419   0   0 194   0 273 531  99   0 164 148 549 611 381 192 607 429 222
 459   0 341  92  92   0 186 449 167 532 176 398   0 549 157 329 299 277
 417 191 152 549 331   0   0 130 398 145 112   0   0 138 575 592 416  23
   0 223 476 311 290 585   0 538 554  60  89 369   0 523 374   0 214   0
 391 433   0 458  51 622 580 355 378 484 313 161 370 386 417 239 258 519
   0   0   0 227  47   0 485  39 164 550 378 225 337   0 342 325 136 538
   0   0 297   0 322 188  31 243   0 449 448 158 227 166   0 165 114 390
 546  45 513 398   0 458  76 464 264 320   0 263 449 578 276 491 378 178
 132 443 538 227 294  19   0   0 336   0 497 417 230 197 325 166 320   0
 550 398 391 592   0 155 412   0 209 200 363 493 219 230 271   0 473]

Acutal values for Y:
 [ 95 111   0  57   0 105  62  12  19 129   0   0   0 120  98  91  99   0
 136   0   0  35   0  40 142  23   0   7  18 112 151  73 140  72 132 118
  87  

In [183]:
# generate the class probabilities
probs = clf.predict_proba(X_test)
print(probs.shape)

(197, 625)


In [184]:
# score of the classifier
clf.score(X_test, y_test_encoded)

0.2131979695431472

### Compute the value of PLCC

In [185]:
plcc, pval = scipy.stats.pearsonr(y_test_encoded, Y_pred)

print("PLCC: {0:1.2f} PVAL: {1:1.2f}".format(plcc, pval))

PLCC: 0.85 PVAL: 0.00


## 6. Neural Network 

### Split into training and testing data

In [186]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)

### Label encoding

In [201]:
from sklearn import preprocessing
from sklearn import utils

lab_enc = preprocessing.LabelEncoder()
y_train_encoded = lab_enc.fit_transform(Y_train)
y_test_encoded = lab_enc.fit_transform(Y_test)

### Feature scaling

In [189]:
from sklearn.preprocessing import StandardScaler  
scaler = StandardScaler()  
# Fit only to the training data
scaler.fit(X_train)

# Now apply the transformations to the data:
X_train = scaler.transform(X_train)  
X_test = scaler.transform(X_test)  

### Training the model

In [197]:
from sklearn.neural_network import MLPClassifier  
# no of features per image
features = X_train.shape[1]
# print(features)

# The first parameter, hidden_layer_sizes, is used to set the size of the hidden layers 
# We will create one hidden layer having size equal to the no of features 
# The second parameter to MLPClassifier specifies the number of iterations, or the epochs,
# that the neural network executes. An epoch is a combination of one cycle of feed-forward and back propagation phase.
mlp = MLPClassifier(hidden_layer_sizes=(features), max_iter=1000)  
# By default the 'relu' activation function is used with 'adam' cost optimizer. 

mlp.fit(X_train, y_train_encoded)  

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=400, learning_rate='constant',
       learning_rate_init=0.001, max_iter=1000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

### Predict on the test data and evaluating the performance

In [199]:
Y_pred = mlp.predict(X_test)  

In [208]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score  
# print(confusion_matrix(y_test_encoded, Y_pred))  
# print(classification_report(y_test_encoded, Y_pred))  
print(accuracy_score(y_test_encoded, Y_pred))

0.2436548223350254
