# Assignment 3 (Submitted by: SAHIL 2016UCS0008)

## 1. Logistic Regression demo

### Import dataset

In [4]:
from sklearn import datasets

In [36]:
iris = sklearn.datasets.load_iris()

In [9]:
X = iris.data[:, :2]
y = (iris.target != 0)*1

In [13]:
print(X.shape, y.shape)

(150, 2) (150,)


### Use inbuilt logistic regression from sklearn library

In [18]:
from sklearn.linear_model import LogisticRegression

In [19]:
clf = LogisticRegression(random_state=0, solver='lbfgs').fit(X, y)

In [24]:
print(X[:1, :])

[[5.1 3.5]]


In [25]:
clf.predict(X[:1, :])

array([0])

In [26]:
clf.predict_proba(X[:1, :])

array([[0.89424588, 0.10575412]])

In [27]:
clf.score(X, y)

1.0

## 2. Import libraries

In [59]:
import numpy as np
import scipy.io
import scipy.stats
import cv2
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from os import listdir
from sklearn.linear_model import LogisticRegression

## 3. Common part for all the models 

### Extracting the feature vectors of the image using its reference image by SVD

In [30]:
# exploiting the vectorization methods already included in the numpy library
def get_feature_vector(ref_image, pred_image):
    
    # cropping the images to maintain a fixed length format dataset
    
    #reference image
    u_r, s_r, v_r = np.linalg.svd(ref_image[:400, :400])
    #predicted/distorted image
    u_p, s_p, v_p = np.linalg.svd(pred_image[:400, :400])
    #feature vector
    feature_vec = np.add(np.absolute(np.sum(u_r.conj()*u_p, axis=0)), np.absolute(np.sum(v_r.conj()*v_p, axis=0)))
    return feature_vec

### Load the .mat files 

In [31]:
dmos_mat = scipy.io.loadmat('Live Database/databaserelease2/dmos.mat')
ref_names = scipy.io.loadmat('Live Database/databaserelease2/refnames_all.mat')

# extraxt the arrays from the loaded mat files
refnames_all = ref_names['refnames_all']
orgs = dmos_mat['orgs']
dmos = dmos_mat['dmos']

### Load the reference images

In [32]:
ref_images_names = listdir("Live Database/databaserelease2/refimgs/")
# print(ref_images_names)
ref_images = {} # load the images in this dictionary
ref_images_count = len(ref_images_names)
for i in range(ref_images_count):
    ref_images[ref_images_names[i]] = cv2.imread('Live Database/databaserelease2/refimgs/' + str(ref_images_names[i]), 0)
    


### Create the feature vector

In [33]:
X = []
folders = ['jp2k', 'jpeg', 'wn', 'gblur', 'fastfading']
images = [227, 233, 174, 174, 174]
done = 0

In [35]:
for f in range(len(folders)):
    for i in range(images[f]):
        path = 'Live Database/databaserelease2/'+ folders[f] + '/img' + str(i + 1) + '.bmp'
        img_p = cv2.imread(path, 0)
        ref = refnames_all[:, done + i]
        img = ref_images[ref[0][0]]
        feature_vector = get_feature_vector(img, img_p)
        X.append(feature_vector)
    done += images[f]

In [37]:
print(ref)
print(ref[0][0])
print(done)

[array(['sailing4.bmp'], dtype='<U12')]
sailing4.bmp
982


In [46]:
X = np.array(X)
Y = dmos[0] # Create the output DMOS matrix for the features

## 4. Support Vector Regression

### Split into training and testing data

In [107]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)

### Create the SVR classifier, fit on train data, predict on test data

In [108]:
clf = SVR(C = 1.0, epsilon = 0.1)

In [109]:
clf.fit(X_train, Y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [112]:
Y_pred = clf.predict(X_test)
clf.score(X_test, Y_test)

0.8651527390330838

### Compute the value of PLCC

In [113]:
plcc, pval = scipy.stats.pearsonr(Y_test, Y_pred)

print ("PLCC: {0:1.2f} PVAL: {1:1.2f}".format(plcc, pval) )

PLCC: 0.94 PVAL: 0.00


## 5. Logistic Regression

### Convert continuous output values in y_train to discrete values using encoding

In [114]:
# clf = LogisticRegression(random_state=0, solver='lbfgs').fit(X_train, Y_train)

# divide the DMOS values for the training data by 100 as kind of feature scaling
Y_train[:] = Y_train[:]/100

In [115]:
from sklearn import preprocessing
from sklearn import utils

lab_enc = preprocessing.LabelEncoder()
y_train_encoded = lab_enc.fit_transform(Y_train)

In [122]:
# print(encoded)
print("Y_train:", utils.multiclass.type_of_target(Y_train))
print("Y_train as int:", utils.multiclass.type_of_target(Y_train.astype('int')))
print("Y_train encoded:", utils.multiclass.type_of_target(y_train_encoded))

print(X_train.shape)

Y_train: continuous
Y_train as int: binary
Y_train encoded: multiclass
(785, 400)


### Create the logistic regression classifier and fit on training data

In [117]:
clf = LogisticRegression(random_state=0, solver='lbfgs').fit(X_train, y_train_encoded)

### Predict on test data

In [118]:
y_test_encoded = lab_enc.fit_transform(Y_test)
Y_pred = clf.predict(X_test)
print(Y_pred)

[578  52 452 452   0 558   0   0   0   0   0 534 123 358 461 100   0   0
 267 141 326 395 482 603  37   0 374 402  90 590   0   0 199   0 381 291
 381 588 236 384 202 566 357  67 141   0 150 332   0 388 551 189 245 246
 591 487 416   0   6 449 150 612 128 133 274 598 388 334 210 165   0   0
 219   0 408 163 569 246 465 223 143 578 177  23 348   0 245 541 601 449
 416 141  18   0  41   0 319 452 556 212 402 278   0 408 549 576   0 339
 384  23 452   0 603   0 194   0 603 603   0 246   0   0 422  71   0 223
 208 133   0 245  24   0  17   0 240 607 401 206  87   0 549   0 179 625
 357 618   0 260 346 629 548 109 236 257 182   0 595 251 406 237   0   0
 253   0 119   0 228 107   0 319  25   0   0 474 387 232 157   0  20   0
   0 262   0 177  41 515 239 260 360 233 592   0 248 629 386 298 514]


In [119]:
# generate the class probabilities
probs = clf.predict_proba(X_test)
print(probs.shape)

(197, 632)


In [120]:
# score of the classifier
clf.score(X_test, y_test_encoded)

0.24873096446700507

### Compute the value of PLCC

In [121]:
plcc, pval = scipy.stats.pearsonr(y_test_encoded, Y_pred)

print("PLCC: {0:1.2f} PVAL: {1:1.2f}".format(plcc, pval))

PLCC: 0.91 PVAL: 0.00
