In [5]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn import svm
from sklearn.metrics import confusion_matrix, accuracy_score
from datareplication import datareplication, generate_synthetic_data1
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

def infinitesimal_edges (X):
    """Return, per column of X, the midpoints between consecutive distinct values.

    Parameters
    ----------
    X : 2-D numpy array, one column per feature.

    Returns
    -------
    list of 1-D numpy arrays
        One array per column. Each holds the averages of neighbouring entries
        of the column's sorted unique values, so a column with ``u`` distinct
        values yields ``u - 1`` edges (an empty array for a constant column).
    """
    n_features = np.shape(X)[1]
    # np.unique returns the distinct values already sorted in ascending order.
    levels_per_feature = (np.unique(X[:, col]) for col in range(n_features))
    return [0.5 * (levels[:-1] + levels[1:]) for levels in levels_per_feature]

def digital_encoding (X, ledges):
    """Replace every value of X by the index of the bin it falls into.

    Parameters
    ----------
    X : 2-D numpy array, one column per feature. It is not modified.
    ledges : sequence indexed by column; ``ledges[i]`` holds the bin edges
        passed to ``np.digitize`` for column ``i``.

    Returns
    -------
    numpy array with the same shape and dtype as X
        The bin indices are written into a copy of X, so for a float X the
        indices come back as floats.
    """
    n_features = np.shape(X)[1]
    encoded = np.array(X, copy=True)
    for col in range(n_features):
        encoded[:, col] = np.digitize(X[:, col], ledges[col])
    return encoded

def triangularEncoder(X, ledges):
    """Expand bin codes into cumulative 0/1 indicator columns.

    (The original author labels this "differential encoding".)

    For feature ``i`` there are ``len(ledges[i]) + 1`` possible codes. Code
    ``k`` is replaced by row ``k`` of a lower-triangular matrix of ones, i.e.
    ones in positions ``0..k`` followed by zeros. The per-feature blocks are
    concatenated side by side.

    Parameters
    ----------
    X : 2-D array of bin codes, as produced by ``digital_encoding``.
        Integer arrays are used as-is. Floating-point arrays are accepted when
        every entry is a finite whole number (``digital_encoding`` returns
        floats for float input) and are cast to int before indexing.
    ledges : sequence indexed by column; only ``len(ledges[i])`` is used.

    Returns
    -------
    numpy float array of shape ``(n_samples, sum(len(ledges[i]) + 1))``.

    Raises
    ------
    ValueError
        If X is a float array containing NaN, infinities or non-integral
        values, which cannot be valid bin codes.
    """
    codes = np.asarray(X)
    if np.issubdtype(codes.dtype, np.floating):
        # Float arrays cannot be used as indices; cast only when lossless.
        if not (np.isfinite(codes).all() and (codes == np.floor(codes)).all()):
            raise ValueError(
                "triangularEncoder expects integer bin codes; "
                "got non-integral or non-finite values"
            )
        codes = codes.astype(int)

    blocks = []
    for i in range(codes.shape[1]):
        n_codes = len(ledges[i]) + 1
        lower = np.tri(n_codes)  # row k: ones in columns 0..k, zeros after
        blocks.append(lower[codes[:, i], :])
    return np.concatenate(blocks, axis=1)

import pandas as pd

In [11]:
# Load the aesthetic-evaluation table.
df = pd.read_csv('../datasets/aesthetic_evaluation_data.csv')

# Encode both evaluation columns as ordinal grades: 1 = Excellent ... 4 = Poor.
GRADE_CODES = {'Poor': 4, 'Fair': 3, 'Good': 2, 'Excellent': 1}
for grade_col in ('Subjective Evaluation', 'Objective Evaluation'):
    grades = df[grade_col].map(GRADE_CODES)
    # Series.map() turns every label missing from the dict into NaN; stop here
    # instead of letting NaN labels flow into the split and the classifier.
    if grades.isna().any():
        unknown = sorted(df.loc[grades.isna(), grade_col].astype(str).unique())
        raise ValueError(f"Unexpected labels in '{grade_col}': {unknown}")
    df[grade_col] = grades

X = df[['pLBC Value', 'pBCE Value', 'cX2b Value', 'pUNR Value']].to_numpy()
print("X: ", X)

K = 4 # n of classes

# The code assumes classes from 0 to K-1. Flip the subjective grade so that
# 0 = Poor ... K-1 = Excellent (grade 4 -> 0, 3 -> 1, 2 -> 2, 1 -> 3).
y = K - df['Subjective Evaluation'].to_numpy()
print(np.unique(y))
print(pd.Series(y))

# NOTE: y_obj is NOT flipped: it runs 0 = Excellent ... K-1 = Poor, the
# opposite orientation of y. It is not used by the model below.
y_obj = df['Objective Evaluation'].to_numpy() - 1

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)


def replicate_encoded(features, labels, edges, n_classes):
    """Digitize `features` with `edges`, triangular-encode, then replicate.

    Returns whatever `datareplication` returns (unpacked by the callers below
    as a replicated feature matrix and replicated labels).
    """
    codes = digital_encoding(features, edges)
    expanded = triangularEncoder(codes.astype(int), edges)
    return datareplication(expanded, n_classes, labels)


def predict_ordinal(model, replicated, n_classes):
    """Sum each group of n_classes-1 binary predictions into one class index.

    Assumes the n_classes-1 replicas of a sample are consecutive rows of
    `replicated` -- TODO confirm against `datareplication`.
    """
    binary = model.predict(replicated)
    return np.sum(np.reshape(binary, (-1, n_classes - 1)), axis=1)


# Bin edges come from the training split only and are reused for the test split.
train_edges = infinitesimal_edges (X_train)
XX, yy = replicate_encoded(X_train, y_train, train_edges, K)

#test classifier
#clf = svm.LinearSVC(C = 0.05, penalty='l1')
#clf = LogisticRegression(random_state=0, penalty = 'l2', solver='newton-cholesky', C=0.04)
clf = LogisticRegression(random_state=0, penalty = 'elasticnet', l1_ratio = 0.4, solver='saga', C=0.06)
clf.fit(XX, np.reshape(yy, (-1)))

#train performance
y_pred = predict_ordinal(clf, XX, K)
cm = confusion_matrix(y_train, y_pred)
accuracy = accuracy_score(y_train, y_pred)
print("confusion matrix on training data:", cm)
print("accuracy on training data:", accuracy)

#TEST DATA
# XX / yy are deliberately rebound to the test-split encoding here.
XX, yy = replicate_encoded(X_test, y_test, train_edges, K)
y_pred = predict_ordinal(clf, XX, K)

cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
print("confusion matrix on test data:", cm)
print("accuracy on test data:", accuracy)

## CHECK PERFORMANCE OF OLD SCORECARD (currently disabled)
# Everything between the triple quotes below is a bare string literal, not
# executable code: none of it runs. Because the literal is the last expression
# of the cell, the notebook echoes its repr as the cell's output.
# The disabled code scores each row of X with fixed thresholds on features
# 0-2, turns the summed points into a grade 1-4, and compares that grade (and
# y_obj) against y with confusion_matrix / accuracy_score.
# NOTE(review): earlier in this cell y is flipped (Poor -> 0) while y_obj is
# not (Poor -> 3); confirm the label orientation of y, y_obj and old_pred
# before re-enabling this block.
""" def scorecardPrediction(features) :
    prediction = -1
    points = 0.0
    if (features[0]<0.0174):
        points = 0
    elif (features[0]<0.1012):
        points = 0.1538
    elif (features[0]<0.1707):
        points = 0.4379
    else:
        points = 0.5767

    if (features[1]<0.0184):
        points += 0
    elif (features[1]<0.3684):
        points += 0.0636
    elif (features[1]<0.9219):
        points += 0.1938
    else :
        points += 0.2128

    if (features[2]<0.0140):
        points += 0
    elif (features[2]<0.0267):
        points += 0.0299
    elif (features[2]<0.6881):
        points += 0.0642
    else:
        points += 0.0727

    if (points < 0.1738):
        prediction = 1
    elif (points<0.3720):
        prediction = 2
    elif (points<0.7002):
        prediction = 3
    else:
        prediction = 4
    return prediction

old_pred = [scorecardPrediction(row) for row in X]
#print (old_pred)
old_pred = np.array(old_pred)-1
print ( y[0:10], y_obj[0:10], old_pred[0:10])
cm = confusion_matrix(y, old_pred)
accuracy = accuracy_score(y, old_pred)
print (cm, accuracy)  

print ('OBJ vs SUB in EXCEL')
cm = confusion_matrix(y, y_obj)
accuracy = accuracy_score(y, y_obj)
print (cm, accuracy)   """

X:  [[0.03335584 0.17315642 0.21716945 0.00552159]
 [0.05850918 0.05062446 0.19331673 0.09257023]
 [0.0386133  0.17600406 0.28062475 0.01577757]
 ...
 [0.01697814 0.14373508 0.02367085 0.05983761]
 [0.02715695 0.04932025 0.15577354 0.05534174]
 [0.0891616  0.0522731  0.35415953 0.1487727 ]]
[0 1 2 3]
0       2
1       2
2       2
3       2
4       2
       ..
1353    1
1354    2
1355    2
1356    3
1357    1
Length: 1358, dtype: int64
confusion matrix on training data: [[ 49  11   4   2]
 [ 25  75 129   6]
 [  3  15 362  29]
 [  0   0 126  73]]
accuracy on training data: 0.6149614961496149
confusion matrix on test data: [[ 20   7   9   0]
 [ 19  28  68   3]
 [  1   8 159  18]
 [  0   0  66  43]]
accuracy on test data: 0.5567928730512249




" def scorecardPrediction(features) :\n    prediction = -1\n    points = 0.0\n    if (features[0]<0.0174):\n        points = 0\n    elif (features[0]<0.1012):\n        points = 0.1538\n    elif (features[0]<0.1707):\n        points = 0.4379\n    else:\n        points = 0.5767\n\n    if (features[1]<0.0184):\n        points += 0\n    elif (features[1]<0.3684):\n        points += 0.0636\n    elif (features[1]<0.9219):\n        points += 0.1938\n    else :\n        points += 0.2128\n\n    if (features[2]<0.0140):\n        points += 0\n    elif (features[2]<0.0267):\n        points += 0.0299\n    elif (features[2]<0.6881):\n        points += 0.0642\n    else:\n        points += 0.0727\n\n    if (points < 0.1738):\n        prediction = 1\n    elif (points<0.3720):\n        prediction = 2\n    elif (points<0.7002):\n        prediction = 3\n    else:\n        prediction = 4\n    return prediction\n\nold_pred = [scorecardPrediction(row) for row in X]\n#print (old_pred)\nold_pred = np.array(old

In [None]:
# Toy walk-through of the encoding pipeline: four samples, two features,
# labels drawn from three classes (0, 1, 2).
binary_X = pd.DataFrame(
    np.array([[1, 2], [3, 4], [5, 6], [7, 8]]),
    columns=['col1', 'col2'],
)
binary_y = pd.Series(np.array([0,1,2,1]))

toy_features = binary_X.to_numpy()

# Step 1: midpoints between consecutive distinct values of each column.
bin_edges = infinitesimal_edges(toy_features)
print("Binary Edges:", bin_edges)

# Step 2: bin index per value, then cumulative 0/1 expansion of each index.
encoded = triangularEncoder(
    digital_encoding(toy_features, bin_edges).astype(int),
    bin_edges,
)
print("Encoded Data:", encoded)

# Step 3: data replication for 3 classes.
replicated_data, replicated_labels = datareplication(encoded, 3, binary_y.to_numpy())
print("Replicated Data:", replicated_data)
print("Replicated Labels:", replicated_labels)

Binary Edges: [array([2., 4., 6.]), array([3., 5., 7.])]
Encoded Data: [[1. 0. 0. 0. 1. 0. 0. 0.]
 [1. 1. 0. 0. 1. 1. 0. 0.]
 [1. 1. 1. 0. 1. 1. 1. 0.]
 [1. 1. 1. 1. 1. 1. 1. 1.]]
Replicated Data: [[1. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 1. 0. 0. 0. 1.]
 [1. 1. 0. 0. 1. 1. 0. 0. 0.]
 [1. 1. 0. 0. 1. 1. 0. 0. 1.]
 [1. 1. 1. 0. 1. 1. 1. 0. 0.]
 [1. 1. 1. 0. 1. 1. 1. 0. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 0.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1.]]
Replicated Labels: [[0.]
 [0.]
 [1.]
 [0.]
 [1.]
 [1.]
 [1.]
 [0.]]
