You can use the following cells to fit a linear regression (scikit-learn's `LinearRegression`) on the AVA dataset, using the embedding files we created earlier:

In [1]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

from datetime import datetime

from scipy.stats import spearmanr, pearsonr
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
import scipy

from sklearn.linear_model import LinearRegression


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Set BASE_PATH to the directory containing the embeddings created with either
# the CLIP or the IN21k ResNet variant.
# NOTE: the previous value was './enocdings_clip', which looks like a typo when
# compared to the IN21k directory name below; adjust if your directory really
# is spelled that way.
BASE_PATH = './encodings_clip'
#BASE_PATH = './encodings_in21k'

# Weight applied to each of the ten rating columns (1, 2, ..., 10).
# Presumably every ratings row is a distribution over the scores 1-10, which
# would make the weighted sum the mean score -- TODO confirm against the
# script that wrote the *_ratings.pt files.
SCORE_VALUES = torch.arange(1, 11, dtype=torch.float32)


def _load_split(split):
    """Load one data split from BASE_PATH as NumPy arrays.

    Args:
        split: File name prefix of the split, i.e. 'train' or 'test'.

    Returns:
        A tuple ``(encodings, y)`` where ``encodings`` is the content of
        ``<split>_encodings.pt`` and ``y`` is the content of
        ``<split>_ratings.pt`` multiplied with SCORE_VALUES, kept as a column
        (trailing dimension of size 1).
    """
    # map_location='cpu' lets tensors that were saved from a GPU be loaded on
    # a machine without CUDA; .numpy() below requires CPU tensors anyway.
    encodings = torch.load(os.path.join(BASE_PATH, f'{split}_encodings.pt'), map_location='cpu')
    ratings = torch.load(os.path.join(BASE_PATH, f'{split}_ratings.pt'), map_location='cpu')

    y = (ratings @ SCORE_VALUES).unsqueeze(dim=1)
    return encodings.numpy(), y.numpy()


train_encodings, train_y = _load_split('train')
test_encodings, test_y = _load_split('test')

# Standardize the features with statistics estimated on the training split
# only, then apply the same transformation to both splits.
scaler = StandardScaler()
scaler.fit(train_encodings)
train_encodings = scaler.transform(train_encodings)
test_encodings = scaler.transform(test_encodings)

In [None]:
# Fit a linear regression of the rating targets on the scaled training encodings.
lin_reg = LinearRegression()
lin_reg.fit(train_encodings, train_y)

In [None]:
# Scores strictly above this value are treated as 'good', all others as 'bad'.
GOOD_SCORE_THRESHOLD = 5.0

pred = lin_reg.predict(test_encodings)

# Binarize targets and predictions with the same threshold.
labels = (test_y > GOOD_SCORE_THRESHOLD)
binary_pred = pred > GOOD_SCORE_THRESHOLD

# Hand both correlation functions the same flattened 1-D arrays, so neither
# depends on how column-shaped (n, 1) input is interpreted.
pred_flat = np.ravel(pred)
test_y_flat = np.ravel(test_y)

# p and p2 are the p-values returned alongside each coefficient; they are not
# printed below.
spearman, p = spearmanr(pred_flat, test_y_flat)
pearson, p2 = pearsonr(pred_flat, test_y_flat)


print("\n------------------ \nLinear Regression :")
print("Binary Results: \n")
# classification_report orders the classes False, True, hence 'bad' first.
print(classification_report(labels, binary_pred, digits=4, target_names=['bad', 'good']))

print("Continuous Results: \n")
print(f"Spearman: {spearman}\n")
print(f"Pearson: {pearson}\n")

[5.3840265]
2.3244534 7.304132 5.3783875

------------------ 
Linear Regression :
Binary Results: 

              precision    recall  f1-score   support

         bad     0.7072    0.5202    0.5995      7597
        good     0.8174    0.9089    0.8607     17954

    accuracy                         0.7933     25551
   macro avg     0.7623    0.7145    0.7301     25551
weighted avg     0.7847    0.7933    0.7830     25551

Continuous Results: 

Spearman: 0.673779405527753

Pearson: 0.681997027278449

