In [21]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import pickle
import pandas as pd

# Load the regression dataset and keep the first 380 target scores as a
# plain Python list (slicing the column before converting it).
regression_df = pd.read_csv('regression.csv')
y = regression_df['score'].iloc[:380].tolist()

def evaluate_regression_model(embeddings_file, y, *, test_size=0.2, random_state=42):
    """Fit a linear regression on pickled embeddings and print test-set metrics.

    The embeddings are unpickled from ``embeddings_file``, truncated to the
    first ``len(y)`` rows so features and targets line up, split into train
    and test partitions, and used to fit a ``LinearRegression``.  Mean squared
    error, mean absolute error and R-squared on the held-out partition are
    printed to stdout; nothing is returned.

    Args:
        embeddings_file: Path to a pickle file containing the embeddings, in
            any form ``np.array`` can convert (one entry per sample).
        y: Sequence of target values. Only the first ``len(y)`` embeddings
            are used.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split`` so the split is
            reproducible across the different embedding files.

    Raises:
        ValueError: If the file holds fewer embeddings than there are targets.
    """
    # NOTE(security): pickle.load can execute arbitrary code while loading.
    # Only point this at embedding files you produced yourself or trust.
    with open(embeddings_file, 'rb') as f:
        embeddings = pickle.load(f)

    # Truncate to the number of targets instead of a hard-coded sample count,
    # so the function keeps working when the caller changes the size of y.
    n_samples = len(y)
    embeddings_np = np.array(embeddings)[:n_samples]
    if len(embeddings_np) != n_samples:
        raise ValueError(
            f"{embeddings_file} contains {len(embeddings_np)} embeddings "
            f"but {n_samples} target values were given"
        )

    X_train, X_test, y_train, y_test = train_test_split(
        embeddings_np, y, test_size=test_size, random_state=random_state
    )

    regressor = LinearRegression()
    regressor.fit(X_train, y_train)

    y_pred = regressor.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"Results for {embeddings_file}:")
    print(f"Mean Squared Error: {mse:.2f}")
    print(f"Mean Absolute Error: {mae:.2f}")
    print(f"R-squared: {r2:.2f}")
    print()

# Alternative inputs (currently disabled): 'first_layer_embeddings.pkl',
# 'mid_layer_embeddings.pkl', 'final_layer_embeddings.pkl'.
# Evaluate each pickled embedding set against the same target scores.
for embeddings_path in ('1_regression.pkl', '2_regression.pkl', '3_regression.pkl'):
    evaluate_regression_model(embeddings_path, y)


Results for 1_regression.pkl:
Mean Squared Error: 0.96
Mean Absolute Error: 0.80
R-squared: -0.01

Results for 2_regression.pkl:
Mean Squared Error: 0.57
Mean Absolute Error: 0.55
R-squared: 0.40

Results for 3_regression.pkl:
Mean Squared Error: 0.54
Mean Absolute Error: 0.59
R-squared: 0.43

