In [62]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# pip install tensorflow==2.16.1
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error
import tensorflow as tf
import tensorflow.keras.backend as K
from sklearn.metrics import precision_score, recall_score, f1_score

In [18]:
model = tf.keras.models.load_model("test3.keras")

In [27]:
def predict(model, X_test):
    X_test = [X_test[:, 0], X_test[:, 1]]
    return model.predict(X_test)

def evaluate(model, X_test, y_test):
    X_test = [X_test[:, 0], X_test[:, 1]]
    # y_test = [y_test[:, 0], y_test[:, 1]]
    return model.evaluate(X_test, y_test, verbose=0)

In [20]:
df = pd.read_csv("./data/item_ratings.csv")#.iloc[:1000,:]
X = df[['userId','movieId']].to_numpy()
y = df['rating'].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
num_users = df['userId'].max() + 1
num_items = df['movieId'].max() + 1

In [21]:
y_pred = predict(model, X_test)

[1m  147/67669[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m47s[0m 702us/step



[1m67669/67669[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 492us/step


In [None]:
def recall_m(y_true, y_pred, batch_size=1024):
    def process_batch(start, end):
        batch_y_true = y_true[start:end]
        batch_y_pred = y_pred[start:end]
        true_positives = K.sum(K.round(K.clip(batch_y_true * batch_y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(batch_y_true, 0, 1)))
        return true_positives, possible_positives
    num_samples = y_true.shape[0]
    true_positives_total = 0
    possible_positives_total = 0
    for start in range(0, num_samples, batch_size):
        end = min(start + batch_size, num_samples)
        batch_true_positives, batch_possible_positives = process_batch(start, end)
        true_positives_total += batch_true_positives
    print(f"true positives: {true_positives_total}")
    print(f"possible positives: {possible_positives_total}")
    recall = recall_score()
    return recall


def precision_m(y_true, y_pred, batch_size=1024):
    def process_batch(start, end):
        batch_y_true = y_true[start:end]
        batch_y_pred = y_pred[start:end]
        true_positives = K.sum(K.round(K.clip(batch_y_true * batch_y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(batch_y_pred, 0, 1)))
        return true_positives, predicted_positives
    num_samples = y_true.shape[0]
    true_positives_total = 0
    predicted_positives_total = 0
    for start in range(0, num_samples, batch_size):
        end = min(start + batch_size, num_samples)
        batch_true_positives, batch_predicted_positives = process_batch(start, end)
        true_positives_total += batch_true_positives
        predicted_positives_total += batch_predicted_positives
    print(f"true positives: {true_positives_total}")
    print(f"predicted positives: {predicted_positives_total}")
    precision = float(true_positives_total) / float(predicted_positives_total + K.epsilon())
    return precision

def f1_score_fast(precision, recall):
    f1_score = 2 * ((precision * recall) / (precision + recall + K.epsilon()))
    return f1_score

def f1_m(y_true, y_pred, batch_size=1024):
    precision = precision_m(y_true, y_pred, batch_size)
    recall = recall_m(y_true, y_pred, batch_size)
    f1_score = 2 * ((precision * recall) / (precision + recall + K.epsilon()))
    return f1_score

In [65]:
y_test

array([3.5, 4.5, 4. , ..., 5. , 1. , 4. ])

In [72]:
y_pred = y_pred.flatten()
y_pred

array([1., 1., 1., ..., 1., 1., 1.], dtype=float32)

In [71]:
recall = recall_score(y_test, y_pred, average='multiclass')
precision = precision_score(y_test, y_pred, average='')
f1 = f1_score(precision, recall, average='macro')
loss = evaluate(model, X_test, y_test)


InvalidParameterError: The 'average' parameter of recall_score must be a str among {'macro', 'micro', 'samples', 'weighted', 'binary'} or None. Got 'multiclass' instead.

In [61]:
metrics = {
        'loss': loss,
        # 'accuracy': accuracy,
        'f1-score': f1.numpy(),
        'precision': precision,
        'recall': recall.numpy()
        }
metrics

{'loss': 7.555699348449707,
 'f1-score': 2021.6388014729873,
 'precision': 1010.8194007365404,
 'recall': 2.188684794e+16}

In [49]:
import json

with open('metrics2.txt', 'w') as f:
    json.dump(metrics, f)