In [2]:
import pandas as pd
import numpy as np
import pandas as pd
from math import log2
import random

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Load the datasets
football_news_df = pd.read_csv('../data/news_test.csv', encoding='latin1')
matches_df = pd.read_csv('../data/matches.csv', encoding='latin1')
users_df = pd.read_csv('../data/users.csv')
bets_df = pd.read_csv('../data/bets.csv')
liked_clubs_df = pd.read_csv('../data/liked_clubs.csv')
clubs_df = pd.read_csv('../data/clubs.csv')
feedback_file_path = '../data/feedback.csv'

try:
    feedback_df = pd.read_csv(feedback_file_path)
except FileNotFoundError:
    feedback_df = pd.DataFrame(columns=['user_id', 'news_id', 'action', 'rating'])

## 1. Algo: tf-idf

In [3]:
# preprocess data
def preprocess_data(news_df):
    news_df = news_df.dropna(subset=['Content', 'Title'])
    news_df['Content'] = news_df['Content'].fillna('')
    return news_df

# generate test data
def generate_test_data(users, news_ids, num_relevant=5):
    test_data = []
    for user in users:
        relevant_items = random.sample(news_ids, num_relevant)
        test_data.append({"user_id": user, "relevant_items": relevant_items})
    
    return pd.DataFrame(test_data)

# algo for recommendation
def recommend_articles(user_id, football_news_df, bets_df, feedback_df, liked_clubs_df, clubs_df, top_n=20):

    # User's preferred teams from bets
    user_bets = bets_df[bets_df['user_id'] == user_id]
    preferred_teams = user_bets['selected_team'].astype(str).unique() 

    # User's liked clubs
    user_liked_clubs = liked_clubs_df[liked_clubs_df['user_id'] == user_id]['club_id'].tolist()

    liked_clubs_info = clubs_df[clubs_df['id'].isin(user_liked_clubs)][['club', 'country']]
    liked_club_names = liked_clubs_info['club'].astype(str).tolist()  
    liked_club_countries = liked_clubs_info['country'].astype(str).tolist()  

    preferred_articles = football_news_df[
        football_news_df['Content'].str.contains('|'.join(preferred_teams), case=False, na=False)
        | football_news_df['Content'].str.contains('|'.join(liked_club_names), case=False, na=False)
        | football_news_df['Content'].str.contains('|'.join(liked_club_countries), case=False, na=False)
    ]

    # Exclude articles marked as "not interested"
    not_interested = feedback_df[
        (feedback_df['user_id'] == user_id) & (feedback_df['action'] == 'not_interested')
    ]['news_id'].tolist()
    filtered_articles = preferred_articles[~preferred_articles['News ID'].isin(not_interested)]

    # TF-IDF Vectorization
    tfidf_vectorizer = TfidfVectorizer(
        stop_words='english', ngram_range=(1, 2), max_features=5000,
        sublinear_tf=True, max_df=0.7, min_df=2
    )
    tfidf_matrix = tfidf_vectorizer.fit_transform(filtered_articles['Content'])

    # Cosine Similarity
    similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

    # Boost scores 
    user_feedback = feedback_df[feedback_df['user_id'] == user_id]
    for _, feedback in user_feedback.iterrows():
        if feedback['action'] == 'rated':
            news_index = filtered_articles[filtered_articles['News ID'] == feedback['news_id']].index
            if not news_index.empty:
                news_idx = news_index[0]
                if news_idx < similarity_matrix.shape[1]:
                    similarity_matrix[:, news_idx] *= (1 + feedback['rating'] / 5.0)

    # Rank and recommend
    recommended_indices = similarity_matrix.sum(axis=1).argsort()[-top_n:][::-1]
    recommendations = filtered_articles.iloc[recommended_indices]

    return recommendations['News ID'].tolist()


In [4]:
football_news_df = preprocess_data(football_news_df)

# Generate test data
users = users_df['id'].tolist()
news_ids = football_news_df['News ID'].tolist()
test_data = generate_test_data(users, news_ids, num_relevant=5)

# for evaluation system
# Recommendation system wrapper
class RecommendationSystem:
    def recommend(self, user_id, k=10):
        return recommend_articles(
            user_id, football_news_df, bets_df, feedback_df, liked_clubs_df, clubs_df, top_n=k
        )

# Instantiate the system
system = RecommendationSystem()

# TF-IDF embeddings for diversity
tfidf_vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2), max_features=5000)
content_embeddings = tfidf_vectorizer.fit_transform(football_news_df['Content'])

content_embedding_dict = {
    row['News ID']: content_embeddings[i].toarray().flatten() for i, row in football_news_df.iterrows()
}

# Global popularity for novelty and serendipity
global_popularity = {news_id: 1 / (i + 1) for i, news_id in enumerate(football_news_df['News ID'])}



# Part 2: Evaluation of recommendation system

## === Evaluation functions ===

### 1. Predictive quality metrics

In [5]:
def precision_at_k(recommended, relevant, k):
    recommended_at_k = recommended[:k]
    hits = len(set(recommended_at_k) & set(relevant))
    return hits / k


def recall_at_k(recommended, relevant, k):
    recommended_at_k = recommended[:k]
    hits = len(set(recommended_at_k) & set(relevant))
    return hits / len(relevant) if len(relevant) > 0 else 0


def f_score_at_k(precision, recall):
    return (
        2 * (precision * recall) / (precision + recall)
        if (precision + recall) > 0
        else 0
    )

### 2. Ranking quality metrics

In [6]:
def mrr(recommended, relevant):
    for i, item in enumerate(recommended):
        if item in relevant:
            return 1 / (i + 1)
    return 0


def average_precision(recommended, relevant):
    hits, precision_sum = 0, 0
    for i, item in enumerate(recommended):
        if item in relevant:
            hits += 1
            precision_sum += hits / (i + 1)
    return precision_sum / len(relevant) if relevant else 0


def ndcg(recommended, relevant, k):
    recommended_at_k = recommended[:k]
    dcg = sum(
        [1 / log2(i + 2) if recommended_at_k[i] in relevant else 0 for i in range(k)]
    )
    idcg = sum([1 / log2(i + 2) for i in range(min(len(relevant), k))])
    return dcg / idcg if idcg > 0 else 0

### 3. Behavioral metrics

In [7]:
def diversity(recommended, content_embeddings):
    embeddings = np.array([content_embeddings[news_id] for news_id in recommended if news_id in content_embeddings])
    if len(embeddings) < 2: 
        return 0
    
    similarities = cosine_similarity(embeddings)
    return 1 - np.mean(similarities[np.triu_indices(len(similarities), k=1)])

def novelty(recommended, global_popularity):
    return np.mean([1 - global_popularity[item] for item in recommended])


def serendipity(recommended, relevant, global_popularity):
    unexpected_items = [item for item in recommended if item not in relevant]
    return np.mean([1 - global_popularity[item] for item in unexpected_items])

## === Evaluation pipeline ===

In [8]:
def evaluate_system(system, test_data, content_embeddings, global_popularity, k=10):
    precision_scores, recall_scores, f_scores = [], [], []
    mrr_scores, map_scores, ndcg_scores = [], [], []
    diversity_scores, novelty_scores, serendipity_scores = [], [], []

    for _, row in test_data.iterrows():
        user_id = row['user_id']
        relevant = set(row['relevant_items'])
        recommended = system.recommend(user_id, k=k)

        if not recommended:
            continue

        # Predictive metrics
        precision = precision_at_k(recommended, relevant, k)
        recall = recall_at_k(recommended, relevant, k)
        f_score = f_score_at_k(precision, recall)

        # Ranking metrics
        mrr_score = mrr(recommended, relevant)
        map_score = average_precision(recommended, relevant)
        ndcg_score = ndcg(recommended, relevant, k)

        # Behavioral metrics
        diversity_score = diversity(recommended, content_embedding_dict)
        novelty_score = novelty(recommended, global_popularity)
        serendipity_score = serendipity(recommended, relevant, global_popularity)

        precision_scores.append(precision)
        recall_scores.append(recall)
        f_scores.append(f_score)
        mrr_scores.append(mrr_score)
        map_scores.append(map_score)
        ndcg_scores.append(ndcg_score)
        diversity_scores.append(diversity_score)
        novelty_scores.append(novelty_score)
        serendipity_scores.append(serendipity_score)

    return {
        "Precision@K": np.mean(precision_scores) if precision_scores else 0,
        "Recall@K": np.mean(recall_scores) if recall_scores else 0,
        "F-Score@K": np.mean(f_scores) if f_scores else 0,
        "MRR": np.mean(mrr_scores) if mrr_scores else 0,
        "MAP": np.mean(map_scores) if map_scores else 0,
        "NDCG": np.mean(ndcg_scores) if ndcg_scores else 0,
        "Diversity": np.mean(diversity_scores) if diversity_scores else 0,
        "Novelty": np.mean(novelty_scores) if novelty_scores else 0,
        "Serendipity": np.mean(serendipity_scores) if serendipity_scores else 0,
    }

### 3. Testing

In [9]:
print(f"Actual column names: {list(test_data.columns)}")

results = evaluate_system(system, test_data, content_embedding_dict, global_popularity, k=10)

print("Evaluation results:")
for metric, score in results.items():
    print(f"{metric}: {score:.4f}")

Actual column names: ['user_id', 'relevant_items']
Evaluation results:
Precision@K: 0.0100
Recall@K: 0.0200
F-Score@K: 0.0133
MRR: 0.0200
MAP: 0.0040
NDCG: 0.0131
Diversity: 0.7778
Novelty: 0.9874
Serendipity: 0.9874
