In [53]:
import pandas as pd
import numpy as np
import pandas as pd
from math import log2
import random

#
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# neural
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics.pairwise import cosine_similarity

In [54]:
# Load the datasets
football_news_df = pd.read_csv('./data/news_test.csv', encoding='latin1')
matches_df = pd.read_csv('./data/matches.csv', encoding='latin1')
users_df = pd.read_csv('./data/users.csv')
bets_df = pd.read_csv('./data/bets.csv')
liked_clubs_df = pd.read_csv('./data/liked_clubs.csv')
clubs_df = pd.read_csv('./data/clubs.csv')
feedback_file_path = './data/feedback.csv'

try:
    feedback_df = pd.read_csv(feedback_file_path)
except FileNotFoundError:
    feedback_df = pd.DataFrame(columns=['user_id', 'news_id', 'action', 'rating'])

# Part 1: Recommendation system

## 1. Algo: tf-idf

In [55]:
# Preprocessing function
def preprocess_data(news_df):
    news_df = news_df.dropna(subset=['Content', 'Title'])
    news_df['Content'] = news_df['Content'].fillna('')
    return news_df

football_news_df = preprocess_data(football_news_df)

def generate_test_data(users, news_ids, num_relevant=5):
    test_data = []
    for user in users:
        relevant_items = random.sample(news_ids, num_relevant)
        test_data.append({"user_id": user, "relevant_items": relevant_items})
    
    return pd.DataFrame(test_data)

def recommend_articles(user_id, football_news_df, bets_df, feedback_df, liked_clubs_df, clubs_df, top_n=20):
    # User's preferred teams from bets
    user_bets = bets_df[bets_df['user_id'] == user_id]
    preferred_teams = user_bets['selected_team'].astype(str).unique()  # Ensure teams are strings

    # User's liked clubs
    user_liked_clubs = liked_clubs_df[liked_clubs_df['user_id'] == user_id]['club_id'].tolist()

    # Map liked clubs to their names and countries
    liked_clubs_info = clubs_df[clubs_df['id'].isin(user_liked_clubs)][['club', 'country']]
    liked_club_names = liked_clubs_info['club'].astype(str).tolist()  # Convert to strings
    liked_club_countries = liked_clubs_info['country'].astype(str).tolist()  # Convert to strings

    # Filter news articles about preferred teams, liked clubs, or their countries
    preferred_articles = football_news_df[
        football_news_df['Content'].str.contains('|'.join(preferred_teams), case=False, na=False)
        | football_news_df['Content'].str.contains('|'.join(liked_club_names), case=False, na=False)
        | football_news_df['Content'].str.contains('|'.join(liked_club_countries), case=False, na=False)
    ]

    # Exclude articles marked as "not interested"
    not_interested = feedback_df[
        (feedback_df['user_id'] == user_id) & (feedback_df['action'] == 'not_interested')
    ]['news_id'].tolist()
    filtered_articles = preferred_articles[~preferred_articles['News ID'].isin(not_interested)]

    # TF-IDF Vectorization
    tfidf_vectorizer = TfidfVectorizer(
        stop_words='english', ngram_range=(1, 2), max_features=5000,
        sublinear_tf=True, max_df=0.7, min_df=2
    )
    tfidf_matrix = tfidf_vectorizer.fit_transform(filtered_articles['Content'])

    # Cosine Similarity
    similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

    # Boost scores for user-rated articles
    user_feedback = feedback_df[feedback_df['user_id'] == user_id]
    for _, feedback in user_feedback.iterrows():
        if feedback['action'] == 'rated':
            news_index = filtered_articles[filtered_articles['News ID'] == feedback['news_id']].index
            if not news_index.empty:
                news_idx = news_index[0]
                if news_idx < similarity_matrix.shape[1]:
                    similarity_matrix[:, news_idx] *= (1 + feedback['rating'] / 5.0)

    # Rank and recommend
    recommended_indices = similarity_matrix.sum(axis=1).argsort()[-top_n:][::-1]
    recommendations = filtered_articles.iloc[recommended_indices]

    return recommendations['News ID'].tolist()


In [56]:
# Generate test data
users = users_df['id'].tolist()
news_ids = football_news_df['News ID'].tolist()
test_data = generate_test_data(users, news_ids, num_relevant=5)

# Recommendation system wrapper
class RecommendationSystem:
    def recommend(self, user_id, k=10):
        return recommend_articles(
            user_id, football_news_df, bets_df, feedback_df, liked_clubs_df, clubs_df, top_n=k
        )

# Instantiate the system
system = RecommendationSystem()

# TF-IDF embeddings for diversity
tfidf_vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2), max_features=5000)
content_embeddings = tfidf_vectorizer.fit_transform(football_news_df['Content'])

content_embedding_dict = {
    row['News ID']: content_embeddings[i].toarray().flatten() for i, row in football_news_df.iterrows()
}

# Global popularity for novelty and serendipity
global_popularity = {news_id: 1 / (i + 1) for i, news_id in enumerate(football_news_df['News ID'])}



## Algo 2: Neural recommendation 

In [91]:
def prepare_data(feedback_df):
    feedback_df = feedback_df[feedback_df['action'] == 'rated'].dropna(subset=['rating'])
    feedback_df['rating'] = feedback_df['rating'].astype(float)
    user_ids = feedback_df['user_id'].astype('category').cat.codes
    news_ids = feedback_df['news_id'].astype('category').cat.codes
    feedback_df['user_id'] = user_ids
    feedback_df['news_id'] = news_ids

    num_users = user_ids.max() + 1
    num_items = news_ids.max() + 1
    return feedback_df, num_users, num_items

# Prepare betting and liked_clubs data
def get_user_preferences(user_id, bets_df, liked_clubs_df, clubs_df):
    # Teams from bets
    user_teams = bets_df[bets_df['user_id'] == user_id]['selected_team'].unique().tolist()

    # Clubs from liked_clubs
    liked_club_ids = liked_clubs_df[liked_clubs_df['user_id'] == user_id]['club_id'].unique()
    liked_clubs = clubs_df[clubs_df['id'].isin(liked_club_ids)]['club'].unique().tolist()

    return user_teams, liked_clubs

feedback_df, num_users, num_items = prepare_data(feedback_df)

In [92]:
class FeedbackDataset(Dataset):
    def __init__(self, df):
        self.users = torch.tensor(df['user_id'].values, dtype=torch.long)
        self.items = torch.tensor(df['news_id'].values, dtype=torch.long)
        self.ratings = torch.tensor(df['rating'].values, dtype=torch.float)

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.ratings[idx]
    
class NCF(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim=50):
        super(NCF, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)

        self.fc_layers = nn.Sequential(
            nn.Linear(embedding_dim * 2, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )

    def forward(self, user_ids, item_ids):
        user_embed = self.user_embedding(user_ids)
        item_embed = self.item_embedding(item_ids)
        x = torch.cat([user_embed, item_embed], dim=-1)
        return self.fc_layers(x).squeeze()

In [93]:
# train_df, test_df = train_test_split(feedback_df, test_size=0.2, random_state=42)
# train_dataset = FeedbackDataset(train_df)
# test_dataset = FeedbackDataset(test_df)

# train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Prepare feedback_df
feedback_df = feedback_df[feedback_df['action'] == 'rated'].dropna(subset=['rating'])
feedback_df['rating'] = feedback_df['rating'].astype(float)
feedback_df['user_id'] = feedback_df['user_id'].astype('category').cat.codes
feedback_df['news_id'] = feedback_df['news_id'].astype('category').cat.codes

num_users = feedback_df['user_id'].nunique()
num_items = feedback_df['news_id'].nunique()

# Split into train and test sets
train_df, test_df = train_test_split(feedback_df, test_size=0.2, random_state=42)
train_dataset = FeedbackDataset(train_df)
test_dataset = FeedbackDataset(test_df)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Initialize model, loss, and optimizer
model = NCF(num_users, num_items)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [87]:
model = NCF(num_users, num_items)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [103]:
def train(model, train_loader, criterion, optimizer, epochs=10):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for user_ids, item_ids, ratings in train_loader:
            optimizer.zero_grad()
            predictions = model(user_ids, item_ids)
            loss = criterion(predictions, ratings)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch + 1}, Loss: {total_loss / len(train_loader)}")
        
# def evaluate_rmse(model, test_loader):
#     model.eval()
#     predictions, targets = [], []
#     with torch.no_grad():
#         for user_ids, item_ids, ratings in test_loader:
#             preds = model(user_ids, item_ids)
#             predictions.extend(preds.numpy())
#             targets.extend(ratings.numpy())
#     # Calculate RMSE manually
#     rmse = np.sqrt(mean_squared_error(targets, predictions))
#     print(f"Test RMSE: {rmse}")
#     return rmse

def evaluate_recommendations(model, test_loader, top_k=10):
    model.eval()
    precision_list = []
    recall_list = []
    ndcg_list = []

    with torch.no_grad():
        for user_ids, item_ids, ratings in test_loader:
            for user_id in user_ids.unique():
                user_item_ids = item_ids[user_ids == user_id]
                user_ratings = ratings[user_ids == user_id]

                # Get predictions for all items for the current user
                all_items = torch.arange(num_items, dtype=torch.long)
                user_ids_full = torch.full((num_items,), user_id.item(), dtype=torch.long)
                predictions = model(user_ids_full, all_items)
                
                # Get top K recommendations
                top_k_items = predictions.argsort(descending=True)[:top_k]

                # True items the user interacted with in the test set
                true_items = user_item_ids.numpy()
                true_ratings = user_ratings.numpy()
                
                # Calculate precision, recall, and NDCG
                recommended_items = top_k_items.numpy()
                relevant = np.isin(recommended_items, true_items).astype(int)

                precision = np.sum(relevant) / top_k
                recall = np.sum(relevant) / len(true_items)
                
                # NDCG calculation
                dcg = np.sum(relevant / np.log2(np.arange(2, len(relevant) + 2)))
                idcg = np.sum(1 / np.log2(np.arange(2, len(true_items) + 2))) if len(true_items) > 0 else 1
                ndcg = dcg / idcg

                precision_list.append(precision)
                recall_list.append(recall)
                ndcg_list.append(ndcg)
                

    # Calculate average metrics
    avg_precision = np.mean(precision_list)
    avg_recall = np.mean(recall_list)
    avg_ndcg = np.mean(ndcg_list)

    print(f"Average Precision@{top_k}: {avg_precision:.4f}")
    print(f"Average Recall@{top_k}: {avg_recall:.4f}")
    print(f"Average NDCG@{top_k}: {avg_ndcg:.4f}")

    return avg_precision, avg_recall, avg_ndcg


def recommend_for_user(model, user_id, num_recommendations=10):
    model.eval()
    all_items = torch.arange(num_items, dtype=torch.long)
    user_ids = torch.full((num_items,), user_id, dtype=torch.long)
    with torch.no_grad():
        predictions = model(user_ids, all_items)
    top_items = predictions.argsort(descending=True)[:num_recommendations]
    return top_items.numpy()


def personalized_recommendations(user_id, model, articles_df, num_recommendations=10):
    # Get top-N recommendations from the model
    recommended_items = recommend_for_user(model, user_id, num_recommendations)

    # Map article IDs to titles
    recommended_articles = articles_df.iloc[recommended_items][['News ID', 'Title']]

    # Add preferred teams and liked clubs
    user_teams, liked_clubs = get_user_preferences(user_id, bets_df, liked_clubs_df, clubs_df)
    additional_recommendations = articles_df[
        articles_df['Content'].str.contains('|'.join(user_teams + liked_clubs), case=False, na=False)
    ][['News ID', 'Title']]

    # Combine and deduplicate recommendations
    combined_recommendations = pd.concat([recommended_articles, additional_recommendations]).drop_duplicates()

    return combined_recommendations


In [104]:
# train(model, train_loader, criterion, optimizer, epochs=10)
# evaluate_rmse(model, test_loader)

train(model, train_loader, criterion, optimizer, epochs=10)

# Evaluate the model
precision, recall, ndcg = evaluate_recommendations(model, test_loader, top_k=10)

user_id = 0  # Example user ID
recommendations = personalized_recommendations(user_id, model, football_news_df, num_recommendations=10)
# print("Recommendations for User:")
# print(recommendations)


Epoch 1, Loss: 0.05395024362951517
Epoch 2, Loss: 0.04566503223031759
Epoch 3, Loss: 0.04114648327231407
Epoch 4, Loss: 0.03848567930981517
Epoch 5, Loss: 0.03413637587800622
Epoch 6, Loss: 0.02885532658547163
Epoch 7, Loss: 0.026404693257063627
Epoch 8, Loss: 0.02697556628845632
Epoch 9, Loss: 0.020995293278247118
Epoch 10, Loss: 0.02113227592781186
Average Precision@10: 0.0111
Average Recall@10: 0.0556
Average NDCG@10: 0.0293


# Part 2: Evaluation of recommendation system

## === Evaluation functions ===

### 1. Predictive quality metrics

In [27]:
def precision_at_k(recommended, relevant, k):
    recommended_at_k = recommended[:k]
    hits = len(set(recommended_at_k) & set(relevant))
    return hits / k


def recall_at_k(recommended, relevant, k):
    recommended_at_k = recommended[:k]
    hits = len(set(recommended_at_k) & set(relevant))
    return hits / len(relevant) if len(relevant) > 0 else 0


def f_score_at_k(precision, recall):
    return (
        2 * (precision * recall) / (precision + recall)
        if (precision + recall) > 0
        else 0
    )

### 2. Ranking quality metrics

In [28]:
def mrr(recommended, relevant):
    for i, item in enumerate(recommended):
        if item in relevant:
            return 1 / (i + 1)
    return 0


def average_precision(recommended, relevant):
    hits, precision_sum = 0, 0
    for i, item in enumerate(recommended):
        if item in relevant:
            hits += 1
            precision_sum += hits / (i + 1)
    return precision_sum / len(relevant) if relevant else 0


def ndcg(recommended, relevant, k):
    recommended_at_k = recommended[:k]
    dcg = sum(
        [1 / log2(i + 2) if recommended_at_k[i] in relevant else 0 for i in range(k)]
    )
    idcg = sum([1 / log2(i + 2) for i in range(min(len(relevant), k))])
    return dcg / idcg if idcg > 0 else 0

### 3. Behavioral metrics

In [59]:
# def diversity(recommended, content_embeddings):
#     similarities = cosine_similarity(content_embeddings[recommended])
#     return 1 - np.mean(similarities[np.triu_indices(len(similarities), k=1)])

def diversity(recommended, content_embeddings):
    embeddings = np.array([content_embeddings[news_id] for news_id in recommended if news_id in content_embeddings])
    if len(embeddings) < 2: 
        return 0
    
    similarities = cosine_similarity(embeddings)
    return 1 - np.mean(similarities[np.triu_indices(len(similarities), k=1)])

# def novelty(recommended, global_popularity):
#     scores = [global_popularity[news_id] for news_id in recommended if news_id in global_popularity]
#     return np.mean(scores) if scores else 0

# def serendipity(recommended, relevant, global_popularity):
#     relevant_popularity = [global_popularity[news_id] for news_id in relevant if news_id in global_popularity]
#     recommended_popularity = [global_popularity[news_id] for news_id in recommended if news_id in global_popularity]
    
#     if not relevant_popularity or not recommended_popularity:
#         return 0
    
#     return np.mean(recommended_popularity) - np.mean(relevant_popularity)



def novelty(recommended, global_popularity):
    return np.mean([1 - global_popularity[item] for item in recommended])


def serendipity(recommended, relevant, global_popularity):
    unexpected_items = [item for item in recommended if item not in relevant]
    return np.mean([1 - global_popularity[item] for item in unexpected_items])

## === Evaluation pipeline ===

In [57]:
def evaluate_system(system, test_data, content_embeddings, global_popularity, k=10):
    precision_scores, recall_scores, f_scores = [], [], []
    mrr_scores, map_scores, ndcg_scores = [], [], []
    diversity_scores, novelty_scores, serendipity_scores = [], [], []

    for _, row in test_data.iterrows():
        user_id = row['user_id']
        relevant = set(row['relevant_items'])
        recommended = system.recommend(user_id, k=k)

        # Skip evaluation if no recommendations are generated
        if not recommended:
            continue

        # Predictive metrics
        precision = precision_at_k(recommended, relevant, k)
        recall = recall_at_k(recommended, relevant, k)
        f_score = f_score_at_k(precision, recall)

        # Ranking metrics
        mrr_score = mrr(recommended, relevant)
        map_score = average_precision(recommended, relevant)
        ndcg_score = ndcg(recommended, relevant, k)

        # Behavioral metrics
        diversity_score = diversity(recommended, content_embedding_dict)
        novelty_score = novelty(recommended, global_popularity)
        serendipity_score = serendipity(recommended, relevant, global_popularity)

        # Append scores
        precision_scores.append(precision)
        recall_scores.append(recall)
        f_scores.append(f_score)
        mrr_scores.append(mrr_score)
        map_scores.append(map_score)
        ndcg_scores.append(ndcg_score)
        diversity_scores.append(diversity_score)
        novelty_scores.append(novelty_score)
        serendipity_scores.append(serendipity_score)

    return {
        "Precision@K": np.mean(precision_scores) if precision_scores else 0,
        "Recall@K": np.mean(recall_scores) if recall_scores else 0,
        "F-Score@K": np.mean(f_scores) if f_scores else 0,
        "MRR": np.mean(mrr_scores) if mrr_scores else 0,
        "MAP": np.mean(map_scores) if map_scores else 0,
        "NDCG": np.mean(ndcg_scores) if ndcg_scores else 0,
        "Diversity": np.mean(diversity_scores) if diversity_scores else 0,
        "Novelty": np.mean(novelty_scores) if novelty_scores else 0,
        "Serendipity": np.mean(serendipity_scores) if serendipity_scores else 0,
    }

### 3. Testing

In [60]:
print(f"Actual column names: {list(test_data.columns)}")

results = evaluate_system(system, test_data, content_embedding_dict, global_popularity, k=10)

print("Evaluation results:")
for metric, score in results.items():
    print(f"{metric}: {score:.4f}")


Actual column names: ['user_id', 'relevant_items']
Evaluation results:
Precision@K: 0.0200
Recall@K: 0.0400
F-Score@K: 0.0267
MRR: 0.0111
MAP: 0.0062
NDCG: 0.0200
Diversity: 0.7778
Novelty: 0.9874
Serendipity: 0.9873
