# Recommendations with IBM
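This notebook builds several article recommenders for the IBM recommendation system project: a rank-based (most popular) baseline, collaborative filtering with truncated SVD, content-based filtering with TF-IDF, and a hybrid of the two. It closes with a short example of computing precision, recall, and F1-score.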

In [None]:
# 1. Importing Libraries and Loading the Data
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.metrics.pairwise import cosine_similarity

# Read the interaction log and the article catalog from the local data folder
df_user_item, df_articles = (
    pd.read_csv(csv_path)
    for csv_path in ('data/user-item-interactions.csv', 'data/articles.csv')
)

# Preview the first rows of both frames (last expression of the cell, so it is displayed)
(df_user_item.head(), df_articles.head())

## 2. Data Exploration and Preprocessing

In [None]:
# Checking for missing values and data types.
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" after each report.
df_user_item.info()
df_articles.info()

# Descriptive statistics of interactions and articles
print(df_user_item.describe())
print(df_articles.describe())

# Dropping duplicates if any. The result is reassigned instead of mutated in
# place, and the index is rebuilt so that row labels stay equal to row
# positions (positional lookups such as .iloc, or rows of a matrix built from
# the frame, rely on that).
df_user_item = df_user_item.drop_duplicates().reset_index(drop=True)
df_articles = df_articles.drop_duplicates().reset_index(drop=True)

## 3. Rank-Based Recommendation System

In [None]:
def get_top_articles(df, n=10):
    """Return the ids of the ``n`` articles with the most interactions.

    Args:
        df: Interaction DataFrame with an ``article_id`` column; every row
            counts as one interaction.
        n: Maximum number of article ids to return.

    Returns:
        list: Article ids ordered from most to fewest interactions.
    """
    interactions_per_article = df['article_id'].value_counts()
    most_popular = interactions_per_article.iloc[:n]
    return most_popular.index.tolist()

def get_top_article_names(df, n=10):
    """Return the titles of the ``n`` articles with the most interactions.

    Args:
        df: Interaction DataFrame with an ``article_id`` column.
        n: Number of top articles to look up.

    Returns:
        numpy.ndarray: Unique titles ordered from most to least popular.
        Top article ids that have no row in the module-level ``df_articles``
        catalog are omitted.
    """
    top_articles_ids = get_top_articles(df, n)
    # Popularity rank of each id (0 = most interactions).
    rank_by_id = {article_id: rank for rank, article_id in enumerate(top_articles_ids)}

    matches = df_articles[df_articles['article_id'].isin(top_articles_ids)]
    # A plain isin() filter yields rows in catalog order, which loses the
    # ranking. Reorder the matches by popularity rank; the sort is stable so
    # rows belonging to the same article keep their catalog order.
    ranks = matches['article_id'].map(rank_by_id).to_numpy()
    ordered = matches.iloc[ranks.argsort(kind='stable')]
    return ordered['title'].unique()

# Show the titles of the ten articles with the most interactions
print(get_top_article_names(df_user_item, 10))

## 4. Collaborative Filtering Using Matrix Factorization (SVD)

In [None]:
# Create the user-item matrix (rows: users, columns: articles).
# Note: DataFrame.pivot raises ValueError if a (user_id, article_id) pair
# occurs more than once in df_user_item.
user_item_matrix = df_user_item.pivot(index='user_id', columns='article_id', values='interaction').fillna(0)

# Perform Singular Value Decomposition (SVD)
svd = TruncatedSVD(n_components=50, random_state=42)
# fit_transform returns one latent vector per input row, i.e. per user.
latent_matrix = svd.fit_transform(user_item_matrix)

# Compute similarity between articles.
# cosine_similarity(latent_matrix) would be a user-user matrix, which cannot be
# indexed by article position. The article factors live in svd.components_
# (shape: n_components x n_articles); transpose them to one row per article and
# scale by the singular values so articles are embedded the same way
# fit_transform embeds users.
item_latent_matrix = svd.components_.T * svd.singular_values_
similarity_matrix = cosine_similarity(item_latent_matrix)

def find_similar_items(item_id, n=10):
    """Return the ``n`` articles most similar to ``item_id``.

    Similarity scores are read from the module-level ``similarity_matrix``,
    whose rows are indexed by the position of the article among the columns
    of ``user_item_matrix``.

    Args:
        item_id: Article id; must be a column label of ``user_item_matrix``.
        n: Maximum number of similar articles to return.

    Returns:
        pandas.Index: Article ids ordered from most to least similar,
        excluding ``item_id`` itself.

    Raises:
        KeyError: If ``item_id`` is not a column of ``user_item_matrix``.
    """
    item_index = user_item_matrix.columns.get_loc(item_id)
    # argsort is ascending; reverse it so the best matches come first.
    ranked = similarity_matrix[item_index].argsort()[::-1]
    # An article is maximally similar to itself, so drop it rather than let it
    # occupy one of the n slots.
    ranked = ranked[ranked != item_index]
    # Slicing from the front also makes n=0 return nothing ([-0:] returned all).
    return user_item_matrix.columns[ranked[:max(n, 0)]]

# Example: Find 10 articles similar to article 123
# (the parameter is named item_id; passing article_id= raises TypeError)
print(find_similar_items(item_id=123, n=10))

## 5. Content-Based Filtering Using TF-IDF

In [None]:
# Extract article content features using TF-IDF
tfidf = TfidfVectorizer(stop_words='english', max_df=0.7)
# TfidfVectorizer raises on missing values, so articles without text are
# treated as empty documents.
article_tfidf = tfidf.fit_transform(df_articles['content'].fillna(''))

# Compute cosine similarity based on article content
def find_similar_content(article_id, n=10):
    article_index = df_articles.index[df_articles['article_id'] == article_id].tolist()[0]
    cosine_similarities = cosine_similarity(article_tfidf[article_index], article_tfidf).flatten()
    related_articles_indices = cosine_similarities.argsort()[-n:]
    return df_articles.iloc[related_articles_indices]['title']

# Demo: titles of the 10 articles whose text is closest to article 123
print(find_similar_content(123, n=10))

## 6. Hybrid Recommendation System

In [None]:
def hybrid_recommendation(article_id, n=10, alpha=0.5):
    """Blend collaborative and content-based recommendations for an article.

    Each candidate title scores ``alpha`` if the collaborative recommender
    (``find_similar_items``) proposed it, plus ``1 - alpha`` if the
    content-based recommender (``find_similar_content``) proposed it. Titles
    backed by both therefore never rank below single-source titles, and
    ``alpha`` decides which single-source group comes next.

    Args:
        article_id: Id of the article to find recommendations for.
        n: Maximum number of titles to return; also the number of candidates
            requested from each recommender.
        alpha: Weight of the collaborative signal, between 0 and 1.

    Returns:
        pandas.Series: Up to ``n`` unique titles, best first.

    Raises:
        ValueError: If ``alpha`` is outside ``[0, 1]``.
    """
    if not 0 <= alpha <= 1:
        raise ValueError("alpha must be between 0 and 1")

    similar_items = find_similar_items(article_id, n)
    similar_content = find_similar_content(article_id, n)

    # find_similar_items yields article ids while find_similar_content yields
    # titles, so translate the ids to titles to make both lists comparable.
    # (The ids also arrive as a pandas Index, which pd.concat refuses to
    # concatenate.)
    title_by_id = (
        df_articles.drop_duplicates(subset='article_id')
        .set_index('article_id')['title']
    )
    # Ids with no row in df_articles have no title and are skipped.
    collab_titles = title_by_id.reindex(pd.Index(similar_items)).dropna().tolist()
    content_titles = pd.Series(similar_content).dropna().tolist()

    # dict.fromkeys de-duplicates while keeping first-seen order.
    candidates = list(dict.fromkeys(collab_titles + content_titles))
    collab_set, content_set = set(collab_titles), set(content_titles)

    scores = pd.Series(
        [
            alpha * (title in collab_set) + (1 - alpha) * (title in content_set)
            for title in candidates
        ],
        index=candidates,
        dtype=float,
    )
    # Stable sort: equally scored titles keep the order the recommenders gave.
    ranked = scores.sort_values(ascending=False, kind='stable')
    return pd.Series(ranked.index[:max(n, 0)], name='title')

# Demo: blended (collaborative + content-based) recommendations for article 123
print(hybrid_recommendation(123, n=10))

## 7. Evaluation

In [None]:
# Example: Evaluate recommendations using Precision, Recall, and F1-score.
# The metric functions are imported in the first cell with the other
# dependencies. The labels below are hard-coded illustrative values, not the
# output of the recommenders above.
y_true = [1, 0, 1, 1, 0]  # True labels
y_pred = [1, 0, 1, 0, 1]  # Predicted recommendations

# Evaluation metrics
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

print(f"Precision: {precision}, Recall: {recall}, F1-Score: {f1}")