In [2]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.stats import norm
from scipy.special import erfinv
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

# Load Datasets
users_df = pd.read_csv("users.csv")
posts_df = pd.read_csv("posts.csv")
interactions_df = pd.read_csv("interactions.csv")

# -------------------------  Personalized Ranking (ALS) -------------------------
# Convert implicit feedback into ratings
interactions_df["rating"] = interactions_df["upvotes"] * 1 + interactions_df["saves"] * 3 - interactions_df["downvotes"] * 2

# Use Surprise SVD for Collaborative Filtering
reader = Reader(rating_scale=(-2, 3))  # Ratings range from -2 to 3
data = Dataset.load_from_df(interactions_df[["user_id", "post_id", "rating"]], reader)
trainset, testset = train_test_split(data, test_size=0.2)

model = SVD()
model.fit(trainset)

# Generate ALS-based predictions
def get_als_recommendations(user_id, top_n=10):
    post_ids = posts_df["post_id"].tolist()
    predictions = [(post, model.predict(user_id, post).est) for post in post_ids]
    predictions.sort(key=lambda x: x[1], reverse=True)
    return [post for post, _ in predictions[:top_n]]

# ---------------------- Content-Based Filtering (TF-IDF / Similarity) ----------------------
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(posts_df["content"])

# Compute similarity between posts
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Get recommendations based on user interests
def get_content_based_recommendations(user_id, top_n=10):
    user_interests = users_df[users_df["user_id"] == user_id]["interests"].values[0]
    user_posts = posts_df[posts_df["tags"].apply(lambda tags: any(i in tags for i in user_interests))]

    if user_posts.empty:
        return []  # No relevant posts found

    sim_scores = cosine_sim[user_posts.index].mean(axis=0)
    post_indices = np.argsort(sim_scores)[::-1][:top_n]
    return posts_df.iloc[post_indices]["post_id"].tolist()

# ---------------------- Industry & Work Profile Prioritization ----------------------
def get_industry_recommendations(user_id, recommendations, top_n=10):
    user_industry = users_df[users_df["user_id"] == user_id]["industry"].values[0]
    industry_posts = posts_df[posts_df["industry"] == user_industry]["post_id"].tolist()

    prioritized = [post for post in recommendations if post in industry_posts]
    remaining = [post for post in recommendations if post not in industry_posts]

    return (prioritized + remaining)[:top_n]

# ----------------------  Bayesian Ranking (Wilson Score Interval) ----------------------
def wilson_score(upvotes, downvotes, confidence=0.95):
    n = upvotes + downvotes
    if n == 0:
        return 0
    z = norm.ppf(1 - (1 - confidence) / 2)
    p = upvotes / n
    return (p + z**2 / (2 * n) - z * ((p * (1 - p) + z**2 / (4 * n)) / n)**0.5) / (1 + z**2 / n)

# Apply Bayesian ranking
posts_df["wilson_score"] = posts_df.apply(lambda row: wilson_score(row["upvotes"], row["downvotes"]), axis=1)
posts_df = posts_df.sort_values(by="wilson_score", ascending=False)

# ----------------------  Final Recommendation Function ----------------------
def get_final_recommendations(user_id, top_n=10):
    als_recs = get_als_recommendations(user_id, top_n=30)
    content_recs = get_content_based_recommendations(user_id, top_n=30)

    combined_recs = list(set(als_recs + content_recs))
    industry_prioritized_recs = get_industry_recommendations(user_id, combined_recs, top_n=30)

    final_recommendations = sorted(industry_prioritized_recs, key=lambda post: posts_df[posts_df["post_id"] == post]["wilson_score"].values[0], reverse=True)

    return final_recommendations[:top_n]

# Test Recommendation
user_id = 5
recommended_posts = get_final_recommendations(user_id, top_n=10)
print("Recommended Posts:", recommended_posts)


Recommended Posts: [3501, 1684, 1409, 3002, 2707, 942, 3895, 2079, 3730, 3994]


In [2]:
!pip install --no-cache-dir scikit-surprise

Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl size=2505210 sha256=c1ec00205efbcacec6a7c30449b72142e9e02fa17a91ebf8401e82040218668b
  Stored in directory: /tmp/pip-ephem-wheel-cache-s_0p5fco/wheels/2a/8f/6e/7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Installing collected packages: sc

In [4]:
!pip install numpy



In [5]:
!pip uninstall -y numpy scikit-surprise
!pip cache purge

Found existing installation: numpy 2.0.2
Uninstalling numpy-2.0.2:
  Successfully uninstalled numpy-2.0.2
Found existing installation: scikit-surprise 1.1.4
Uninstalling scikit-surprise-1.1.4:
  Successfully uninstalled scikit-surprise-1.1.4
Files removed: 30


In [6]:
pip install numpy==1.23.5

Collecting numpy==1.23.5
  Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)
Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m74.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
jaxlib 0.5.1 requires numpy>=1.25, but you have numpy 1.23.5 which is incompatible.
treescope 0.1.9 requires numpy>=1.25.2, but you have numpy 1.23.5 which is incompatible.
albucore 0.0.23 requires numpy>=1.24.4, but you have numpy 1.23.5 which is incompatible.
blosc2 3.2.0 requires numpy>=1.26, but you have numpy 1.23.5 which is incompatible.
chex 0.1.89 requires numpy>=1.24.1, but you have numpy 1.23.5 which is incompatible.
jax 

In [1]:
pip install --no-cache-dir scikit-surprise

Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl size=2505219 sha256=79e941544980c4095f33e845866d8d44d2f7e14b1486b0ec34da1d781c3ab22b
  Stored in directory: /tmp/pip-ephem-wheel-cache-4gze8mwb/wheels/2a/8f/6e/7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Installing collected packages: sc