In [None]:
import sys
sys.path.append('/home/jakebybee/.local/bin')

from flask import Flask, request, jsonify
import joblib
import pandas as pd
from scipy.sparse import coo_matrix
import numpy as np
from flask_cors import CORS  # Import CORS
import nltk


# Restore the content-based filtering artefacts, stored together as a 3-tuple.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code, so
# the model file must come from a trusted source.
# NOTE(review): the `articles` name bound here is reassigned further down when
# shared_articles.csv is read, so the object stored in this file is not the
# one the endpoints end up using — confirm that is intended.
with open('content_filtering_model.sav', 'rb') as model_file:
    similarity_matrix, tfidf_matrix, articles = joblib.load(model_file)

from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import re


# English stop words, held in a set for fast membership tests in clean_text().
# presumably requires the NLTK 'stopwords' corpus to be available locally —
# TODO confirm it is downloaded before this cell runs.
stop_words = set(stopwords.words('english'))
# Stemmer instance shared by every clean_text() call.
ps = PorterStemmer()

def clean_text(text):
    """Normalise free text into a space-separated string of word stems.

    The text is lower-cased, every character that is not an ASCII letter or
    whitespace is removed, stop words are dropped, and each remaining word is
    stemmed with the module-level Porter stemmer.
    """
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text.lower())

    stems = []
    for token in letters_only.split():
        # Stop words are filtered on their unstemmed form.
        if token in stop_words:
            continue
        stems.append(ps.stem(token))

    return " ".join(stems)


def get_recommendations(title, articles_df, similarity_matrix, tfidf_matrix):
    """Return the titles of the articles most similar to a given article.

    Args:
        title: Despite the name, the ``contentId`` of the reference article;
            it is compared against ``articles_df['contentId']``.
        articles_df: DataFrame with ``contentId`` and ``title`` columns. Its
            row order must match the row/column order of
            ``similarity_matrix``.
        similarity_matrix: Square matrix of pairwise similarity scores,
            indexable by row position.
        tfidf_matrix: Unused; kept so existing callers keep working.

    Returns:
        Up to five titles ordered from most to least similar. The reference
        article itself is never included.

    Raises:
        IndexError: If no article has the requested ``contentId``.
    """
    # Resolve the article by contentId and by row *position*. Going through
    # the title (as before) picked the wrong row whenever two articles shared
    # a title, and using the index label broke on non-default indexes.
    matches = np.flatnonzero((articles_df['contentId'] == title).to_numpy())
    if matches.size == 0:
        raise IndexError(f"No article found with contentId {title!r}")
    position = int(matches[0])

    scores = np.asarray(similarity_matrix[position], dtype=float).ravel()

    # A stable sort keeps equal scores in their original row order.
    ranked = np.argsort(-scores, kind='stable')

    # Drop the reference article explicitly instead of assuming it is ranked
    # first; a tie with a duplicate article could otherwise leak it through.
    top_positions = ranked[ranked != position][:5]

    return articles_df.iloc[top_positions]['title'].tolist()


# Load the fitted nearest-neighbours model used by the /recommend endpoint.
# NOTE: joblib.load unpickles the file, so it must come from a trusted source.
knn_model = joblib.load('knn_model.sav')

# Load the articles dataset.
# NOTE(review): this rebinds `articles`, replacing the object unpacked from
# content_filtering_model.sav earlier in the file. get_recommendations() pairs
# rows of this frame with rows of similarity_matrix, so the CSV presumably has
# to be in the same row order as the data the matrix was built from — confirm.
articles = pd.read_csv('shared_articles.csv')

# Load the interaction log and turn each event type into a numeric rating.
# Event types missing from rating_map become NaN ratings.
users = pd.read_csv('users_interactions.csv')
rating_map = {"VIEW": 1, "LIKE": 2, "FOLLOW": 3, "BOOKMARK": 4, "COMMENT CREATED": 5}
users["rating"] = users["eventType"].map(rating_map)
merged = pd.merge(right=articles, left=users, right_on='contentId', left_on='contentId', how='left')

# .copy() gives `triple` its own data, so adding the code columns below no
# longer triggers pandas' SettingWithCopyWarning.
triple = merged[['personId', 'contentId', 'rating']].copy()

# Dense integer codes for persons and contents, used as matrix coordinates.
triple['mappingPerson'] = triple['personId'].astype('category').cat.codes
triple['mappingcontent'] = triple['contentId'].astype('category').cat.codes

# Person-by-content rating matrix in coordinate format.
sparse_matrix = coo_matrix((triple['rating'], (triple['mappingPerson'], triple['mappingcontent'])))

# personId -> matrix row code, and matrix column code -> contentId.
personMapping = dict(zip(triple['personId'], triple['mappingPerson']))
contentMapping = dict(zip(triple['mappingcontent'], triple['contentId']))

# Create the Flask application that serves the recommendation endpoints.
app = Flask(__name__)
# Apply flask_cors to the app with no extra options; presumably this enables
# cross-origin requests on every route — confirm against the deployment needs.
CORS(app)
@app.route('/recommend', methods=['GET'])
def recommend():
    """Return article titles recommended for a user by the kNN model.

    Query parameters:
        user_id: Integer ``personId`` of the user.

    Responses:
        200: ``{'recommendations': [title, ...]}``. Titles come back in the
            row order of ``articles``, not ranked by distance.
        400: ``user_id`` is missing or is not an integer.
        404: ``user_id`` does not appear in the interaction data.
        500: The model lookup raised; the exception text is returned.
    """
    # With type=int the lookup yields None when the parameter is absent or
    # cannot be parsed, so report that as a bad request rather than as an
    # unknown user (matches the 400 used by /get_recommendations).
    user_id = request.args.get('user_id', type=int)
    if user_id is None:
        return jsonify({'error': 'No valid user_id provided'}), 400

    if user_id not in personMapping:
        return jsonify({'error': 'User not found'}), 404

    try:
        user_row = sparse_matrix.getrow(personMapping[user_id])
        _, indices = knn_model.kneighbors(user_row, n_neighbors=5)
        # NOTE(review): the neighbour indices are interpreted here as content
        # codes. If knn_model was fitted on the person-by-content matrix they
        # would be person codes instead — confirm against the training code.
        recommendations = [contentMapping[content] for content in indices[0]]
        titles = articles[articles['contentId'].isin(recommendations)]['title'].tolist()
        return jsonify({'recommendations': titles})
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/get_recommendations', methods=['GET'])
def recommend_content():
    """Return titles of articles similar to the requested article.

    Query parameters:
        title: Despite the name, the integer ``contentId`` of the article.

    Responses:
        200: ``{"recommendations": [title, ...]}``.
        400: ``title`` is missing or is not an integer.
        404: No article has that ``contentId``.
        500: The similarity lookup raised; the exception text is returned.
    """
    # type=int yields None for a missing or non-numeric value. Previously
    # int() ran before the check, so those requests crashed with an unhandled
    # error, and the truthiness test also rejected a contentId of 0.
    content_id = request.args.get('title', type=int)
    if content_id is None:
        return jsonify({"error": "No title provided"}), 400

    # Get the recommendations
    try:
        # Report an unknown article as 404, mirroring /recommend's handling
        # of an unknown user, instead of surfacing it as a server error.
        if not (articles['contentId'] == content_id).any():
            return jsonify({"error": "Article not found"}), 404

        recommended_articles = get_recommendations(content_id, articles, similarity_matrix, tfidf_matrix)
        return jsonify({"recommendations": recommended_articles})
    except Exception as e:
        return jsonify({"error": str(e)}), 500


# Start the server only when this module is executed as the main program.
if __name__ == '__main__':
    # Host 0.0.0.0 listens on all addresses, so the API is reachable from
    # other machines on port 5000, not just from localhost.
    app.run(host="0.0.0.0", port=5000)

 * Serving Flask app '__main__'
 * Debug mode: off


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  triple['mappingPerson'] = triple['personId'].astype('category').cat.codes
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  triple['mappingcontent'] = triple['contentId'].astype('category').cat.codes
 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.1.99:5000
Press CTRL+C to quit
127.0.0.1 - - [03/Apr/2025 17:11:44] "GET /get_recommendations?title=9213260650272029784 HTTP/1.1" 500 -
127.0.0.1 - - [03/Apr/2025 17:11:48] "GET /get_recommendations?title=921326065027