In [1]:
from flask import Flask, request, jsonify
import pandas as pd
import numpy as np
import requests
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
import logging

In [2]:
# Configure logging
logging.basicConfig(level=logging.INFO, 
                    format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Flask app initialization
app = Flask(__name__)

BASE_URL = "https://api.socialverseapp.com"
HEADERS = {
    "Flic-Token": "flic_6e2d8d25dc29a4ddd382c2383a903cf4a688d1a117f6eb43b35a1e7fadbb84b8"
}

In [54]:
# --- Data Fetching and Preprocessing ---
def fetch_data(endpoint, max_pages=10, extra_params=None):
    """
    Fetch paginated data from a given API endpoint.
    """
    data = []
    page = 1

    if extra_params is None:
        extra_params = {}

    while page <= max_pages:
        # Add pagination parameters
        params = {"page": page, "page_size": 1000}
        params.update(extra_params)

        try:
            response = requests.get(f"{BASE_URL}/{endpoint}", headers=HEADERS, params=params)
            response.raise_for_status()  # Raise an exception for bad status codes

            # Extract the 'posts' key from the response JSON
            page_data = response.json().get('posts', [])
            if not page_data:  # Stop if no 'posts' data is returned
                page_data = response.json().get('users', [])
            if not page_data:   
                break
            
            data.extend(page_data)
            page += 1
        except requests.RequestException as e:
            logger.error(f"Error fetching data from {endpoint}: {e}")
            break

    return pd.DataFrame(data)

In [4]:
def load_data():
    """
    Load and preprocess data from various endpoints
    """
    global all_users, all_posts, rated_posts
    extra_params = {
        "resonance_algorithm": "resonance_algorithm_cjsvervb7dbhss8bdrj89s44jfjdbsjd0xnjkbvuire8zcjwerui3njfbvsujc5if"
    }
    
    try:
        viewed_posts = fetch_data(endpoint="posts/view", extra_params=extra_params)
        liked_posts = fetch_data(endpoint="posts/like", extra_params=extra_params)
        inspired_posts = fetch_data(endpoint="posts/inspire", extra_params=extra_params)
        rated_posts = fetch_data(endpoint="posts/rating", extra_params=extra_params)
        all_posts = fetch_data(endpoint="posts/summary/get")
        all_users = fetch_data(endpoint="users/get_all")

        # Rename id columns
        all_posts.rename(columns={'id': 'post_id'}, inplace=True)
        all_users.rename(columns={'id': 'user_id'}, inplace=True)

        # Merge interactions
        interactions = pd.concat([viewed_posts, liked_posts, inspired_posts, rated_posts])
        
        # Merge with metadata
        posts_with_metadata = pd.merge(interactions, all_posts, on='post_id', how='inner')
        user_data = pd.merge(posts_with_metadata, all_users, on='user_id', how='inner')

        logger.info(f"Total users: {len(all_users)}")
        logger.info(f"Total posts: {len(all_posts)}")
        logger.info(f"Total interactions: {len(interactions)}")
        logger.info(f"All posts columns: {all_posts.columns.tolist()}")

        return user_data, all_posts, rated_posts

    except Exception as e:
        logger.error(f"Error in data loading: {e}")
        raise

In [5]:
# Preprocessing and Model Training
def preprocess_data(user_data, all_posts):
    """
    Preprocess data for recommendation system
    """
    # Numeric feature scaling
    scaler = MinMaxScaler()
    numeric_features = ['upvote_count', 'view_count', 'rating_percent', 'average_rating']
    user_data[numeric_features] = scaler.fit_transform(user_data[numeric_features])

    # Text preprocessing
    all_posts['title'] = all_posts['title'].fillna('').astype(str)
    all_posts['post_summary'] = all_posts['post_summary'].fillna('').astype(str)

    # Vectorization
    tfidf = TfidfVectorizer(stop_words='english', max_features=500)
    content_combined = all_posts['title'] + ' ' + all_posts['post_summary']
    all_posts['content_vector'] = list(tfidf.fit_transform(content_combined).toarray())
    
    # Similarity matrix
    similarity_matrix = cosine_similarity(np.array(all_posts['content_vector'].tolist()))

    return user_data, all_posts, similarity_matrix

In [6]:
def train_recommendation_model(rated_posts):
    """
    Train collaborative filtering model
    """
    reader = Reader(rating_scale=(0, 100))
    data = Dataset.load_from_df(rated_posts[['user_id', 'post_id', 'rating_percent']], reader)
    
    # Split data for training and testing
    trainset, testset = train_test_split(data, test_size=0.2)
    
    # Train SVD model
    model = SVD()
    model.fit(trainset)
    
    return model, trainset, testset

In [55]:
# Load and preprocess data
try:
    user_data, all_posts, rated_posts = load_data()
    user_data, all_posts, similarity_matrix = preprocess_data(user_data, all_posts)
    model, trainset, testset = train_recommendation_model(rated_posts)
except Exception as e:
    logger.error(f"Initialization error: {e}")
    raise

2024-12-08 15:50:47,073 - ERROR - Error fetching data from posts/view: HTTPSConnectionPool(host='api.socialverseapp.com', port=443): Max retries exceeded with url: /posts/view?page=1&page_size=1000&resonance_algorithm=resonance_algorithm_cjsvervb7dbhss8bdrj89s44jfjdbsjd0xnjkbvuire8zcjwerui3njfbvsujc5if (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x16be39ee0>: Failed to resolve 'api.socialverseapp.com' ([Errno 8] nodename nor servname provided, or not known)"))
2024-12-08 15:50:47,075 - ERROR - Error fetching data from posts/like: HTTPSConnectionPool(host='api.socialverseapp.com', port=443): Max retries exceeded with url: /posts/like?page=1&page_size=1000&resonance_algorithm=resonance_algorithm_cjsvervb7dbhss8bdrj89s44jfjdbsjd0xnjkbvuire8zcjwerui3njfbvsujc5if (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x176237530>: Failed to resolve 'api.socialverseapp.com' ([Errno 8] nodename nor servname provided, or not known)"))
20

KeyError: 'post_id'

In [49]:
# --- Recommendation Functions ---
def recommend_content_based(user_id, num_recommendations=10):
    """
    Content-based recommendation
    """
    user_interactions = user_data[user_data['user_id'] == user_id]
    user_posts = user_interactions['post_id'].unique()

    similar_posts = []
    for post_id in user_posts:
        post_index = all_posts[all_posts['post_id'] == post_id].index[0]
        similar_posts += list(enumerate(similarity_matrix[post_index]))

    similar_posts = sorted(similar_posts, key=lambda x: x[1], reverse=True)
    recommended_post_ids = [
        all_posts.iloc[i[0]]['post_id']
        for i in similar_posts
        if all_posts.iloc[i[0]]['post_id'] not in user_posts
    ]
    return recommended_post_ids[:num_recommendations]

def recommend_collaborative(user_id, num_recommendations=10):
    """
    Collaborative filtering recommendation
    """
    user_rated_posts = rated_posts[rated_posts['user_id'] == user_id]['post_id'].unique()
    all_post_ids = all_posts['post_id'].unique()
    unrated_posts = [post_id for post_id in all_post_ids if post_id not in user_rated_posts]

    predictions = [model.predict(user_id, post_id) for post_id in unrated_posts]
    predictions = sorted(predictions, key=lambda x: x.est, reverse=True)

    recommended_post_ids = [pred.iid for pred in predictions[:num_recommendations]]
    return recommended_post_ids

def recommend_hybrid(user_id, category_id=None, mood=None, num_recommendations=10):
    """
    Hybrid recommendation with optional filtering
    """
    filtered_posts = all_posts.copy()
    
    # Apply optional filters
    if category_id:
        try:
            # Assuming category is a nested dictionary
            filtered_posts = filtered_posts[filtered_posts['category'].apply(lambda x: x.get('id') == int(category_id))]
        except (KeyError, TypeError) as e:
            print(f"Warning: Could not filter by category. Error: {e}")
    if mood:
        if 'mood' in filtered_posts.columns:
            filtered_posts = filtered_posts[filtered_posts['mood'] == mood]
        else:
            logger.warning("'mood' column not found in posts data. Skipping mood filter.")

    # Generate recommendations
    content_recs = recommend_content_based(user_id, num_recommendations)
    collab_recs = recommend_collaborative(user_id, num_recommendations)

    # Combine and deduplicate recommendations
    recs = list(set(content_recs) | set(collab_recs))
    return recs[:num_recommendations]

In [9]:
def get_user_id(username, all_users):
    print(username)
    user = all_users[all_users['username'] == username]
    if not user.empty:
        return user.iloc[0]['user_id']
    else:
        return None

In [39]:
all_users['user_id'].shape

(1234,)

In [17]:
user_id = get_user_id(username="doeyyy", all_users=all_users)
print(recommend_hybrid(user_id=9, mood="happy"))



doeyyy
[770, 1029, 325, 1288, 1224, 11, 779, 1103, 785, 918]


In [18]:
all_users.columns

Index(['user_id', 'first_name', 'last_name', 'username', 'email', 'role',
       'profile_url', 'bio', 'website_url', 'instagram-url', 'youtube_url',
       'tictok_url', 'isVerified', 'referral_code', 'has_wallet', 'last_login',
       'share_count', 'post_count', 'following_count', 'follower_count',
       'is_verified', 'is_online', 'latitude', 'longitude'],
      dtype='object')

In [19]:
print("All Posts Columns:", all_posts.columns.tolist())
print("User Data Columns:", user_data.columns.tolist())

All Posts Columns: ['post_id', 'category', 'slug', 'title', 'identifier', 'comment_count', 'upvote_count', 'view_count', 'exit_count', 'rating_count', 'average_rating', 'share_count', 'video_link', 'contract_address', 'chain_id', 'chart_url', 'baseToken', 'is_locked', 'created_at', 'first_name', 'last_name', 'username', 'upvoted', 'bookmarked', 'thumbnail_url', 'gif_thumbnail_url', 'following', 'picture_url', 'post_summary', 'content_vector']
User Data Columns: ['id', 'post_id', 'user_id', 'viewed_at', 'liked_at', 'inspired_at', 'rating_percent', 'rated_at', 'category', 'slug', 'title', 'identifier', 'comment_count', 'upvote_count', 'view_count', 'exit_count', 'rating_count', 'average_rating', 'share_count_x', 'video_link', 'contract_address', 'chain_id', 'chart_url', 'baseToken', 'is_locked', 'created_at', 'first_name_x', 'last_name_x', 'username_x', 'upvoted', 'bookmarked', 'thumbnail_url', 'gif_thumbnail_url', 'following', 'picture_url', 'post_summary', 'first_name_y', 'last_name_y'

In [33]:
def recommend_content_based(user_id, num_recommendations=10):
    """
    Content-based recommendation with similarity scores
    
    Returns:
        List of tuples (post_id, similarity_score)
    """
    user_interactions = user_data[user_data['user_id'] == user_id]
    user_posts = user_interactions['post_id'].unique()

    similar_posts = []
    for post_id in user_posts:
        post_index = all_posts[all_posts['post_id'] == post_id].index[0]
        # Create list of (post_index, similarity_score) tuples
        similar_posts += [(index, score) 
                          for index, score in enumerate(similarity_matrix[post_index])
                          if all_posts.iloc[index]['post_id'] not in user_posts]

    # Sort by similarity score in descending order
    similar_posts = sorted(similar_posts, key=lambda x: x[1], reverse=True)
    
    # Convert to list of (post_id, similarity_score)
    recommended_posts_with_scores = [
        (all_posts.iloc[i[0]]['post_id'], i[1])
        for i in similar_posts
    ]
    
    return recommended_posts_with_scores[:num_recommendations]

def recommend_collaborative(user_id, num_recommendations=10):
    """
    Collaborative filtering recommendation with prediction scores
    
    Returns:
        List of tuples (post_id, prediction_score)
    """
    user_rated_posts = rated_posts[rated_posts['user_id'] == user_id]['post_id'].unique()
    all_post_ids = all_posts['post_id'].unique()
    unrated_posts = [post_id for post_id in all_post_ids if post_id not in user_rated_posts]

    predictions = [
        (post_id, model.predict(user_id, post_id).est) 
        for post_id in unrated_posts
    ]
    
    # Sort predictions by estimated rating in descending order
    predictions = sorted(predictions, key=lambda x: x[1], reverse=True)

    return predictions[:num_recommendations]

def recommend_hybrid_optimized(user_id, category_id=None, mood=None, num_recommendations=10):
    """
    Enhanced hybrid recommendation with weighted blending of content-based and collaborative filtering.
    
    Args:
        user_id (int): The ID of the user to generate recommendations for
        category_id (int, optional): Filter recommendations by specific category
        mood (str, optional): Filter recommendations by mood
        num_recommendations (int, optional): Number of recommendations to return
    
    Returns:
        list: A list of recommended post IDs
    """
    # Content-based recommendations
    content_recs = recommend_content_based(user_id, num_recommendations * 2)
    content_scores = dict(content_recs)

    # Collaborative filtering recommendations
    collab_recs = recommend_collaborative(user_id, num_recommendations * 2)
    collab_scores = dict(collab_recs)

    # Combine recommendations with weighted blending
    alpha = 0.7 if user_id in rated_posts['user_id'].unique() else 0.4
    combined_scores = {}
    
    for post_id in set(content_scores.keys()).union(collab_scores.keys()):
        content_score = content_scores.get(post_id, 0)
        collab_score = collab_scores.get(post_id, 0)
        combined_scores[post_id] = alpha * content_score + (1 - alpha) * collab_score

    # Apply category and mood filters
    filtered_posts = all_posts.copy()
    
    # Check if 'category' column exists and is nested
    if category_id:
        try:
            # Assuming category is a nested dictionary
            filtered_posts = filtered_posts[filtered_posts['category'].apply(lambda x: x.get('id') == int(category_id))]
        except (KeyError, TypeError) as e:
            print(f"Warning: Could not filter by category. Error: {e}")
    
    # Add robust mood filtering
    if mood:
        try:
            # Check different possible ways mood might be stored
            if 'mood' in filtered_posts.columns:
                filtered_posts = filtered_posts[filtered_posts['mood'] == mood]
            elif 'emotions' in filtered_posts.columns:
                # If emotions is a list or nested structure
                filtered_posts = filtered_posts[filtered_posts['emotions'].apply(lambda x: mood in x)]
            else:
                print(f"Warning: No mood column found. Cannot filter by mood: {mood}")
        except (KeyError, TypeError) as e:
            print(f"Warning: Could not filter by mood. Error: {e}")

    # Rank and select top recommendations
    ranked_posts = sorted(
        ((post_id, combined_scores.get(post_id, 0)) for post_id in filtered_posts['post_id']),
        key=lambda x: x[1],
        reverse=True
    )
    
    recommended_post_ids = [post_id for post_id, _ in ranked_posts[:num_recommendations]]

    return recommended_post_ids

In [53]:
recommend_hybrid(user_id=1, category_id=2)

[325, 1029, 200, 1226, 82, 665, 921, 155, 1246, 1059]