In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the raw personality survey and the movie ratings from disk.
personality_data = pd.read_csv('personality_data.csv')
ratings = pd.read_csv('ratings.csv')

# Keep the user key plus the five trait columns; every other column is dropped.
personality_data = personality_data.loc[
    :,
    ['userid', 'openness', 'agreeableness', 'emotional_stability', 'conscientiousness', 'extraversion'],
]

# Attach each rating to the personality profile of the user who gave it.
data = ratings.merge(personality_data, on='userid')

# Encode raw user and movie identifiers as integer category codes.
data = data.assign(
    user_index=lambda frame: frame['userid'].astype('category').cat.codes,
    movie_index=lambda frame: frame['movie_id'].astype('category').cat.codes,
)

# Hold out 20% of the rows for evaluation (fixed seed so the split is repeatable).
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

(train_data.head(), test_data.head())


(                                  userid  movie_id  rating           genres  \
 680239  ce044e60f703e4789957e94a1119407e    3635.0     3.5     Comedy|Crime   
 174421  10dae2db89bc660a12ae0a03ec03a28d    2657.0     3.5            Drama   
 476933  d7443883fe8c7f36f7c3eaa2403ec820    2443.0     4.0            Drama   
 150593  4638502dc978788ec9d44e64e43a6757  111743.0     3.0   Comedy|Romance   
 64141   8089532820067141cdda1bef0e24fe6e  111384.0     3.5  Horror|Thriller   
 
         openness  agreeableness  emotional_stability  conscientiousness  \
 680239       5.5            3.0                  5.5                4.0   
 174421       6.0            4.5                  2.0                4.5   
 476933       4.0            5.0                  4.5                4.0   
 150593       6.0            4.5                  4.5                4.5   
 64141        6.5            3.5                  4.5                4.5   
 
         extraversion  user_index  movie_index  
 680239    

In [6]:
import numpy as np

def train_local_model(train_data, n_factors=10, n_epochs=10, learning_rate=0.01, random_state=None):
    """Fit a matrix-factorization rating model with per-rating SGD.

    Each observed rating is approximated by the dot product of a user factor
    vector and a movie factor vector. No bias terms or regularization are used.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Must provide the columns ``'user_index'``, ``'movie_index'`` and
        ``'rating'``.
    n_factors : int, default 10
        Length of every latent factor vector.
    n_epochs : int, default 10
        Number of full passes over ``train_data``.
    learning_rate : float, default 0.01
        SGD step size.
    random_state : int or None, default None
        Seed for the factor initialization. ``None`` draws fresh entropy, so
        results differ from run to run.

    Returns
    -------
    tuple
        ``(user_factors, movie_factors, user_id_to_index, movie_id_to_index)``.
        The two arrays have one row per distinct user / movie. The two dicts
        map each ``user_index`` / ``movie_index`` value to its row, in order of
        first appearance in ``train_data``.
    """
    user_ids = train_data['user_index'].unique()
    movie_ids = train_data['movie_index'].unique()

    # Row position of every user/movie inside the factor matrices.
    user_id_to_index = {user_id: i for i, user_id in enumerate(user_ids)}
    movie_id_to_index = {movie_id: i for i, movie_id in enumerate(movie_ids)}

    # Small random initialization; user factors are drawn first, then movie factors.
    rng = np.random.default_rng(random_state)
    user_factors = rng.normal(0, 0.1, (len(user_ids), n_factors))
    movie_factors = rng.normal(0, 0.1, (len(movie_ids), n_factors))

    # Resolve the factor rows once instead of materializing a Series per row
    # (DataFrame.iterrows) on every epoch.
    user_rows = [user_id_to_index[user_id] for user_id in train_data['user_index']]
    movie_rows = [movie_id_to_index[movie_id] for movie_id in train_data['movie_index']]
    ratings = train_data['rating'].to_numpy(dtype=float)

    for _ in range(n_epochs):
        for user_row, movie_row, rating in zip(user_rows, movie_rows, ratings):
            # Snapshot the user vector so both updates use the values from
            # before this step; otherwise the movie update would see the
            # freshly modified user vector.
            user_vector = user_factors[user_row].copy()
            error = rating - np.dot(user_vector, movie_factors[movie_row])

            user_factors[user_row] += learning_rate * error * movie_factors[movie_row]
            movie_factors[movie_row] += learning_rate * error * user_vector

    return user_factors, movie_factors, user_id_to_index, movie_id_to_index

# Fit the factor model on the training split, then bundle the two factor
# matrices as one "local model".
(user_factors,
 movie_factors,
 user_id_to_index,
 movie_id_to_index) = train_local_model(train_data)
local_model = user_factors, movie_factors


In [7]:
def aggregate_models(local_models):
    """Average several local models element-wise.

    ``local_models`` is a sequence of ``(user_factors, movie_factors)`` pairs.
    The result is a pair holding the mean user-factor array and the mean
    movie-factor array, each averaged across the models (axis 0 of the stack).
    """
    def mean_at(position):
        # Collect the array stored at `position` in every model and average them.
        return np.mean([model[position] for model in local_models], axis=0)

    return mean_at(0), mean_at(1)

# Stand-in for aggregation across clients: the single trained model is
# submitted three times, so the average equals that model.
aggregated_model = aggregate_models([local_model] * 3)


In [8]:
def recommend_movies(user_factors, movie_factors, user_id, user_id_to_index, movie_id_to_index, top_n=10):
    """Return the ``top_n`` highest-scoring movies for one user.

    Parameters
    ----------
    user_factors, movie_factors : numpy.ndarray
        Factor matrices with one row per user / movie.
    user_id : hashable
        Key into ``user_id_to_index`` identifying the user.
    user_id_to_index, movie_id_to_index : dict
        Map a user / movie key to its row in the matching factor matrix.
    top_n : int, default 10
        Number of movies to return. Values <= 0 give an empty list.

    Returns
    -------
    list
        Keys of ``movie_id_to_index`` for the best-scoring movies, ordered by
        ascending predicted score (the strongest recommendation is last).

    Raises
    ------
    KeyError
        If ``user_id`` is not in ``user_id_to_index``, or a selected row has
        no key in ``movie_id_to_index``.
    """
    user_vector = user_factors[user_id_to_index[user_id]]
    if top_n <= 0:
        # A slice of [-0:] would select every movie, not none.
        return []

    predictions = np.dot(user_vector, movie_factors.T)
    best_rows = np.argsort(predictions)[-top_n:]

    # Build the row -> key map once instead of rebuilding two lists and doing
    # a linear search per recommendation. setdefault keeps the first key when
    # several keys share a row, matching a first-match search.
    row_to_movie_id = {}
    for movie_id, row in movie_id_to_index.items():
        row_to_movie_id.setdefault(row, movie_id)

    return [row_to_movie_id[int(row)] for row in best_rows]
# `random` has to be imported before this cell uses it; without this line a
# fresh Restart & Run All stops here with a NameError.
import random

# Select 10 random users from the training data
random_user_ids = random.sample(list(train_data['user_index'].unique()), 10)

# Generate recommendations for each random user
for user_id in random_user_ids:
    recommendations = recommend_movies(aggregated_model[0], aggregated_model[1], user_id, user_id_to_index, movie_id_to_index)
    print(f"Recommended movies for user {user_id}: {recommendations}")


Recommended movies for user 908: [14601, 29751, 32789, 17309, 251, 20741, 31043, 5644, 32445, 8887]
Recommended movies for user 109: [16009, 17519, 14559, 3311, 20991, 17309, 20006, 8887, 32445, 31043]
Recommended movies for user 30: [28186, 29751, 2741, 287, 22311, 13113, 17309, 31043, 797, 32445]
Recommended movies for user 1172: [29268, 30967, 14601, 16009, 17519, 29751, 17309, 31043, 8887, 32445]
Recommended movies for user 1206: [20991, 20006, 16009, 11587, 13910, 1113, 17519, 17309, 32445, 31043]
Recommended movies for user 949: [26412, 9825, 32445, 3311, 19649, 17309, 13910, 16009, 18770, 11587]
Recommended movies for user 306: [1113, 18770, 10718, 20991, 13910, 17309, 11587, 16009, 31043, 32445]
Recommended movies for user 1114: [13113, 12715, 17309, 797, 49, 287, 30967, 1113, 31043, 32445]
Recommended movies for user 816: [20006, 11587, 13786, 13910, 32445, 17519, 5644, 17309, 16009, 31043]
Recommended movies for user 163: [5644, 49, 11922, 856, 844, 31043, 287, 1113, 797, 324

In [9]:
from sklearn.metrics import mean_squared_error, precision_score, recall_score

def evaluate_model(test_data, user_factors, movie_factors, user_id_to_index, movie_id_to_index, threshold=3):
    """Print precision, recall and F1 of the factor model on held-out ratings.

    Every test row is scored with the dot product of its user and movie factor
    vectors. True and predicted ratings are binarized (``>= threshold`` is the
    positive class) before the classification metrics are computed.

    Parameters
    ----------
    test_data : pandas.DataFrame
        Must provide the columns ``'user_index'``, ``'movie_index'`` and
        ``'rating'``.
    user_factors, movie_factors : numpy.ndarray
        Factor matrices with one row per user / movie.
    user_id_to_index, movie_id_to_index : dict
        Map a user / movie key to its row in the matching factor matrix.
        Test rows whose user or movie key is missing from these are skipped.
    threshold : float, default 3
        Ratings at or above this value count as positive.

    Returns
    -------
    None
        The metrics are printed, not returned.
    """
    true_ratings = []
    predicted_ratings = []

    # Iterate the three needed columns directly; this avoids building a
    # Series per row as DataFrame.iterrows does.
    rows = zip(test_data['user_index'], test_data['movie_index'], test_data['rating'])
    for user_key, movie_key, true_rating in rows:
        user_row = user_id_to_index.get(user_key)
        movie_row = movie_id_to_index.get(movie_key)

        # Skip if the user or movie was not seen during training
        if user_row is None or movie_row is None:
            continue

        true_ratings.append(true_rating)
        predicted_ratings.append(np.dot(user_factors[user_row], movie_factors[movie_row]))

    if not true_ratings:
        # With nothing to score the metrics are undefined; report that and stop.
        print("No test ratings could be scored: no test row has both a known user and a known movie.")
        return

    # Binarize ratings for precision/recall
    true_binary = [1 if rating >= threshold else 0 for rating in true_ratings]
    predicted_binary = [1 if rating >= threshold else 0 for rating in predicted_ratings]

    # Calculate Precision and Recall
    precision = precision_score(true_binary, predicted_binary)
    recall = recall_score(true_binary, predicted_binary)
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")

    # Calculate F1 Score; report 0.0 when precision and recall are both zero
    # so the harmonic mean does not divide by zero.
    denominator = precision + recall
    f1 = 2 * (precision * recall) / denominator if denominator > 0 else 0.0
    print(f"F1-Score: {f1}")

# Score the aggregated (user_factors, movie_factors) pair on the held-out split.
evaluate_model(test_data, *aggregated_model, user_id_to_index, movie_id_to_index)


Precision: 0.8937827151798434
Recall: 0.8158850304301131
F1-Score: 0.8530592445753268


In [11]:
import numpy as np
import pandas as pd
import random
from ipywidgets import widgets, VBox, Button, HBox, Label, RadioButtons, Output
from IPython.display import display
import requests

# Load your previously trained model and data
# Assuming you have local_model, user_id_to_index, movie_id_to_index, and aggregated_model loaded

# Questionnaire statements, shown one at a time in list order.
# NOTE(review): judging by the wording, the items appear to rotate through five
# traits with a period of five (positions 0, 5, 10, ... read as extraversion;
# 1, 6, ... as agreeableness; 2, 7, ... as emotional stability; 3, 8, ... as
# conscientiousness; 4, 9, ... as openness/intellect), and some items are
# worded in the reverse direction (e.g. "I don't talk a lot."). Confirm against
# the scoring key of the source inventory before scoring by position.
questions = [
    "I am the life of the party.",
    "I feel little concern for others.",
    "I get stressed out easily.",
    "I am always prepared.",
    "I have a rich vocabulary.",
    "I don't talk a lot.",
    "I am interested in people.",
    "I am relaxed most of the time.",
    "I leave my belongings around.",
    "I have difficulty understanding abstract ideas.",
    "I feel comfortable around people.",
    "I insult people.",
    "I worry about things.",
    "I pay attention to details.",
    "I have a vivid imagination.",
    "I keep in the background.",
    "I sympathize with others' feelings.",
    "I seldom feel blue.",
    "I make a mess of things.",
    "I am not interested in abstract ideas.",
    "I start conversations.",
    "I am not interested in other people's problems.",
    "I am easily disturbed.",
    "I get chores done right away.",
    "I have excellent ideas.",
    "I have little to say.",
    "I have a soft heart.",
    "I get upset easily.",
    "I often forget to put things back in their proper place.",
    "I do not have a good imagination.",
    "I talk to a lot of different people at parties.",
    "I am not really interested in others.",
    "I change my mood a lot.",
    "I like order.",
    "I am quick to understand things.",
    "I don't like to draw attention to myself.",
    "I take time out for others.",
    "I have frequent mood swings.",
    "I shirk my duties.",
    "I use difficult words.",
    "I don't mind being the center of attention.",
    "I feel others' emotions.",
    "I get irritated easily.",
    "I follow a schedule.",
    "I spend time reflecting on things.",
    "I am quiet around strangers.",
    "I make people feel at ease.",
    "I often feel blue.",
    "I am exacting in my work.",
    "I am full of ideas."
]

# Answer choices offered for every question, from strongest disagreement to
# strongest agreement.
options = ["Completely Disagree", "Disagree", "Neutral", "Agree", "Completely Agree"]

# Index of the first question to show.
question_index = 0
# One slot per question; display_question stores the selected option string
# at the question's position when "Next" is clicked.
responses = [None] * len(questions)

# Output widget that hosts the current question and, at the end, the
# recommendations.
output_area = Output()

def display_question(index):
    """Render question number ``index`` inside ``output_area``.

    The previous content of ``output_area`` is cleared, then the statement, a
    radio group with the answer options and a "Next" button are shown. Clicking
    the button stores the selected option in ``responses[index]`` and moves on
    to the next question, or to ``display_results`` after the last one.
    """
    with output_area:
        output_area.clear_output()

        prompt = Label(value=questions[index])
        answer_picker = RadioButtons(options=options, layout={'width': 'max-content'})

        def record_and_advance(_button):
            # Persist the current selection before the widgets are replaced.
            responses[index] = answer_picker.value
            following = index + 1
            if following >= len(questions):
                display_results()
            else:
                display_question(following)

        advance_button = Button(description="Next")
        advance_button.on_click(record_and_advance)

        panel = VBox([prompt, answer_picker, advance_button])
        display(panel)

def display_results():
    """Replace the questionnaire in ``output_area`` with movie recommendations.

    One user is drawn at random from ``train_data`` and the aggregated model's
    recommendations for that user are printed.

    NOTE(review): the questionnaire answers in ``responses`` do not influence
    the result, although the printed text says "based on personality traits".
    Linking answers to a user profile is still to be implemented.
    """
    # No trait tallies are computed here: nothing downstream consumes them.
    # random.choice needs only one available user, whereas sampling 10 and
    # keeping the first fails when fewer than 10 users exist.
    candidate_user_ids = list(train_data['user_index'].unique())
    chosen_user_id = random.choice(candidate_user_ids)
    recommendations = recommend_movies(aggregated_model[0], aggregated_model[1], chosen_user_id, user_id_to_index, movie_id_to_index)

    with output_area:
        output_area.clear_output()
        print(f"Recommended movies for user based on personality traits: {recommendations}")

# Render the first question into output_area, then attach output_area to this
# cell's output so the questionnaire becomes visible.
display_question(question_index)
display(output_area)

def calculate_personality_traits(responses):
    """Tally questionnaire answers into five trait scores (0-10 each).

    The statements in ``questions`` are interleaved: the same trait recurs
    every fifth item, so each trait is read with a stride of 5 rather than
    from a contiguous block of 10. An item adds one point when it is answered
    in the direction that supports the trait: "Agree"/"Completely Agree" for
    items marked ``+``, "Disagree"/"Completely Disagree" for reverse-worded
    items marked ``-``. Neutral or missing (``None``) answers add nothing.

    NOTE(review): the +/- keys below follow the wording of each statement
    (e.g. "I don't talk a lot." is reverse-worded for extraversion); confirm
    them against the published scoring key of the inventory.
    NOTE(review): the items at positions 4, 9, 14, ... are not scored because
    the returned tuple has no slot for them.

    Parameters
    ----------
    responses : sequence
        Answer strings in question order; shorter sequences are scored on the
        items present.

    Returns
    -------
    tuple of int
        ``(extroversion, conscientiousness, agreeableness,
        emotional_stability, neuroticism)``. The last two are opposite
        readings of the same items (positions 2, 7, 12, ...).
    """
    agree = ("Agree", "Completely Agree")
    disagree = ("Disagree", "Completely Disagree")

    def keyed_tally(offset, keys):
        # Count items of one trait answered in the direction given by `keys`.
        total = 0
        for resp, key in zip(responses[offset::5], keys):
            supporting = agree if key == "+" else disagree
            total += int(resp in supporting)
        return total

    stability_keys = "-+-+------"
    # Neuroticism reads the same items with every key flipped.
    neuroticism_keys = "".join("+" if key == "-" else "-" for key in stability_keys)

    extroversion = keyed_tally(0, "+-+-+-+-+-")
    agreeableness = keyed_tally(1, "-+-+-+-+++")
    emotional_stability = keyed_tally(2, stability_keys)
    neuroticism = keyed_tally(2, neuroticism_keys)
    conscientiousness = keyed_tally(3, "+-+-+-+-++")
    return extroversion, conscientiousness, agreeableness, emotional_stability, neuroticism

def recommend_movies(user_factors, movie_factors, user_id, user_id_to_index, movie_id_to_index, top_n=10):
    """Return the ``top_n`` highest-scoring movies for one user.

    Parameters
    ----------
    user_factors, movie_factors : numpy.ndarray
        Factor matrices with one row per user / movie.
    user_id : hashable
        Key into ``user_id_to_index`` identifying the user.
    user_id_to_index, movie_id_to_index : dict
        Map a user / movie key to its row in the matching factor matrix.
    top_n : int, default 10
        Number of movies to return. Values <= 0 give an empty list.

    Returns
    -------
    list
        Keys of ``movie_id_to_index`` for the best-scoring movies, ordered by
        ascending predicted score (the strongest recommendation is last).

    Raises
    ------
    KeyError
        If ``user_id`` is not in ``user_id_to_index``, or a selected row has
        no key in ``movie_id_to_index``.
    """
    user_vector = user_factors[user_id_to_index[user_id]]
    if top_n <= 0:
        # A slice of [-0:] would select every movie, not none.
        return []

    predictions = np.dot(user_vector, movie_factors.T)
    best_rows = np.argsort(predictions)[-top_n:]

    # Build the row -> key map once instead of rebuilding two lists and doing
    # a linear search per recommendation. setdefault keeps the first key when
    # several keys share a row, matching a first-match search.
    row_to_movie_id = {}
    for movie_id, row in movie_id_to_index.items():
        row_to_movie_id.setdefault(row, movie_id)

    return [row_to_movie_id[int(row)] for row in best_rows]


Output()