### Data Preprocessing

In [None]:
import pandas as pd

# Profile of the user the recommendations are generated for.
# Field order: identity fields, mood, per-genre watch counts, then
# aggregate viewing statistics and the list of watched movie ids.
user_data = dict(
    Id=1,
    Name="Breno Brito",
    Username="BBrito",
    Password="1234",
    Mood="Relaxed",
    Action=5,
    Adventure=3,
    Drama=0,
    Horror=1,
    Fantasy=0,
    Crime=0,
    Mystery=0,
    Animation=0,
    Thriller=0,
    Musical=0,
    Biography=0,
    Romance=0,
    Documentary=0,
    TotalMoviesWatched=9,
    AverageYear=2008,
    AverageRate=67,
    MoviesWatched=[1, 3, 4, 10, 16, 122, 136, 149, 38],
)


# Load the movie dataset
movies_df = pd.read_csv("../data/movies.csv")

# Display the first few rows of the movie dataset
print("Movie dataset:")
print(movies_df.head())

# Check for missing values in the movie dataset
missing_values = movies_df.isnull().sum()
print("\nMissing values in movie dataset:")
print(missing_values)

# Handle missing values (if any).
# Only non-numeric columns receive the "Unknown" placeholder: writing a
# string into a numeric column would turn it into an object column and make
# any later arithmetic on it fail with a TypeError. Missing numeric values
# stay NaN.
text_columns = movies_df.select_dtypes(exclude="number").columns
movies_df.fillna({column: "Unknown" for column in text_columns}, inplace=True)

# Display the preprocessed movie dataset
print("\nPreprocessed movie dataset:")
print(movies_df.head())


### Define Mood-Genre Associations

In [None]:
# Maps each supported mood to the list of genres considered a good fit for it.
mood_genre_associations = dict(
    Happy=["Comedy", "Romance", "Animation"],
    Sad=["Drama", "Romance", "Biography"],
    Angry=["Action", "Crime", "Thriller"],
    Excited=["Adventure", "Action", "Fantasy"],
    Anxious=["Thriller", "Horror", "Mystery"],
    Relaxed=["Comedy", "Drama", "Documentary"],
    Grateful=["Musical", "Biography", "Romance"],
)

### Calculate Genre Percentages

In [None]:
# Calculate total movies watched
total_movies_watched = user_data["TotalMoviesWatched"]

# Profile fields that are not per-genre watch counts and must be skipped
non_genre_keys = {
    "Id", "Name", "Username", "Password", "Mood",
    "TotalMoviesWatched", "AverageYear", "AverageRate", "MoviesWatched",
}

# Share of the user's watched movies per genre, in percent.
# A user with no watch history gets 0% for every genre instead of a
# ZeroDivisionError.
genre_percentages = {
    genre: (count / total_movies_watched) * 100 if total_movies_watched else 0.0
    for genre, count in user_data.items()
    if genre not in non_genre_keys
}

# Display genre percentages
print("Genre Percentages:")
for genre, percentage in genre_percentages.items():
    print(f"{genre}: {percentage:.2f}%")

### Normalize Criteria

In [None]:
# Define min-max normalization function
def min_max_normalize(value: float, min_val: float, max_val: float) -> float:
    """Rescale *value* linearly so that min_val maps to 0 and max_val to 1.

    Args:
        value: The number to normalize.
        min_val: Lower bound of the original scale.
        max_val: Upper bound of the original scale.

    Returns:
        ``(value - min_val) / (max_val - min_val)``. The result is not
        clamped, so values outside the bounds fall outside ``[0, 1]``.
        When the bounds are equal the scale has no width and 0.0 is
        returned instead of raising ZeroDivisionError.
    """
    span = max_val - min_val
    if span == 0:
        # Degenerate range: every value is equally "at the minimum".
        return 0.0
    return (value - min_val) / span

# Normalize mood (not necessary as it's a categorical variable)

# Normalize genre percentages: fraction (0..1) of the user's watched movies
# that fall in each genre. Profile fields that are not genre counts are
# skipped, and a user with no watch history gets 0.0 for every genre
# instead of a ZeroDivisionError.
non_genre_keys = {
    "Id", "Name", "Username", "Password", "Mood",
    "TotalMoviesWatched", "AverageYear", "AverageRate", "MoviesWatched",
}
total_movies = user_data["TotalMoviesWatched"]
genre_percentages = {
    genre: (count / total_movies if total_movies else 0.0)
    for genre, count in user_data.items()
    if genre not in non_genre_keys
}

# Normalize average rate
min_rate = 0
max_rate = 100
normalized_rate = min_max_normalize(user_data["AverageRate"], min_rate, max_rate)

# Normalize average year
min_year = 2007
max_year = 2014
normalized_year = min_max_normalize(user_data["AverageYear"], min_year, max_year)

# Display normalized criteria
print("Normalized Criteria:")
print("Genre Percentages:", genre_percentages)
print("Normalized Rate:", normalized_rate)
print("Normalized Year:", normalized_year)


### Apply Weights

In [None]:
# Relative importance of each criterion in the composite score.
weights = dict(
    Mood=0.2,
    GenrePercentages=0.3,
    AverageRate=0.4,
    AverageYear=0.1,
)

### Combine Criteria

In [None]:
# Keys of the user profile that are not per-genre watch counts
_NON_GENRE_KEYS = frozenset({
    "Id", "Name", "Username", "Password", "Mood",
    "TotalMoviesWatched", "AverageYear", "AverageRate", "MoviesWatched",
})


# Function to combine weighted criteria for a movie
def combine_criteria(movies_df, weights, user_data, mood_genre_associations,
                     *, year_range=(2007, 2014)):
    """Compute one composite recommendation score per movie.

    Each score is the sum of four weighted contributions:

    * ``weights["Mood"]`` when the movie's genre is listed for the user's
      mood in ``mood_genre_associations``;
    * the fraction of the user's watched movies in the movie's genre,
      times ``weights["GenrePercentages"]``;
    * ``AudienceScore / 100`` times ``weights["AverageRate"]``;
    * ``1 - |Year - AverageYear| / (max_year - min_year)`` times
      ``weights["AverageYear"]`` (not clamped, so it can be negative for
      movies further from the user's average year than the range width).

    Args:
        movies_df: DataFrame with "Genre", "AudienceScore" and "Year" columns.
        weights: Mapping with "Mood", "GenrePercentages", "AverageRate" and
            "AverageYear" entries.
        user_data: User profile with "Mood", "TotalMoviesWatched",
            "AverageYear" and one watch-count entry per genre.
        mood_genre_associations: Mapping from mood name to a list of genres.
        year_range: ``(min_year, max_year)`` used to scale the year distance.
            Defaults to (2007, 2014), the bounds used elsewhere in this
            notebook for year normalization.

    Returns:
        A list of scores in the row order of ``movies_df``.

    Raises:
        ValueError: If ``year_range`` is empty or reversed.
    """
    min_year, max_year = year_range
    year_span = max_year - min_year
    if year_span <= 0:
        raise ValueError("year_range must satisfy min_year < max_year")

    # These depend only on the user, so compute them once, not per movie.
    relevant_genres = set(mood_genre_associations.get(user_data["Mood"], []))
    total_watched = user_data["TotalMoviesWatched"]
    average_year = user_data["AverageYear"]

    composite_scores = []
    for _, movie_data in movies_df.iterrows():
        genre = movie_data["Genre"]
        composite_score = 0

        # Bonus when the movie's genre suits the user's current mood
        if genre in relevant_genres:
            composite_score += weights["Mood"]

        # Share of the user's history in this genre. A genre the user has no
        # entry for (e.g. one that only exists in the movie dataset) counts
        # as 0 instead of raising KeyError; an empty history also counts as 0.
        if total_watched and genre not in _NON_GENRE_KEYS:
            genre_share = user_data.get(genre, 0) / total_watched
        else:
            genre_share = 0.0
        composite_score += genre_share * weights["GenrePercentages"]

        # Audience score; dividing by 100 presumably maps a 0-100 scale to 0-1
        composite_score += movie_data["AudienceScore"] / 100 * weights["AverageRate"]

        # Closeness of the release year to the user's average watched year
        year_distance = abs(movie_data["Year"] - average_year)
        composite_score += (1 - year_distance / year_span) * weights["AverageYear"]

        composite_scores.append(composite_score)

    return composite_scores


# Score every movie in the dataset for the current user; the result is a
# list with one entry per row of movies_df, in row order.
composite_scores = combine_criteria(
    movies_df,
    weights,
    user_data,
    mood_genre_associations,
)

# Store the scores next to the movies so the frame can be sorted by them
movies_df["CompositeScore"] = composite_scores

# Show the raw scores
print("Composite Scores:", composite_scores)


### Generate Recommendations

In [None]:
# Rank all movies from highest to lowest composite score
recommended_movies = movies_df.sort_values(by="CompositeScore", ascending=False)

# Keep the ten best-ranked movies
top_n_recommendations = recommended_movies.head(10)

# Print one summary line per recommended movie
print("Recommended Movies:")
for _, row in top_n_recommendations.iterrows():
    title = row["Movie"]
    year = row["Year"]
    genre_name = row["Genre"]
    audience_score = row["AudienceScore"]
    score = row["CompositeScore"]
    print(f"{title} ({year}), Genre: {genre_name}, Audience Score: {audience_score}, Composite Score: {score}")

