### Data Preprocessing

In [None]:
import pandas as pd

# Hard-coded sample profile for a single user; later cells read their inputs from it.
# Key order matters: the genre cells iterate this dict in insertion order.
# NOTE(review): the record carries a plaintext "Password" value - avoid keeping
# real credentials in a shared notebook.
user_data = {
    # --- account fields ---
    "Id": 1,
    "Name": "Breno Brito",
    "Username": "BBrito",
    "Password": "1234",
    # --- current mood (one of the keys of the mood-genre mapping) ---
    "Mood": "Relaxed",
    # --- number of watched movies per genre ---
    "Action": 5,
    "Adventure": 3,
    "Drama": 0,
    "Horror": 1,
    "Fantasy": 0,
    "Crime": 0,
    "Mystery": 0,
    "Animation": 0,
    "Thriller": 0,
    "Musical": 0,
    "Biography": 0,
    "Romance": 0,
    "Documentary": 0,
    # --- aggregate viewing statistics ---
    "TotalMoviesWatched": 9,
    "AverageYear": 2008,
    "AverageRate": 67,
    "MoviesWatched": [1, 3, 4, 10, 16, 122, 136, 149, 38],
}


# Load the movie dataset (path is relative to the notebook's working directory)
movies_df = pd.read_csv("../data/movies.csv")

# Display the first few rows of the movie dataset
print("Movie dataset:")
print(movies_df.head())

# Count missing values per column before anything is filled in
missing_values = movies_df.isnull().sum()
print("\nMissing values in movie dataset:")
print(missing_values)

# Fill gaps in non-numeric columns only. Writing the string "Unknown" into a
# numeric column (e.g. AudienceScore or Year) would turn it into an object
# column and break the arithmetic done on it when scoring; numeric gaps are
# therefore left as NaN. A new frame is bound instead of mutating in place so
# re-running the cell is safe.
text_columns = movies_df.select_dtypes(exclude="number").columns
movies_df = movies_df.fillna({column: "Unknown" for column in text_columns})

# Display the preprocessed movie dataset
print("\nPreprocessed movie dataset:")
print(movies_df.head())


### Define Mood-Genre Associations

In [None]:
# Mood -> genres associated with that mood (three genres per mood).
mood_genre_associations = dict(
    Happy=["Comedy", "Romance", "Animation"],
    Sad=["Drama", "Romance", "Biography"],
    Angry=["Action", "Crime", "Thriller"],
    Excited=["Adventure", "Action", "Fantasy"],
    Anxious=["Thriller", "Horror", "Mystery"],
    Relaxed=["Comedy", "Drama", "Documentary"],
    Grateful=["Musical", "Biography", "Romance"],
)

### Calculate Genre Percentages

In [8]:
# Keys of user_data that describe the profile itself rather than a per-genre count
non_genre_keys = {
    "Id", "Name", "Username", "Password", "Mood",
    "TotalMoviesWatched", "AverageYear", "AverageRate", "MoviesWatched",
}

# Calculate total movies watched
total_movies_watched = user_data["TotalMoviesWatched"]

# Per-genre share of the watched movies on a 0-100 scale. A profile with no
# watched movies gets 0.0 for every genre instead of raising ZeroDivisionError.
genre_percentages = {
    genre: (count / total_movies_watched) * 100 if total_movies_watched else 0.0
    for genre, count in user_data.items()
    if genre not in non_genre_keys
}

# Display genre percentages
print("Genre Percentages:")
for genre, percentage in genre_percentages.items():
    print(f"{genre}: {percentage:.2f}%")

Genre Percentages:
Action: 55.56%
Adventure: 33.33%
Drama: 0.00%
Horror: 11.11%
Fantasy: 0.00%
Crime: 0.00%
Mystery: 0.00%
Animation: 0.00%
Thriller: 0.00%
Musical: 0.00%
Biography: 0.00%
Romance: 0.00%
Documentary: 0.00%


### Normalize Criteria

In [9]:
# Define min-max normalization function
def min_max_normalize(value, min_val, max_val):
    """Rescale ``value`` linearly so that ``min_val`` maps to 0 and ``max_val`` to 1.

    Values outside ``[min_val, max_val]`` are not clamped, so the result can
    fall outside ``[0, 1]``.

    Args:
        value: The number to rescale.
        min_val: Lower bound of the reference range.
        max_val: Upper bound of the reference range.

    Returns:
        ``(value - min_val) / (max_val - min_val)``, or ``0.0`` when the range
        is degenerate (``max_val == min_val``).
    """
    span = max_val - min_val
    if span == 0:
        # A zero-width range carries no information; avoid ZeroDivisionError.
        return 0.0
    return (value - min_val) / span

# Mood is categorical, so it is not normalized here.

# Keys of user_data that are not per-genre watch counts
profile_keys = {
    "Id", "Name", "Username", "Password", "Mood",
    "TotalMoviesWatched", "AverageYear", "AverageRate", "MoviesWatched",
}

# Per-genre watch count as a fraction of TotalMoviesWatched.
# NOTE: this rebinds `genre_percentages` to fractions (count / total), not the
# 0-100 percentages; the weighting cell reads this fractional version.
# A profile with no watched movies gets 0.0 everywhere instead of raising
# ZeroDivisionError.
movies_watched_total = user_data["TotalMoviesWatched"]
genre_percentages = {
    genre: (user_data[genre] / movies_watched_total if movies_watched_total else 0.0)
    for genre in user_data
    if genre not in profile_keys
}

# Normalize average rate against a 0-100 rating scale
min_rate = 0
max_rate = 100
normalized_rate = min_max_normalize(user_data["AverageRate"], min_rate, max_rate)

# Normalize average year
# NOTE(review): 2007-2014 is presumably the release-year range of the movie
# dataset - confirm against the data.
min_year = 2007
max_year = 2014
normalized_year = min_max_normalize(user_data["AverageYear"], min_year, max_year)

# Display normalized criteria
print("Normalized Criteria:")
print("Genre Percentages:", genre_percentages)
print("Normalized Rate:", normalized_rate)
print("Normalized Year:", normalized_year)


Normalized Criteria:
Genre Percentages: {'Action': 0.5555555555555556, 'Adventure': 0.3333333333333333, 'Drama': 0.0, 'Horror': 0.1111111111111111, 'Fantasy': 0.0, 'Crime': 0.0, 'Mystery': 0.0, 'Animation': 0.0, 'Thriller': 0.0, 'Musical': 0.0, 'Biography': 0.0, 'Romance': 0.0, 'Documentary': 0.0}
Normalized Rate: 0.67
Normalized Year: 0.14285714285714285


### Apply Weights

In [10]:
# Relative importance assigned to each criterion
weights = {
    "Mood": 0.3,
    "GenrePercentages": 0.4,
    "AverageRate": 0.2,
    "AverageYear": 0.1,
}

# Scale every normalized criterion by its weight, keeping the criterion names as keys.
weighted_criteria = {
    # Mood is scored with a constant factor of 1
    "Mood": weights["Mood"] * 1,
    # One weighted value per genre
    "GenrePercentages": {
        genre: share * weights["GenrePercentages"]
        for genre, share in genre_percentages.items()
    },
    # Scalar criteria
    "AverageRate": normalized_rate * weights["AverageRate"],
    "AverageYear": normalized_year * weights["AverageYear"],
}

# Display weighted criteria
print("Weighted Criteria:")
print(weighted_criteria)

Weighted Criteria:
{'Mood': 0.3, 'GenrePercentages': {'Action': 0.22222222222222224, 'Adventure': 0.13333333333333333, 'Drama': 0.0, 'Horror': 0.044444444444444446, 'Fantasy': 0.0, 'Crime': 0.0, 'Mystery': 0.0, 'Animation': 0.0, 'Thriller': 0.0, 'Musical': 0.0, 'Biography': 0.0, 'Romance': 0.0, 'Documentary': 0.0}, 'AverageRate': 0.134, 'AverageYear': 0.014285714285714285}


### Combine Criteria

In [11]:
# Function to combine weighted criteria for a movie
def combine_criteria(movies_df, weighted_criteria, reference_year=2014):
    """Compute one composite score per movie from the weighted user criteria.

    Each score is the sum, in this order, of:

    * the mood weight ``weighted_criteria["Mood"]`` (same value for every movie);
    * the weighted genre share for the movie's ``Genre`` (0 when the genre has
      no entry in ``weighted_criteria["GenrePercentages"]``);
    * ``AudienceScore`` multiplied by ``weighted_criteria["AverageRate"]``;
    * ``reference_year - Year`` multiplied by ``weighted_criteria["AverageYear"]``.

    NOTE(review): ``AudienceScore`` and the year difference are used raw (not
    normalized), so they can dominate the genre term; the mood term is constant
    and therefore does not influence the ranking. Confirm this is intended.

    Args:
        movies_df: DataFrame with ``Genre``, ``AudienceScore`` and ``Year`` columns.
        weighted_criteria: Dict with keys ``"Mood"``, ``"GenrePercentages"``
            (dict of genre -> weighted share), ``"AverageRate"`` and
            ``"AverageYear"``.
        reference_year: Year the movie's release year is subtracted from.
            Defaults to 2014, the value previously hard-coded here.

    Returns:
        A list of composite scores, one per row of ``movies_df`` in row order.
    """
    mood_term = weighted_criteria["Mood"]
    genre_weights = weighted_criteria["GenrePercentages"]
    rate_weight = weighted_criteria["AverageRate"]
    year_weight = weighted_criteria["AverageYear"]

    # Iterate the three needed columns in lockstep instead of building a Series
    # per row with iterrows(); the terms are added in the same order as before.
    return [
        mood_term
        + genre_weights.get(genre, 0)
        + audience_score * rate_weight
        + (reference_year - year) * year_weight
        for genre, audience_score, year in zip(
            movies_df["Genre"], movies_df["AudienceScore"], movies_df["Year"]
        )
    ]

# Score every movie in the dataset against the weighted user criteria
composite_scores = combine_criteria(movies_df, weighted_criteria)

# Store the scores next to the movies they belong to (same row order)
movies_df["CompositeScore"] = composite_scores

# Show the raw list of scores
print("Composite Scores:", composite_scores)


Composite Scores: [7.858222222222222, 8.038, 12.548222222222222, 10.538222222222222, 11.521333333333335, 9.646, 12.594000000000001, 10.048, 10.182, 12.682222222222222, 11.656, 10.852, 10.048, 11.522, 12.326, 9.734222222222222, 12.146222222222223, 7.992222222222222, 12.058000000000002, 11.120000000000001, 8.038, 12.058000000000002, 10.182, 11.656, 9.244000000000002, 10.316, 8.126222222222223, 7.904, 10.450000000000001, 7.904, 11.254000000000001, 10.450000000000001, 7.590222222222222, 10.450000000000001, 11.656, 7.992222222222222, 10.182, 9.824444444444445, 11.522, 8.976, 9.020444444444445, 6.6979999999999995, 9.064222222222222, 11.388000000000002, 11.923333333333334, 11.254000000000001, 9.780000000000001, 10.048, 7.77, 11.074222222222222, 11.610222222222223, 7.77, 8.976, 5.402444444444444, 11.656, 5.492, 8.796222222222223, 8.082444444444445, 12.057333333333334, 9.332222222222223, 5.894, 9.198222222222222, 8.574000000000002, 10.182, 7.904, 8.842, 10.450000000000001, 7.367999999999999, 8.

### Generate Recommendations

In [12]:
# Rank the movies from highest to lowest composite score
recommended_movies = movies_df.sort_values(by="CompositeScore", ascending=False)

# Keep the ten best-ranked movies
top_n_recommendations = recommended_movies.iloc[:10]

# Print one line per recommended movie
print("Recommended Movies:")
for row_label, movie in top_n_recommendations.iterrows():
    print(f"{movie['Movie']} ({movie['Year']}), Genre: {movie['Genre']}, Audience Score: {movie['AudienceScore']}, Composite Score: {movie['CompositeScore']}")



Recommended Movies:
The Dark Knight (2008), Genre: Thriller, Audience Score: 96, Composite Score: 13.249714285714287
Inception (2010), Genre: Action, Audience Score: 93, Composite Score: 13.04136507936508
Warrior (2011), Genre: Action, Audience Score: 93, Composite Score: 13.027079365079366
Avatar (2009), Genre: Action, Audience Score: 92, Composite Score: 12.921650793650794
The King's Speech (2010), Genre: Biography, Audience Score: 93, Composite Score: 12.81914285714286
50/50 (2011), Genre: Comedy, Audience Score: 93, Composite Score: 12.804857142857145
Iron Man (2008), Genre: Action, Audience Score: 91, Composite Score: 12.801936507936508
Star Trek (2009), Genre: Action, Audience Score: 91, Composite Score: 12.787650793650794
300 (2007), Genre: Action, Audience Score: 90, Composite Score: 12.682222222222222
Harry Potter and the Deathly Hallows Part 2 (2011), Genre: Fantasy, Audience Score: 92, Composite Score: 12.670857142857145
