In [None]:
import numpy as np
import pandas as pd

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the ratings table from the mounted Google Drive.
# NOTE(review): presumably one row per (userId, movieId, rating) — confirm against the CSV.
ratings_csv_path = '/content/drive/MyDrive/Research/RAT (1).csv'
rating_dataset = pd.read_csv(ratings_csv_path)

In [None]:
# Load the movie metadata; the file is parsed as comma-separated, Latin-1 encoded text.
movies_csv_path = '/content/drive/MyDrive/Research/V-15.0 (1).csv'
raw_movies = pd.read_csv(movies_csv_path, sep=',', encoding='latin-1')

# One-hot encode the pipe-delimited 'genres' strings: one 0/1 indicator column per genre.
genre_columns = raw_movies['genres'].str.get_dummies('|')

# Attach the indicator columns and drop the original 'genres' text column in one
# expression, so re-running the cell never mutates an already-processed frame.
dataset = pd.concat([raw_movies, genre_columns], axis=1).drop('genres', axis=1)


In [None]:
# Keep only the identifier, title and plot overview; these are the columns
# joined onto the ratings in the next cell.
movie_columns = ['movieId', 'title', 'Overview']
movie_dataset = dataset[movie_columns]
movie_dataset.head()

Unnamed: 0,movieId,title,Overview
0,1,Toy Story,A cowboy doll is profoundly threatened and jea...
1,2,Jumanji,When two kids find and play a magical board ga...
2,3,Grumpier Old Men,John and Max resolve to save their beloved bai...
3,4,Waiting Exhale,"Based on Terry McMillan's novel, this film fol..."
4,5,Father Bride Part II,George Banks must deal not only with his daugh...


In [None]:
# Inner-join ratings with movie metadata on movieId; ratings whose movieId has
# no metadata row (and movies without ratings) are dropped by the inner join.
merged_dataset = rating_dataset.merge(movie_dataset, on='movieId', how='inner')
merged_dataset.head()
# Persist the joined table to the working directory.
merged_dataset.to_csv('MDB.csv', index=False)

### **User-Item matrix**

In [None]:
# Extract unique users and movie titles from the dataset, in order of first appearance.
# Row i of the matrix belongs to users[i]; column j belongs to movies[j].
users = merged_dataset['userId'].unique()
movies = merged_dataset['title'].unique()

# Kept for backward compatibility with any later cell that reads it.
grouped_data = merged_dataset.groupby(['userId', 'title'])

# Keep one rating per (user, title) pair — the first one encountered — and skip
# rows with a missing key, which can never be matched to a matrix cell.
first_ratings = (
    merged_dataset
    .dropna(subset=['userId', 'title'])
    .drop_duplicates(subset=['userId', 'title'], keep='first')
)

# Translate user ids / titles into matrix positions with one vectorised lookup
# instead of probing the groupby once per (user, movie) cell, which costs
# len(users) * len(movies) Python-level calls.
row_positions = pd.Index(users).get_indexer(first_ratings['userId'])
col_positions = pd.Index(movies).get_indexer(first_ratings['title'])

# Create the user-movie rating matrix; cells without a rating stay 0.
UMR_matrix = np.zeros((len(users), len(movies)))
UMR_matrix[row_positions, col_positions] = first_ratings['rating'].to_numpy()

# Users and movie titles shown in the sample table of observed ratings.
selected_users = [614, 615, 611, 613, 9, 612, 61, 167, 215, 605]
selected_movies = [
    'Matrix', 'Lord Rings: Fellowship Ring', 'Lord Rings: Return King', 'Inception', 'Dark Knight',
    'Iron Man 3', 'Guardians Galaxy', 'Black Panther', 'Guardians Galaxy 2', 'Tomb Raider',
]

# Short display labels: "U1".."Un" -> user id and "M1".."Mn" -> movie title.
formatted_ids = {}
for position, selected_id in enumerate(selected_users, start=1):
    formatted_ids[f"U{position}"] = selected_id

formatted_titles = {}
for position, selected_title in enumerate(selected_movies, start=1):
    formatted_titles[f"M{position}"] = selected_title

# Column widths follow the longest *label* ("U10", "M10"), not the ids/titles.
max_user_width = max(map(len, formatted_ids))
max_movie_width = max(map(len, formatted_titles))

# Header row: an empty user cell followed by the movie labels.
print("\nSelected Ratings:")
header = " " + "".join(f"{label:{max_movie_width}}\t" for label in formatted_titles)
print(f"{'':{max_user_width}}\t{header}")

# One row per selected user; a missing (user, movie) rating is shown as 0.
for user_key, user_id in formatted_ids.items():
    rows_of_user = merged_dataset['userId'] == user_id
    cells = []
    for movie_title in formatted_titles.values():
        user_ratings = merged_dataset.loc[rows_of_user & (merged_dataset['title'] == movie_title)]
        rating = 0 if len(user_ratings) == 0 else user_ratings['rating'].values[0]
        cells.append(f"{rating:{max_movie_width}.1f}\t")
    row_data = f"{user_key:{max_user_width}}\t" + "".join(cells)
    print(row_data)

### **SVD**

In [None]:
from scipy.linalg import svd

# Uses the user-item matrix `UMR_matrix` built in the "User-Item matrix" section.

# Replace NaN ratings with 0 so the decomposition only sees finite values.
UMR_matrix_filled = np.nan_to_num(UMR_matrix)

# Number of latent factors kept for the low-rank approximation.
k = 20

# Thin SVD: only the first k singular triplets are used below, so skip the
# full square U (users x users) and V^T (movies x movies) factors, which
# dominate memory and run time for a wide rating matrix.
U, Sigma, VT = svd(UMR_matrix_filled, full_matrices=False)

# Truncate U, Sigma and V^T to the leading k components.
U_reduced = U[:, :k]
Sigma_reduced = np.diag(Sigma[:k])
VT_reduced = VT[:k, :]

# Rank-k reconstruction: row i holds the predicted ratings for users[i],
# column j the prediction for movies[j].
predicted_matrix = U_reduced @ Sigma_reduced @ VT_reduced

# Display the predicted evaluation matrix
#print("Predicted Evaluation Matrix:")
#for row in predicted_matrix:
#    print(row)

# Display the predicted evaluation matrix for selected users and movie titles.
# NOTE(review): the last title here is 'Lara Croft: Tomb Raider' while the
# observed-ratings table in the "User-Item matrix" section uses 'Tomb Raider' —
# confirm which movie is intended so both tables show the same column.
selected_users = [614, 615, 611, 613, 9, 612, 61, 167, 215, 605]
selected_movies = ['Matrix', 'Lord Rings: Fellowship Ring', 'Lord Rings: Return King', 'Inception', 'Dark Knight',
          'Iron Man 3', 'Guardians Galaxy', 'Black Panther', 'Guardians Galaxy 2', 'Lara Croft: Tomb Raider']

# Short display labels: "U1".."Un" -> user id and "M1".."Mn" -> movie title.
formatted_ids = {f"U{i+1}": user_id for i, user_id in enumerate(selected_users)}
formatted_titles = {f"M{i+1}": movie_title for i, movie_title in enumerate(selected_movies)}

# Column widths follow the longest label, not the ids/titles themselves.
max_user_width = max(len(formatted_id) for formatted_id in formatted_ids.keys())
max_movie_width = max(len(formatted_title) for formatted_title in formatted_titles.keys())

# Rows/columns of predicted_matrix follow the order of `users` / `movies`
# (first appearance in the data), so look positions up explicitly instead of
# assuming that userId N lives in row N - 1.
user_row_of = {user: row for row, user in enumerate(users)}
movie_column_of = {title: column for column, title in enumerate(movies)}

# Header row: an empty user cell followed by the movie labels.
print("\nPredicted Ratings:")
header = "".join(f"{movie_key:{max_movie_width}}\t" for movie_key in formatted_titles)
print(f"{'':{max_user_width}}\t{header}")

for user_key, user_id in formatted_ids.items():
    row_data = f"{user_key:{max_user_width}}\t"
    user_row = user_row_of.get(user_id)
    for movie_key, movie_title in formatted_titles.items():
        movie_column = movie_column_of.get(movie_title)
        if user_row is None or movie_column is None:
            # Unknown user or title: show "nan" rather than aborting the whole table.
            predicted_rating = float('nan')
        else:
            predicted_rating = predicted_matrix[user_row][movie_column]
        row_data += f"{predicted_rating:{max_movie_width}.1f}\t"
    print(row_data)


In [None]:
# Genre indicator columns produced by the one-hot encoding of 'genres'.
genre_indicator_columns = [
    'Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime', 'Documentary',
    'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'IMAX', 'Musical', 'Mystery',
    'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]

# movieId plus its genre flags, joined onto every rating row.
genre_dataset = dataset[['movieId'] + genre_indicator_columns]
genre_merged = merged_dataset.merge(genre_dataset, on='movieId', how='inner')

#### **FAV and UNPOPULAR GENRE**

In [None]:
import numpy as np
import pandas as pd

# Uses 'genre_merged': one row per rating, with a 0/1 indicator column per genre.

# Genre indicator columns of the rating table.
genres = genre_merged[['Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'IMAX', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']]

# Iterate over each user
for user_id in genre_merged['userId'].unique():
    # Restrict to this user's rows once, instead of re-aligning the full
    # indicator column against the user's ratings for every genre.
    user_rows = genre_merged[genre_merged['userId'] == user_id]
    user_ratings = user_rows['rating']

    # Average rating per genre; NaN when the user rated no movie of that genre.
    genre_ratings = [user_ratings[user_rows[genre] == 1].mean() for genre in genres.columns]

    # Rank only genres the user actually rated: NaN keys make sorted() return
    # an arbitrary order, which would corrupt both the top and bottom picks.
    rated_genres = [(genre, mean) for genre, mean in zip(genres.columns, genre_ratings) if not np.isnan(mean)]
    sorted_genres = sorted(rated_genres, key=lambda x: x[1], reverse=True)

    # Top 3 and bottom 3 by average rating (they overlap when fewer than six genres were rated).
    favorite_genres = [genre for genre, rating in sorted_genres[:3]]
    unpopular_genres = [genre for genre, rating in sorted_genres[-3:]]

    print(f"User ID: {user_id}")
    print("Favorite Genres:", favorite_genres)
    print("Unpopular Genres:", unpopular_genres)
    print()


In [None]:
# Select a user to print their results
selected_user_id = 614

# Average rating per genre for the selected user; NaN when no movie of that
# genre was rated by this user.
selected_user_rows = genre_merged[genre_merged['userId'] == selected_user_id]
selected_user_ratings = selected_user_rows['rating']
selected_user_genre_ratings = [
    selected_user_ratings[selected_user_rows[genre] == 1].mean() for genre in genres.columns
]

# Rank only the genres that have an average: NaN keys make sorted() return an
# arbitrary order, which would corrupt both the top and the bottom picks.
selected_user_rated_genres = [
    (genre, mean)
    for genre, mean in zip(genres.columns, selected_user_genre_ratings)
    if not np.isnan(mean)
]
selected_user_sorted_genres = sorted(selected_user_rated_genres, key=lambda x: x[1], reverse=True)
selected_user_favorite_genres = [genre for genre, rating in selected_user_sorted_genres[:3]]
selected_user_unpopular_genres = [genre for genre, rating in selected_user_sorted_genres[-3:]]

# Print the results for the selected user
print(f"Selected User ID: {selected_user_id}")
print("Favorite Genres:", selected_user_favorite_genres)
print("Unpopular Genres:", selected_user_unpopular_genres)

#### **Highest predicted evaluation for users**

In [None]:
import pandas as pd

# Uses 'predicted_matrix' (rows follow `users`, columns follow `movies`) and
# 'genre_merged' (one row per rating, with title, Overview and genre flags).

# Row position of every user in predicted_matrix. Rows follow the order of
# `users`, so userId N is not guaranteed to sit in row N - 1.
user_row_of = {user: row for row, user in enumerate(users)}

# One representative row per movie; genre_merged repeats a movie once per rating.
movie_catalog = genre_merged.drop_duplicates(subset=['movieId'])

# Recommended movies for all users, one DataFrame per user, in user_ids order.
user_ids = genre_merged['userId'].unique()
table_columns = ['userId', 'Movie ID', 'title', 'Predicted Rating', 'Overview']
all_users_movies = []

# Iterate over each user
for user_id in user_ids:
    # Row of predicted evaluations for the user, keyed by title: the matrix
    # columns correspond to `movies` (titles), not to movieId values, so a join
    # on movieId against the column position would attach the wrong scores.
    user_predicted_ratings = predicted_matrix[user_row_of[user_id]]
    predicted_by_title = pd.Series(user_predicted_ratings, index=movies)

    # Movies the user has not rated yet
    rated_movie_ids = genre_merged.loc[genre_merged['userId'] == user_id, 'movieId'].unique()
    unrated_movies = movie_catalog[~movie_catalog['movieId'].isin(rated_movie_ids)]

    # Attach the predicted rating and rank from highest to lowest
    unrated_movies_with_ratings = unrated_movies.assign(
        predicted_rating=unrated_movies['title'].map(predicted_by_title)
    )
    sorted_movies = unrated_movies_with_ratings.sort_values(by='predicted_rating', ascending=False)

    # Top recommended movies (first 25), reshaped into the display columns
    top_movies = sorted_movies.head(25)
    unique_movies = (
        top_movies[['movieId', 'title', 'predicted_rating', 'Overview']]
        .rename(columns={'movieId': 'Movie ID', 'predicted_rating': 'Predicted Rating'})
        .reset_index(drop=True)
    )
    unique_movies.insert(0, 'userId', user_id)
    unique_movies = unique_movies[table_columns]

    # Add the recommended movies for the current user to the list
    all_users_movies.append(unique_movies)

# Print the recommended movies for all users
for user_id, user_movies in zip(user_ids, all_users_movies):
    print(f"Recommended movies for User ID {user_id}:")
    print(user_movies.to_string(index=False))
    print()



In [None]:
# all_users_movies holds one DataFrame per user, in the order of unique userIds.
user_ids_in_order = genre_merged['userId'].unique().tolist()

# list.index() raises ValueError for a missing value (it never returns -1),
# so test membership first to make the "not found" branch reachable.
if selected_user_id in user_ids_in_order:
    selected_user_index = user_ids_in_order.index(selected_user_id)

    # Get the recommended movies for the selected user
    selected_user_movies = all_users_movies[selected_user_index]

    # Print the recommended movies for the selected user
    print(f"Recommended movies for User ID {selected_user_id}:")
    print(selected_user_movies.to_string(index=False))
else:
    selected_user_index = -1
    print(f"No recommended movies found for User ID {selected_user_id}.")


#### **MOVIES WITHOUT UNPOPULAR GENRE**

In [None]:
import pandas as pd
import numpy as np

# Uses 'predicted_matrix' (rows follow `users`, columns follow `movies`) and
# 'genre_merged' (one row per rating, with a 0/1 indicator column per genre).

# Genre indicator columns of the rating table.
genres = genre_merged[['Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'IMAX', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']]

# Row position of every user in predicted_matrix. Rows follow the order of
# `users`, so userId N is not guaranteed to sit in row N - 1.
user_row_of = {user: row for row, user in enumerate(users)}

# One representative row per movie; genre_merged repeats a movie once per rating.
movie_catalog = genre_merged.drop_duplicates(subset=['movieId'])

# Recommended movies for all users, one DataFrame per user, in user_ids order.
user_ids = genre_merged['userId'].unique()
table_columns = ['userId', 'Movie ID', 'title', 'Predicted Rating', 'Overview']
all_users_movies = []

# Iterate over each user
for user_id in user_ids:
    # Ratings given by the user
    user_rows = genre_merged[genre_merged['userId'] == user_id]
    user_ratings = user_rows['rating']

    # Average rating per genre; NaN when the user rated no movie of that genre.
    genre_ratings = [user_ratings[user_rows[genre] == 1].mean() for genre in genres.columns]

    # Rank only genres the user actually rated: NaN keys make sorted() return
    # an arbitrary order.
    rated_genres = [(genre, mean) for genre, mean in zip(genres.columns, genre_ratings) if not np.isnan(mean)]
    sorted_genres = sorted(rated_genres, key=lambda x: x[1], reverse=True)

    # Top 3 favorite and bottom 3 unpopular genres
    favorite_genres = [genre for genre, rating in sorted_genres[:3]]
    unpopular_genres = [genre for genre, rating in sorted_genres[-3:]]

    # Row of predicted evaluations for the user, keyed by title: the matrix
    # columns correspond to `movies` (titles), not to movieId values.
    user_predicted_ratings = predicted_matrix[user_row_of[user_id]]
    predicted_by_title = pd.Series(user_predicted_ratings, index=movies)

    # Movies the user has not rated yet, minus those carrying any of the
    # user's unpopular genres (the previous code filtered on favorite_genres,
    # removing exactly the movies the user is most likely to enjoy).
    unrated_movies = movie_catalog[~movie_catalog['movieId'].isin(user_rows['movieId'].unique())]
    unrated_movies = unrated_movies[~unrated_movies[unpopular_genres].any(axis=1)]

    # Attach the predicted rating and rank from highest to lowest
    unrated_movies_with_ratings = unrated_movies.assign(
        predicted_rating=unrated_movies['title'].map(predicted_by_title)
    )
    sorted_movies = unrated_movies_with_ratings.sort_values(by='predicted_rating', ascending=False)

    # Top 25 movies, reshaped into the display columns
    top_movies = sorted_movies.head(25)
    table_df = (
        top_movies[['movieId', 'title', 'predicted_rating', 'Overview']]
        .rename(columns={'movieId': 'Movie ID', 'predicted_rating': 'Predicted Rating'})
        .reset_index(drop=True)
    )
    table_df.insert(0, 'userId', user_id)
    table_df = table_df[table_columns]

    # Append the recommended movies for the user to the list
    all_users_movies.append(table_df)

# Print the recommended movies for all users
for user_id, user_movies in zip(user_ids, all_users_movies):
    print(f"Recommended movies for User ID {user_id}:")
    print(user_movies.to_string(index=False))
    print()



In [None]:
# all_users_movies holds one DataFrame per user, in the order of unique userIds.
user_ids_in_order = genre_merged['userId'].unique().tolist()

# list.index() raises ValueError for a missing value (it never returns -1),
# so test membership first to make the "not found" branch reachable.
if selected_user_id in user_ids_in_order:
    selected_user_index = user_ids_in_order.index(selected_user_id)

    # Get the recommended movies for the selected user
    selected_user_movies = all_users_movies[selected_user_index]

    # Print the recommended movies for the selected user
    print(f"Recommended movies for User ID {selected_user_id}:")
    print(selected_user_movies.to_string(index=False))
else:
    selected_user_index = -1
    print(f"No recommended movies found for User ID {selected_user_id}.")


### **movies from my favorite genres**

#### **YES**

In [None]:
import numpy as np
import pandas as pd

# Uses 'predicted_matrix' (rows follow `users`, columns follow `movies`) and
# 'genre_merged' (one row per rating, with a 0/1 indicator column per genre).

# Genre indicator columns of the rating table.
genres = genre_merged[['Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'IMAX', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']]

# Row position of every user in predicted_matrix. Rows follow the order of
# `users`, so userId N is not guaranteed to sit in row N - 1.
user_row_of = {user: row for row, user in enumerate(users)}

# One representative row per movie; genre_merged repeats a movie once per rating.
movie_catalog = genre_merged.drop_duplicates(subset=['movieId'])

# Recommendation rows for all users: [userId, movieId, title, predicted rating, overview]
all_recommendations = []

# Loop through each user
for user_id in genre_merged['userId'].unique():
    # Ratings given by the user
    user_rows = genre_merged[genre_merged['userId'] == user_id]
    user_ratings = user_rows['rating']

    # Average rating per genre; NaN when the user rated no movie of that genre.
    genre_ratings = [user_ratings[user_rows[genre] == 1].mean() for genre in genres.columns]

    # Rank only genres the user actually rated: NaN keys make sorted() return
    # an arbitrary order.
    rated_genres = [(genre, mean) for genre, mean in zip(genres.columns, genre_ratings) if not np.isnan(mean)]
    sorted_genres = sorted(rated_genres, key=lambda x: x[1], reverse=True)

    # Top 3 favorite and bottom 3 unpopular genres
    favorite_genres = [genre for genre, rating in sorted_genres[:3]]
    unpopular_genres = [genre for genre, rating in sorted_genres[-3:]]

    # Predictions for THIS user, keyed by title. The previous code never set
    # user_predicted_ratings inside this loop, so every user silently reused
    # the row left over from the last iteration of an earlier cell.
    user_predicted_ratings = predicted_matrix[user_row_of[user_id]]
    predicted_by_title = pd.Series(user_predicted_ratings, index=movies)

    # Unrated movies, minus those carrying any unpopular genre.
    # NOTE(review): the previous filter kept only movies with NO favorite
    # genre, which made the favorite-genre boost below a no-op; filtering on
    # unpopular_genres (computed above but otherwise unused) is the assumed
    # intent — confirm.
    unrated_movies = movie_catalog[~movie_catalog['movieId'].isin(user_rows['movieId'].unique())]
    unrated_movies = unrated_movies[unrated_movies[unpopular_genres].sum(axis=1) == 0]

    # Attach the predicted rating for each unrated movie
    unrated_movies_with_ratings = unrated_movies.assign(
        predicted_rating=unrated_movies['title'].map(predicted_by_title)
    )

    # Double the predicted evaluation of movies in any favorite genre
    in_favorite_genre = unrated_movies_with_ratings[favorite_genres].sum(axis=1) > 0
    unrated_movies_with_ratings.loc[in_favorite_genre, 'predicted_rating'] *= 2

    # Rank by the adjusted predicted rating, highest first
    sorted_movies = unrated_movies_with_ratings.sort_values(by='predicted_rating', ascending=False)

    # Remove duplicate movies
    sorted_movies = sorted_movies.drop_duplicates(subset='movieId')

    # Table rows for this user, in ranked order
    recommendation_rows = sorted_movies[['movieId', 'title', 'predicted_rating', 'Overview']].copy()
    recommendation_rows.insert(0, 'userId', user_id)
    table_data = recommendation_rows.values.tolist()

    # Append recommendations for the current user to the list
    all_recommendations.extend(table_data)

# Column layout of the flat recommendations table.
table_columns = ['userId', 'Movie ID', 'Title', 'Predicted Rating', 'Overview']

# One row per (user, recommended movie), in the order the rows were collected.
table_df = pd.DataFrame(all_recommendations, columns=table_columns)

# Render every predicted rating as a string with four decimals.
table_df['Predicted Rating'] = table_df['Predicted Rating'].apply(lambda value: f"{value:.4f}")

# Print the first 25 recommendation rows of each user.
for user_id in genre_merged['userId'].unique():
    rows_of_user = table_df['userId'] == user_id
    user_movies = table_df[rows_of_user].head(25)
    print(f"Recommendations for user {user_id}:")
    print(user_movies.to_string(index=False))
    print()

## **CONTENT**

In [None]:
# Align the column names with the rest of the notebook ('title', 'movieId').
table_df.rename(columns={'Title': 'title', 'Movie ID': 'movieId'}, inplace=True)


In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Aliases of the recommendations table.
top_movies = table_df
movie_details = table_df

# TF-IDF over the overviews: English stop words removed, uni- to tri-grams.
vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 3))

# Fit the vocabulary on every overview and vectorise all rows in one pass.
# The previous chunked loop fitted on the first chunk only and then
# *overwrote* overview_tokens with each later chunk, so for tables longer than
# one chunk only the last chunk's rows survived. The result is a sparse
# matrix with exactly one row per row of table_df.
overview_texts = table_df['Overview'].astype(str)
overview_tokens = vectorizer.fit_transform(overview_texts)


In [None]:
# Cosine similarity of every overview against all overviews, computed in
# row batches and stacked into one dense matrix at the end.
batch_size = 100  # Adjust the batch size based on available memory
num_movies = len(table_df)
similarity_blocks = []

for start in range(0, num_movies, batch_size):
    end = min(start + batch_size, num_movies)
    # Progress trace for the current batch bounds
    print("start:", start, "end:", end)
    batch_tokens = overview_tokens[start:end]
    # A batch can be empty when overview_tokens has fewer rows than table_df.
    if batch_tokens.shape[0] != 0:
        print("Shape of batch_tokens:", batch_tokens.shape)
        similarity_blocks.append(cosine_similarity(batch_tokens, overview_tokens))

# Stack the per-batch blocks row-wise
similarity_matrix = np.concatenate(similarity_blocks, axis=0)
print("Shape of similarity_matrix:", similarity_matrix.shape)


In [None]:
import pandas as pd

# Columns of the header row written for each movie; the similar-movie rows
# additionally carry a 'title' column.
result_columns = ['Name of movie', 'Predicted Rating', 'Evaluation Similarity']

# Set the number of similar movies to include in the result
num_similar_movies = 25

# Number of rows available in the similarity matrix
num_movies = similarity_matrix.shape[0]

# Collect the pieces and concatenate once: growing result_df with pd.concat
# inside the loop re-copies the whole frame on every iteration.
result_frames = []

# Read the columns by name rather than through the positional itertuples
# field (`_4`), which silently breaks if the column order of table_df changes.
# NOTE(review): assumes table_df has a default 0..n-1 index, so index labels
# equal row positions in similarity_matrix — confirm.
for movie_index, movie_title, predicted_rating in zip(
    table_df.index, table_df['title'], table_df['Predicted Rating']
):
    # Skip rows that have no counterpart in the similarity matrix
    if movie_index >= num_movies:
        continue

    # Similarity of the current movie to every row of table_df
    similarity_scores = similarity_matrix[movie_index]

    # Rank all rows by descending similarity, drop the movie's own row (it
    # would otherwise take a slot with similarity 1.0), then keep the top N.
    # Other rows of the same movie (recommended to other users) are still kept.
    ranked_indices = similarity_scores.argsort()[::-1]
    top_indices = ranked_indices[ranked_indices != movie_index][:num_similar_movies]

    # Titles, predicted ratings and similarity scores of the top similar movies
    similar_movies = table_df.loc[top_indices, ['title', 'Predicted Rating']].assign(
        **{'Evaluation Similarity': similarity_scores[top_indices]}
    )

    # Header row for the current movie, followed by its similar movies
    result_frames.append(pd.DataFrame({'Name of movie': [movie_title],
                                       'Predicted Rating': [predicted_rating],
                                       'Evaluation Similarity': ['']}))
    result_frames.append(similar_movies)

if result_frames:
    result_df = pd.concat(result_frames, ignore_index=True)
else:
    result_df = pd.DataFrame(columns=result_columns)

# Write the first 25 rows of the result to a CSV file
result_df.head(25).to_csv('result-1002.csv', index=False)



In [None]:
# Mean observed rating per title, as a two-column frame: 'title', 'rating'.
average_ratings = merged_dataset.groupby('title', as_index=False)['rating'].mean()


In [None]:
average_ratings

In [None]:
# Keep rating rows whose value lies in [0, 350] (both bounds inclusive).
# NOTE(review): 350 is applied to the 'rating' value here, yet it looks like a
# bound on the *number* of ratings — confirm the intended filter.
rating_in_range = merged_dataset['rating'].between(0, 350)
filtered_df = merged_dataset[rating_in_range]

# Number of rating rows per title, most-rated first
num_Rating = filtered_df['title'].value_counts().reset_index()

# Name the two resulting columns explicitly
num_Rating.columns = ['title', 'Number of ratings']

num_Rating

In [None]:
# Per-title rating count joined with the per-title mean rating ('avg_rating').
merged_df1 = (
    num_Rating
    .merge(average_ratings, on='title')
    .rename(columns={'rating': 'avg_rating'})
)

In [None]:
# Attach rating count and average rating to the rows of result_df; the inner
# join keeps only rows whose 'title' also appears in merged_df1.
merged_df2 = result_df.merge(merged_df1, how='inner', on='title')

In [None]:


# Rank by evaluation similarity (highest first), keep the best-ranked row of
# each title, then take the top 25. The de-duplication is chained onto the
# sorted frame; previously it restarted from merged_df2 and so discarded the sort.
merged_df3 = (
    merged_df2
    .sort_values(by='Evaluation Similarity', ascending=False)
    .drop_duplicates(subset='title')
    .head(25)
)

In [None]:
# 'Name of movie' is only filled on header rows; drop it from the final table.
merged_df3 = merged_df3.drop(columns=['Name of movie'])

In [None]:
# Final column order of the table.
desired_columns = ['title', 'Predicted Rating', 'Evaluation Similarity', 'avg_rating', 'Number of ratings']

# Reorder (and restrict) the columns to that layout
merged_df3 = merged_df3.reindex(desired_columns, axis='columns')

In [None]:
# Cast the rating count to int in its own column. The previous assignment
# wrote the counts into 'Evaluation Similarity', overwriting the similarity
# scores with numbers of ratings.
merged_df3['Number of ratings'] = merged_df3['Number of ratings'].astype(int)

## **EXPERT SYSTEM**

In [None]:
# Predicted ratings were formatted as strings earlier; convert them back to floats.
merged_df3 = merged_df3.astype({'Predicted Rating': 'float'})

In [None]:
import skfuzzy as fuzz
from skfuzzy import control as ctrl

# Fuzzy input variables (antecedents), each defined over a discrete universe:
#   inp1: 0 .. 5 in steps of 0.1
#   inp2: 0 .. 350 in steps of 1
#   inp3: 0 .. 1 in steps of 0.1
# NOTE(review): presumably inp1 is a rating, inp2 the number of ratings and
# inp3 the evaluation similarity — confirm against the code that feeds them.
inp1 = ctrl.Antecedent(np.arange(0, 5.1, 0.1), 'inp1')
inp2 = ctrl.Antecedent(np.arange(0, 351, 1), 'inp2')
inp3 = ctrl.Antecedent(np.arange(0, 1.1, 0.1), 'inp3')

# Fuzzy output variable (consequent) over 0 .. 1 in steps of 0.1, with five
# triangular membership functions given as [left foot, peak, right foot].
# NOTE(review): 'low' peaks at 0 while 'very low' peaks at 0.1, so 'very low'
# sits above 'low' on the scale — confirm the two labels are not swapped.
importance = ctrl.Consequent(np.arange(0, 1.1, 0.1), 'importance')
importance['low'] = fuzz.trimf(importance.universe, [0, 0, 0.25])
importance['very low'] = fuzz.trimf(importance.universe, [0, 0.1, 0.3])
importance['medium'] = fuzz.trimf(importance.universe, [0.2, 0.4, 0.6])
importance['high'] = fuzz.trimf(importance.universe, [0.5, 0.7, 0.9])
importance['very high'] = fuzz.trimf(importance.universe, [0.75, 1, 1])




# Triangular membership functions for the input variables, each given as
# [left foot, peak, right foot] on the variable's universe.

# inp1: three terms over 0 .. 5
inp1['low'] = fuzz.trimf(inp1.universe, [0, 0, 2])
inp1['medium'] = fuzz.trimf(inp1.universe, [1, 2, 3])
inp1['high'] = fuzz.trimf(inp1.universe, [2, 4, 5])



# inp2: four terms over 0 .. 350

inp2['few'] = fuzz.trimf(inp2.universe, [0, 0, 100])
inp2['medium'] = fuzz.trimf(inp2.universe, [50, 150, 250])
inp2['much'] = fuzz.trimf(inp2.universe, [150, 250, 350])
inp2['very much'] = fuzz.trimf(inp2.universe, [250, 350, 350])


# inp3: four terms over 0 .. 1
inp3['low'] = fuzz.trimf(inp3.universe, [0, 0, 0.25])
inp3['medium'] = fuzz.trimf(inp3.universe, [0.1, 0.4, 0.7])
inp3['high'] = fuzz.trimf(inp3.universe, [0.4, 0.7, 0.9])
inp3['very high'] = fuzz.trimf(inp3.universe, [0.75, 1, 1])


# Define the fuzzy rules
rule1 = ctrl.Rule(inp1['low'] & inp2['few'] & inp3['low'], importance['very low'])
rule2 = ctrl.Rule(inp1['low'] & inp2['few'] & inp3['medium'], importance['very low'])
rule3 = ctrl.Rule(inp1['low'] & inp2['few'] & inp3['high'], importance['very low'])
rule4 = ctrl.Rule(inp1['low'] & inp2['few'] & inp3['very high'], importance['low'])
rule5 = ctrl.Rule(inp1['low'] & inp2['few'] & inp3['low'], importance['very low'])
rule6 = ctrl.Rule(inp1['low'] & inp2['few'] & inp3['medium'], importance['very low'])
rule7 = ctrl.Rule(inp1['low'] & inp2['few'] & inp3['high'], importance['very low'])
rule8 = ctrl.Rule(inp1['low'] & inp2['few'] & inp3['very high'], importance['very low'])
rule9 = ctrl.Rule(inp1['low'] & inp2['few'] & inp3['low'], importance['very low'])
rule10 = ctrl.Rule(inp1['low'] & inp2['few'] & inp3['medium'], importance['very low'])
rule11 = ctrl.Rule(inp1['low'] & inp2['few'] & inp3['high'], importance['very low'])
rule12 = ctrl.Rule(inp1['low'] & inp2['few'] & inp3['very high'], importance['very low'])
rule13 = ctrl.Rule(inp1['low'] & inp2['medium'] & inp3['low'], importance['very low'])
rule14 = ctrl.Rule(inp1['low'] & inp2['medium'] & inp3['medium'], importance['very low'])
rule15 = ctrl.Rule(inp1['low'] & inp2['medium'] & inp3['high'], importance['very low'])
rule16 = ctrl.Rule(inp1['low'] & inp2['medium'] & inp3['very high'], importance['very low'])
rule17 = ctrl.Rule(inp1['low'] & inp2['medium'] & inp3['low'], importance['very low'])
rule18 = ctrl.Rule(inp1['low'] & inp2['medium'] & inp3['medium'], importance['very low'])
rule19 = ctrl.Rule(inp1['low'] & inp2['medium'] & inp3['high'], importance['very low'])
rule20 = ctrl.Rule(inp1['low'] & inp2['medium'] & inp3['very high'], importance['very low'])
rule21 = ctrl.Rule(inp1['low'] & inp2['medium'] & inp3['low'], importance['very low'])
rule22 = ctrl.Rule(inp1['low'] & inp2['medium'] & inp3['medium'], importance['very low'])
rule23 = ctrl.Rule(inp1['low'] & inp2['medium'] & inp3['high'], importance['very low'])
rule24 = ctrl.Rule(inp1['low'] & inp2['medium'] & inp3['very high'], importance['very low'])
rule25 = ctrl.Rule(inp1['low'] & inp2['much'] & inp3['low'], importance['very low'])
rule26 = ctrl.Rule(inp1['low'] & inp2['much'] & inp3['medium'], importance['very low'])
rule27 = ctrl.Rule(inp1['low'] & inp2['much'] & inp3['high'], importance['very low'])
rule28 = ctrl.Rule(inp1['low'] & inp2['much'] & inp3['very high'], importance['very low'])
rule29 = ctrl.Rule(inp1['low'] & inp2['much'] & inp3['low'], importance['very low'])
rule30 = ctrl.Rule(inp1['low'] & inp2['much'] & inp3['medium'], importance['very low'])
rule31 = ctrl.Rule(inp1['low'] & inp2['much'] & inp3['high'], importance['very low'])
rule32 = ctrl.Rule(inp1['low'] & inp2['much'] & inp3['very high'], importance['very low'])
rule33 = ctrl.Rule(inp1['low'] & inp2['much'] & inp3['low'], importance['very low'])
rule34 = ctrl.Rule(inp1['low'] & inp2['much'] & inp3['medium'], importance['very low'])
rule35 = ctrl.Rule(inp1['low'] & inp2['much'] & inp3['high'], importance['very low'])
rule36 = ctrl.Rule(inp1['low'] & inp2['much'] & inp3['very high'], importance['very low'])
rule37 = ctrl.Rule(inp1['low'] & inp2['very much'] & inp3['low'], importance['very low'])
rule38 = ctrl.Rule(inp1['low'] & inp2['very much'] & inp3['medium'], importance['very low'])
rule39 = ctrl.Rule(inp1['low'] & inp2['very much'] & inp3['high'], importance['very low'])
rule40 = ctrl.Rule(inp1['low'] & inp2['very much'] & inp3['very high'], importance['very low'])
rule41 = ctrl.Rule(inp1['low'] & inp2['very much'] & inp3['low'], importance['very low'])
rule42 = ctrl.Rule(inp1['low'] & inp2['very much'] & inp3['medium'], importance['very low'])
rule43 = ctrl.Rule(inp1['low'] & inp2['very much'] & inp3['high'], importance['very low'])
rule44 = ctrl.Rule(inp1['low'] & inp2['very much'] & inp3['very high'], importance['very low'])
rule45 = ctrl.Rule(inp1['low'] & inp2['very much'] & inp3['low'], importance['very low'])
rule46 = ctrl.Rule(inp1['low'] & inp2['very much'] & inp3['medium'], importance['very low'])
# ---------------------------------------------------------------------------
# Fuzzy rules rule47-rule96: antecedent is inp1 AND inp2 AND inp3, consequent
# is a term of `importance`.
#
# NOTE(review): from rule49 onwards every (inp1, inp2, inp3) combination is
# declared three times in a row of four-rule sweeps, and the repeated rules do
# not always agree on the consequent (e.g. rule49 -> 'low' but rule53/rule57 ->
# 'very low' for the same medium/few/low antecedent). This looks like a fourth
# input with three terms was dropped from the antecedents - TODO confirm
# against the intended rule table.
# ---------------------------------------------------------------------------

# inp1 = low, inp2 = very much
rule47 = ctrl.Rule(inp1['low'] & inp2['very much'] & inp3['high'], importance['very low'])
rule48 = ctrl.Rule(inp1['low'] & inp2['very much'] & inp3['very high'], importance['very low'])

# inp1 = medium, inp2 = few (inp3 sweep, repetition 1)
rule49 = ctrl.Rule(inp1['medium'] & inp2['few'] & inp3['low'], importance['low'])
rule50 = ctrl.Rule(inp1['medium'] & inp2['few'] & inp3['medium'], importance['low'])
rule51 = ctrl.Rule(inp1['medium'] & inp2['few'] & inp3['high'], importance['low'])
rule52 = ctrl.Rule(inp1['medium'] & inp2['few'] & inp3['very high'], importance['medium'])
# inp1 = medium, inp2 = few (repetition 2, same antecedents as rule49-rule52)
rule53 = ctrl.Rule(inp1['medium'] & inp2['few'] & inp3['low'], importance['very low'])
rule54 = ctrl.Rule(inp1['medium'] & inp2['few'] & inp3['medium'], importance['very low'])
rule55 = ctrl.Rule(inp1['medium'] & inp2['few'] & inp3['high'], importance['very low'])
rule56 = ctrl.Rule(inp1['medium'] & inp2['few'] & inp3['very high'], importance['low'])
# inp1 = medium, inp2 = few (repetition 3, same antecedents as rule49-rule52)
rule57 = ctrl.Rule(inp1['medium'] & inp2['few'] & inp3['low'], importance['very low'])
rule58 = ctrl.Rule(inp1['medium'] & inp2['few'] & inp3['medium'], importance['very low'])
rule59 = ctrl.Rule(inp1['medium'] & inp2['few'] & inp3['high'], importance['very low'])
rule60 = ctrl.Rule(inp1['medium'] & inp2['few'] & inp3['very high'], importance['low'])

# inp1 = medium, inp2 = medium (inp3 sweep, repetition 1)
rule61 = ctrl.Rule(inp1['medium'] & inp2['medium'] & inp3['low'], importance['low'])
rule62 = ctrl.Rule(inp1['medium'] & inp2['medium'] & inp3['medium'], importance['medium'])
rule63 = ctrl.Rule(inp1['medium'] & inp2['medium'] & inp3['high'], importance['medium'])
rule64 = ctrl.Rule(inp1['medium'] & inp2['medium'] & inp3['very high'], importance['medium'])
# inp1 = medium, inp2 = medium (repetition 2, same antecedents as rule61-rule64)
rule65 = ctrl.Rule(inp1['medium'] & inp2['medium'] & inp3['low'], importance['low'])
rule66 = ctrl.Rule(inp1['medium'] & inp2['medium'] & inp3['medium'], importance['low'])
rule67 = ctrl.Rule(inp1['medium'] & inp2['medium'] & inp3['high'], importance['low'])
rule68 = ctrl.Rule(inp1['medium'] & inp2['medium'] & inp3['very high'], importance['medium'])
# inp1 = medium, inp2 = medium (repetition 3, same antecedents as rule61-rule64)
rule69 = ctrl.Rule(inp1['medium'] & inp2['medium'] & inp3['low'], importance['low'])
rule70 = ctrl.Rule(inp1['medium'] & inp2['medium'] & inp3['medium'], importance['low'])
rule71 = ctrl.Rule(inp1['medium'] & inp2['medium'] & inp3['high'], importance['low'])
rule72 = ctrl.Rule(inp1['medium'] & inp2['medium'] & inp3['very high'], importance['low'])

# inp1 = medium, inp2 = much (inp3 sweep, repetition 1)
rule73 = ctrl.Rule(inp1['medium'] & inp2['much'] & inp3['low'], importance['medium'])
rule74 = ctrl.Rule(inp1['medium'] & inp2['much'] & inp3['medium'], importance['medium'])
rule75 = ctrl.Rule(inp1['medium'] & inp2['much'] & inp3['high'], importance['medium'])
rule76 = ctrl.Rule(inp1['medium'] & inp2['much'] & inp3['very high'], importance['medium'])
# inp1 = medium, inp2 = much (repetition 2, same antecedents as rule73-rule76)
rule77 = ctrl.Rule(inp1['medium'] & inp2['much'] & inp3['low'], importance['low'])
rule78 = ctrl.Rule(inp1['medium'] & inp2['much'] & inp3['medium'], importance['low'])
rule79 = ctrl.Rule(inp1['medium'] & inp2['much'] & inp3['high'], importance['medium'])
rule80 = ctrl.Rule(inp1['medium'] & inp2['much'] & inp3['very high'], importance['medium'])
# inp1 = medium, inp2 = much (repetition 3, same antecedents as rule73-rule76)
rule81 = ctrl.Rule(inp1['medium'] & inp2['much'] & inp3['low'], importance['low'])
rule82 = ctrl.Rule(inp1['medium'] & inp2['much'] & inp3['medium'], importance['low'])
rule83 = ctrl.Rule(inp1['medium'] & inp2['much'] & inp3['high'], importance['low'])
rule84 = ctrl.Rule(inp1['medium'] & inp2['much'] & inp3['very high'], importance['medium'])

# inp1 = medium, inp2 = very much (inp3 sweep, repetition 1)
rule85 = ctrl.Rule(inp1['medium'] & inp2['very much'] & inp3['low'], importance['medium'])
rule86 = ctrl.Rule(inp1['medium'] & inp2['very much'] & inp3['medium'], importance['medium'])
rule87 = ctrl.Rule(inp1['medium'] & inp2['very much'] & inp3['high'], importance['medium'])
rule88 = ctrl.Rule(inp1['medium'] & inp2['very much'] & inp3['very high'], importance['medium'])
# inp1 = medium, inp2 = very much (repetition 2, same antecedents as rule85-rule88)
rule89 = ctrl.Rule(inp1['medium'] & inp2['very much'] & inp3['low'], importance['low'])
rule90 = ctrl.Rule(inp1['medium'] & inp2['very much'] & inp3['medium'], importance['medium'])
rule91 = ctrl.Rule(inp1['medium'] & inp2['very much'] & inp3['high'], importance['medium'])
rule92 = ctrl.Rule(inp1['medium'] & inp2['very much'] & inp3['very high'], importance['medium'])
# inp1 = medium, inp2 = very much (repetition 3, same antecedents as rule85-rule88)
rule93 = ctrl.Rule(inp1['medium'] & inp2['very much'] & inp3['low'], importance['low'])
rule94 = ctrl.Rule(inp1['medium'] & inp2['very much'] & inp3['medium'], importance['low'])
rule95 = ctrl.Rule(inp1['medium'] & inp2['very much'] & inp3['high'], importance['medium'])
rule96 = ctrl.Rule(inp1['medium'] & inp2['very much'] & inp3['very high'], importance['medium'])
# ---------------------------------------------------------------------------
# Fuzzy rules rule97-rule144 (inp1 = high): antecedent is inp1 AND inp2 AND
# inp3, consequent is a term of `importance`.
#
# NOTE(review): every (inp1, inp2, inp3) combination here is declared three
# times in a row of four-rule sweeps, and the repeated rules do not always
# agree on the consequent (e.g. rule100 -> 'high' but rule104/rule108 ->
# 'medium' for the same high/few/very high antecedent). This looks like a
# fourth input with three terms was dropped from the antecedents - TODO
# confirm against the intended rule table.
# ---------------------------------------------------------------------------

# inp1 = high, inp2 = few (inp3 sweep, repetition 1)
rule97 = ctrl.Rule(inp1['high'] & inp2['few'] & inp3['low'], importance['low'])
rule98 = ctrl.Rule(inp1['high'] & inp2['few'] & inp3['medium'], importance['low'])
rule99 = ctrl.Rule(inp1['high'] & inp2['few'] & inp3['high'], importance['medium'])
rule100 = ctrl.Rule(inp1['high'] & inp2['few'] & inp3['very high'], importance['high'])
# inp1 = high, inp2 = few (repetition 2, same antecedents as rule97-rule100)
rule101 = ctrl.Rule(inp1['high'] & inp2['few'] & inp3['low'], importance['low'])
rule102 = ctrl.Rule(inp1['high'] & inp2['few'] & inp3['medium'], importance['low'])
rule103 = ctrl.Rule(inp1['high'] & inp2['few'] & inp3['high'], importance['medium'])
rule104 = ctrl.Rule(inp1['high'] & inp2['few'] & inp3['very high'], importance['medium'])
# inp1 = high, inp2 = few (repetition 3, same antecedents as rule97-rule100)
rule105 = ctrl.Rule(inp1['high'] & inp2['few'] & inp3['low'], importance['low'])
rule106 = ctrl.Rule(inp1['high'] & inp2['few'] & inp3['medium'], importance['low'])
rule107 = ctrl.Rule(inp1['high'] & inp2['few'] & inp3['high'], importance['low'])
rule108 = ctrl.Rule(inp1['high'] & inp2['few'] & inp3['very high'], importance['medium'])

# inp1 = high, inp2 = medium (inp3 sweep, repetition 1)
rule109 = ctrl.Rule(inp1['high'] & inp2['medium'] & inp3['low'], importance['medium'])
rule110 = ctrl.Rule(inp1['high'] & inp2['medium'] & inp3['medium'], importance['medium'])
rule111 = ctrl.Rule(inp1['high'] & inp2['medium'] & inp3['high'], importance['high'])
rule112 = ctrl.Rule(inp1['high'] & inp2['medium'] & inp3['very high'], importance['high'])
# inp1 = high, inp2 = medium (repetition 2, same antecedents as rule109-rule112)
rule113 = ctrl.Rule(inp1['high'] & inp2['medium'] & inp3['low'], importance['medium'])
rule114 = ctrl.Rule(inp1['high'] & inp2['medium'] & inp3['medium'], importance['medium'])
rule115 = ctrl.Rule(inp1['high'] & inp2['medium'] & inp3['high'], importance['medium'])
rule116 = ctrl.Rule(inp1['high'] & inp2['medium'] & inp3['very high'], importance['high'])
# inp1 = high, inp2 = medium (repetition 3, same antecedents as rule109-rule112)
rule117 = ctrl.Rule(inp1['high'] & inp2['medium'] & inp3['low'], importance['medium'])
rule118 = ctrl.Rule(inp1['high'] & inp2['medium'] & inp3['medium'], importance['medium'])
rule119 = ctrl.Rule(inp1['high'] & inp2['medium'] & inp3['high'], importance['medium'])
rule120 = ctrl.Rule(inp1['high'] & inp2['medium'] & inp3['very high'], importance['medium'])

# inp1 = high, inp2 = much (inp3 sweep, repetition 1)
rule121 = ctrl.Rule(inp1['high'] & inp2['much'] & inp3['low'], importance['medium'])
rule122 = ctrl.Rule(inp1['high'] & inp2['much'] & inp3['medium'], importance['high'])
rule123 = ctrl.Rule(inp1['high'] & inp2['much'] & inp3['high'], importance['high'])
rule124 = ctrl.Rule(inp1['high'] & inp2['much'] & inp3['very high'], importance['very high'])
# inp1 = high, inp2 = much (repetition 2, same antecedents as rule121-rule124)
rule125 = ctrl.Rule(inp1['high'] & inp2['much'] & inp3['low'], importance['medium'])
rule126 = ctrl.Rule(inp1['high'] & inp2['much'] & inp3['medium'], importance['medium'])
rule127 = ctrl.Rule(inp1['high'] & inp2['much'] & inp3['high'], importance['high'])
rule128 = ctrl.Rule(inp1['high'] & inp2['much'] & inp3['very high'], importance['high'])
# inp1 = high, inp2 = much (repetition 3, same antecedents as rule121-rule124)
rule129 = ctrl.Rule(inp1['high'] & inp2['much'] & inp3['low'], importance['medium'])
rule130 = ctrl.Rule(inp1['high'] & inp2['much'] & inp3['medium'], importance['medium'])
rule131 = ctrl.Rule(inp1['high'] & inp2['much'] & inp3['high'], importance['medium'])
rule132 = ctrl.Rule(inp1['high'] & inp2['much'] & inp3['very high'], importance['high'])

# inp1 = high, inp2 = very much (inp3 sweep, repetition 1)
rule133 = ctrl.Rule(inp1['high'] & inp2['very much'] & inp3['low'], importance['high'])
rule134 = ctrl.Rule(inp1['high'] & inp2['very much'] & inp3['medium'], importance['high'])
rule135 = ctrl.Rule(inp1['high'] & inp2['very much'] & inp3['high'], importance['very high'])
rule136 = ctrl.Rule(inp1['high'] & inp2['very much'] & inp3['very high'], importance['very high'])
# inp1 = high, inp2 = very much (repetition 2, same antecedents as rule133-rule136)
rule137 = ctrl.Rule(inp1['high'] & inp2['very much'] & inp3['low'], importance['high'])
rule138 = ctrl.Rule(inp1['high'] & inp2['very much'] & inp3['medium'], importance['high'])
rule139 = ctrl.Rule(inp1['high'] & inp2['very much'] & inp3['high'], importance['high'])
rule140 = ctrl.Rule(inp1['high'] & inp2['very much'] & inp3['very high'], importance['very high'])
# inp1 = high, inp2 = very much (repetition 3, same antecedents as rule133-rule136)
rule141 = ctrl.Rule(inp1['high'] & inp2['very much'] & inp3['low'], importance['medium'])
rule142 = ctrl.Rule(inp1['high'] & inp2['very much'] & inp3['medium'], importance['high'])
rule143 = ctrl.Rule(inp1['high'] & inp2['very much'] & inp3['high'], importance['high'])
rule144 = ctrl.Rule(inp1['high'] & inp2['very much'] & inp3['very high'], importance['high'])
# Build the fuzzy control system from the complete rule base (rule1 .. rule144).
# Rules are listed in numeric order, twelve per line.
system = ctrl.ControlSystem(
    rules=[
        rule1, rule2, rule3, rule4, rule5, rule6, rule7, rule8, rule9, rule10, rule11, rule12,
        rule13, rule14, rule15, rule16, rule17, rule18, rule19, rule20, rule21, rule22, rule23, rule24,
        rule25, rule26, rule27, rule28, rule29, rule30, rule31, rule32, rule33, rule34, rule35, rule36,
        rule37, rule38, rule39, rule40, rule41, rule42, rule43, rule44, rule45, rule46, rule47, rule48,
        rule49, rule50, rule51, rule52, rule53, rule54, rule55, rule56, rule57, rule58, rule59, rule60,
        rule61, rule62, rule63, rule64, rule65, rule66, rule67, rule68, rule69, rule70, rule71, rule72,
        rule73, rule74, rule75, rule76, rule77, rule78, rule79, rule80, rule81, rule82, rule83, rule84,
        rule85, rule86, rule87, rule88, rule89, rule90, rule91, rule92, rule93, rule94, rule95, rule96,
        rule97, rule98, rule99, rule100, rule101, rule102, rule103, rule104, rule105, rule106, rule107, rule108,
        rule109, rule110, rule111, rule112, rule113, rule114, rule115, rule116, rule117, rule118, rule119, rule120,
        rule121, rule122, rule123, rule124, rule125, rule126, rule127, rule128, rule129, rule130, rule131, rule132,
        rule133, rule134, rule135, rule136, rule137, rule138, rule139, rule140, rule141, rule142, rule143, rule144,
    ]
)


# Define the control system simulation
#imp_sim = ctrl.ControlSystemSimulation(imp_ctrl)

# Pass inputs to the ControlSystemSimulation
#imp_sim.input['inp1'] = merged_df3['avg_rating'].values
#imp_sim.input['inp2'] = merged_df3['Number of ratings'].values
#imp_sim.input['inp3'] = merged_df3['Evaluation Similarity'].values

# Crunch the numbers
#imp_sim.compute()

# Retrieve the output
#output = imp_sim.output['importance']

# Simulation wrapper around the fuzzy rule base; one instance is reused for all rows.
expert_system = ctrl.ControlSystemSimulation(system)

# Run the expert system once per row of merged_df3 and collect its crisp
# 'importance' output. Values are gathered in row order and assigned
# positionally afterwards, so a non-unique DataFrame index cannot cause one
# row's result to overwrite another's (as label-based .loc assignment would).
importance_values = []
for avg_rating, n_ratings, similarity in zip(
    merged_df3['avg_rating'],
    merged_df3['Number of ratings'],
    merged_df3['Evaluation Similarity'],
):
    expert_system.input['inp1'] = avg_rating
    expert_system.input['inp2'] = n_ratings
    expert_system.input['inp3'] = similarity
    expert_system.compute()
    importance_values.append(expert_system.output['importance'])

# Store the expert-system output so that the final evaluation below is derived
# from the value computed in this cell, not from whatever an earlier cell may
# have left in 'EXS IMPORTANCE'.
# NOTE(review): this overwrites any existing 'EXS IMPORTANCE' column - confirm
# that no other cell is meant to be the source of that column.
merged_df3['EXS IMPORTANCE'] = importance_values

# Combine the predicted rating with the importance:
#   predicted <= 0  ->  predicted * importance
#   predicted  > 0  ->  predicted * (1 + importance)
#   predicted NaN   ->  0
predicted = merged_df3['Predicted Rating']
importance = merged_df3['EXS IMPORTANCE']
weighted = np.where(
    predicted <= 0,
    predicted * importance,
    predicted * (1 + importance),
)
merged_df3['Final Evaluation'] = np.where(predicted.notna(), weighted, 0)

# Sort the DataFrame by 'Final Evaluation' column in descending order
merged_df4 = merged_df3.sort_values(by='Final Evaluation', ascending=False)

# Select the top 25 movies with the highest 'Final Evaluation' values
top_25_movies_F = merged_df4.head(25)

# Print the top 25 movies with their corresponding 'Final Evaluation' values
print(top_25_movies_F[['title', 'Predicted Rating','EXS IMPORTANCE','Final Evaluation']])



In [None]:
# Show the full top-25 frame (all columns) as this cell's output.
top_25_movies_F