#Logistic Regression

#Imports

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

#Data Loading

In [2]:
#Load ratings data from the github
rating_path = "https://raw.githubusercontent.com/Bansal0527/Movie-Recomendation-System/main/Dataset/ratings.csv"
movies_path = 'https://raw.githubusercontent.com/Bansal0527/Movie-Recomendation-System/master/Dataset/movies.csv'

ratings = pd.read_csv(rating_path)
movies = pd.read_csv(movies_path)

print(ratings.head())


   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931


In [4]:
# Define threshold for liked/disliked
threshold = 2

# Convert ratings to binary labels
ratings['liked'] = (ratings['rating'] >= threshold).astype(int)

# Prepare data for logistic regression
X = ratings[['userId', 'movieId']]
y = ratings['liked']




In [5]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
# accuracy = accuracy_score(y_test, y_pred)
# print("Accuracy:", accuracy)

# # Classification report
# print("Classification Report:")
# print(classification_report(y_test, y_pred))



# Example of using the model to recommend movies for a specific user
user_id = 1
user_movies = ratings[ratings['userId'] == user_id]['movieId'].unique()
user_unrated_movies = ratings[~ratings['movieId'].isin(user_movies)]['movieId'].unique()

# Create a DataFrame of user's unrated movies
recommendation_X = pd.DataFrame({'userId': [user_id] * len(user_unrated_movies), 'movieId': user_unrated_movies})

# Predict probability of being liked
user_probabilities = model.predict_proba(recommendation_X)[:, 1]

# Find the top recommended movies
recommendations = pd.DataFrame({'movieId': user_unrated_movies, 'liked_probability': user_probabilities})
recommendations = recommendations.sort_values(by='liked_probability', ascending=False).head(10)

# Add movie names to recommendations
recommendations_with_names = pd.merge(recommendations, movies[['movieId', 'title']], on='movieId')

print("Top Recommended Movies for User", user_id, ":")
print(recommendations_with_names[['title', 'liked_probability']])

Top Recommended Movies for User 1 :
                                title  liked_probability
0                      Jumanji (1995)           0.953235
1            Waiting to Exhale (1995)           0.953234
2  Father of the Bride Part II (1995)           0.953234
3                      Sabrina (1995)           0.953234
4                 Tom and Huck (1995)           0.953234
5                 Sudden Death (1995)           0.953234
6                    GoldenEye (1995)           0.953234
7      American President, The (1995)           0.953234
8  Dracula: Dead and Loving It (1995)           0.953233
9                        Balto (1995)           0.953233
