# **CF movie recommendation engine**

### Importing libraries

In [1]:
import pandas as pd
import numpy as np

### Loading Movies CSV

In [2]:
movies_df = pd.read_csv("./data/movies.csv")
print(movies_df.info())
movies_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86537 entries, 0 to 86536
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   movieId  86537 non-null  int64 
 1   title    86537 non-null  object
 2   genres   86537 non-null  object
dtypes: int64(1), object(2)
memory usage: 2.0+ MB
None


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


### Loading ratings CSV

In [3]:
ratings_df = pd.read_csv("./data/ratings.csv")
print(ratings_df.info())
ratings_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33832162 entries, 0 to 33832161
Data columns (total 4 columns):
 #   Column     Dtype  
---  ------     -----  
 0   userId     int64  
 1   movieId    int64  
 2   rating     float64
 3   timestamp  int64  
dtypes: float64(1), int64(3)
memory usage: 1.0 GB
None


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,1225734739
1,1,110,4.0,1225865086
2,1,158,4.0,1225733503
3,1,260,4.5,1225735204
4,1,356,5.0,1225735119


### Loading tags CSV

In [4]:
tags_df = pd.read_csv("./data/tags.csv")
print(tags_df.info())
tags_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2328315 entries, 0 to 2328314
Data columns (total 4 columns):
 #   Column     Dtype 
---  ------     ----- 
 0   userId     int64 
 1   movieId    int64 
 2   tag        object
 3   timestamp  int64 
dtypes: int64(3), object(1)
memory usage: 71.1+ MB
None


Unnamed: 0,userId,movieId,tag,timestamp
0,10,260,good vs evil,1430666558
1,10,260,Harrison Ford,1430666505
2,10,260,sci-fi,1430666538
3,14,1221,Al Pacino,1311600756
4,14,1221,mafia,1311600746


### Extracting user given tags for each movie

In [5]:
moviesTags = []
for id in movies_df["movieId"]:
    tags = set(tags_df[tags_df["movieId"] == id]["tag"].astype(str))
    moviesTags.append('|'.join(list(tags)))

### Adding tags column for the movies dataframe

In [6]:
movies_df["tags"] = moviesTags
movies_df.head(10)

Unnamed: 0,movieId,title,genres,tags
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,kids and family|chrysler lebaron convertible|m...
1,2,Jumanji (1995),Adventure|Children|Fantasy,cutting one's own hair|reference to wilt chamb...
2,3,Grumpier Old Men (1995),Comedy|Romance,sequel|good soundtrack|Ann Margaret|old|moldy|...
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,interracial relationship|revenge|slurs|girl mo...
4,5,Father of the Bride Part II (1995),Comedy,sequel|daughter|pregnancy|growing old|childhoo...
5,6,Heat (1995),Action|Crime|Thriller,In The Nucleus|Al Pacino Vs Robert De Niro|tra...
6,7,Sabrina (1995),Comedy|Romance,fashion assistant|sexuality|female protagonist...
7,8,Tom and Huck (1995),Adventure|Children,flintlock rifle|alabama|evil man|kiss|disarmin...
8,9,Sudden Death (1995),Action,mercilessness|evil man|disarming someone|stabb...
9,10,GoldenEye (1995),Action|Adventure|Thriller,m character|villainess|automobile|shaken not s...


In [7]:
# Saving modified dataframe
# movies_df.to_csv("./data/movies_modified.csv")

### Normalizing users rating

In [18]:
temp_df = ratings_df.copy()

# Calculate the minimum and maximum ratings for each user
user_min_ratings = temp_df.groupby("userId")["rating"].transform("min")
user_max_ratings = temp_df.groupby("userId")["rating"].transform("max")

# Normalize the ratings for each user
temp_df["rating"] = 4 / (user_max_ratings - user_min_ratings) * (temp_df["rating"] - user_max_ratings) + 5

temp_df.head(20)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,3.666667,1225734739
1,1,110,3.666667,1225865086
2,1,158,3.666667,1225733503
3,1,260,4.333333,1225735204
4,1,356,5.0,1225735119
5,1,381,3.0,1225734105
6,1,596,3.666667,1225733524
7,1,1036,5.0,1225735626
8,1,1049,2.333333,1225734079
9,1,1066,3.666667,1225736961


In [19]:
# Saving modified dataframe
# temp_df.to_csv("./data/rates_modified.csv")

### Importing Deep Learning Modules

In [27]:
!pip install tensorflow
!pip install keras
import tensorflow
from keras.layers import Embedding, Reshape, Merge
from keras.models import Sequential
from keras.optimizers import Adamax
from keras.callbacks import EarlyStopping, ModelCheckpoint

Collecting tensorflow
  Obtaining dependency information for tensorflow from https://files.pythonhosted.org/packages/80/6f/57d36f6507e432d7fc1956b2e9e8530c5c2d2bfcd8821bcbfae271cd6688/tensorflow-2.14.0-cp311-cp311-win_amd64.whl.metadata
  Using cached tensorflow-2.14.0-cp311-cp311-win_amd64.whl.metadata (3.3 kB)
Collecting tensorflow-intel==2.14.0 (from tensorflow)
  Obtaining dependency information for tensorflow-intel==2.14.0 from https://files.pythonhosted.org/packages/ad/6e/1bfe367855dd87467564f7bf9fa14f3b17889988e79598bc37bf18f5ffb6/tensorflow_intel-2.14.0-cp311-cp311-win_amd64.whl.metadata
  Using cached tensorflow_intel-2.14.0-cp311-cp311-win_amd64.whl.metadata (4.8 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.14.0->tensorflow)
  Obtaining dependency information for absl-py>=1.0.0 from https://files.pythonhosted.org/packages/01/e4/dc0a1dcc4e74e08d7abedab278c795eef54a224363bb18f5692f416d834f/absl_py-2.0.0-py3-none-any.whl.metadata
  Using cached absl_py-2.0.0-py3-none-

ERROR: Could not install packages due to an OSError: [Errno 28] No space left on device




   ------------ --------------------------- 92.4/284.2 MB 3.5 MB/s eta 0:00:56
   ------------- -------------------------- 92.5/284.2 MB 3.5 MB/s eta 0:00:56
   ------------- -------------------------- 92.6/284.2 MB 3.4 MB/s eta 0:00:56
   ------------- -------------------------- 92.7/284.2 MB 3.4 MB/s eta 0:00:56
   ------------- -------------------------- 92.8/284.2 MB 3.4 MB/s eta 0:00:56
   ------------- -------------------------- 93.0/284.2 MB 3.4 MB/s eta 0:00:56
   ------------- -------------------------- 93.1/284.2 MB 3.5 MB/s eta 0:00:56
   ------------- -------------------------- 93.2/284.2 MB 3.5 MB/s eta 0:00:56
   ------------- -------------------------- 93.4/284.2 MB 3.5 MB/s eta 0:00:56
   ------------- -------------------------- 93.5/284.2 MB 3.5 MB/s eta 0:00:56
   ------------- -------------------------- 93.6/284.2 MB 3.5 MB/s eta 0:00:56
   ------------- -------------------------- 93.8/284.2 MB 3.5 MB/s eta 0:00:55
   ------------- -------------------------- 93.9/28

ModuleNotFoundError: No module named 'tensorflow'

### Building our neural network model

In [None]:
factors = 100