## Content-Based Movie Recommendation System

### Importing libraries

In [None]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

`difflib` is imported to find the dataset title that most closely matches the movie name typed by the user.

### Data Collection and Pre-processing

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read from this runtime.
# The google.colab module is only available inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
file_path = "/content/drive/MyDrive/Colab Notebooks/Projects/Movie Recommendation Project/movies.csv"

In [None]:
# Loading the data
# Reuse `file_path` from the previous cell so the dataset location is defined in one place only.
movies_data = pd.read_csv(file_path)

In [None]:
movies_data.head()

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


In [None]:
movies_data.shape

(4803, 24)

Feature selection

In [None]:
Selected_features = ['genres', 'keywords', 'overview', 'tagline', 'cast', 'director']

In [None]:
Selected_features

['genres', 'keywords', 'overview', 'tagline', 'cast', 'director']

In [None]:
# Fill missing values in every selected text column with a single space
# (a space, not an empty string) in one vectorised assignment.
movies_data[Selected_features] = movies_data[Selected_features].fillna(" ")

In [None]:
# Build one text blob per movie: start from the genres column and append
# each remaining text column, separated by a single space.
combined_features = movies_data['genres']
for column_name in ['keywords', 'overview', 'tagline', 'cast', 'director']:
  combined_features = combined_features + ' ' + movies_data[column_name]

In [None]:
combined_features[0]

'Action Adventure Fantasy Science Fiction culture clash future space war space colony society In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization. Enter the World of Pandora. Sam Worthington Zoe Saldana Sigourney Weaver Stephen Lang Michelle Rodriguez James Cameron'

In [None]:
# Converting the text data into numerical vectors
vectorizer = TfidfVectorizer()

In [None]:
numerical_vectors = vectorizer.fit_transform(combined_features)

In [None]:
print(numerical_vectors)

  (0, 561)	0.05971816344971169
  (0, 703)	0.06846420517510078
  (0, 9754)	0.08513696797398294
  (0, 23977)	0.07941905576010944
  (0, 10023)	0.07960231361105431
  (0, 6601)	0.1498786462809525
  (0, 5279)	0.1549075340655008
  (0, 10796)	0.11095111375730655
  (0, 25413)	0.24369151759694266
  (0, 29469)	0.08637114089261566
  (0, 5592)	0.17322386697661618
  (0, 25232)	0.1336739112380023
  (0, 13474)	0.03647840985795873
  (0, 27118)	0.08328687324810813
  (0, 239)	0.19716936546022962
  (0, 4768)	0.12501550204808315
  (0, 20104)	0.19260031442011954
  (0, 17021)	0.14324534514274753
  (0, 14023)	0.041700928584839175
  (0, 7827)	0.1797988202434295
  (0, 27405)	0.0310150192195571
  (0, 18249)	0.14769810641761003
  (0, 20039)	0.37747447361884223
  (0, 19541)	0.04908322232312576
  (0, 28597)	0.1549075340655008
  :	:
  (4802, 9588)	0.10415492266309316
  (4802, 21386)	0.08975511036766048
  (4802, 9166)	0.07829183421073847
  (4802, 24102)	0.08207980733784753
  (4802, 6136)	0.10774075928383217
  (4802, 

### Cosine Similarity

In [None]:
# Calculating the similarity scores using cosine similarity
# Only one matrix is passed, so each row of numerical_vectors is compared with every row
# of the same matrix: entry [i, j] is the similarity between movie i and movie j.
similarity_matrix = cosine_similarity(numerical_vectors)

In [None]:
similarity_matrix

array([[1.        , 0.05083168, 0.0332947 , ..., 0.02749812, 0.0304889 ,
        0.0072518 ],
       [0.05083168, 1.        , 0.04356836, ..., 0.05077045, 0.03100979,
        0.01521198],
       [0.0332947 , 0.04356836, 1.        , ..., 0.02646984, 0.04751623,
        0.01372603],
       ...,
       [0.02749812, 0.05077045, 0.02646984, ..., 1.        , 0.03481447,
        0.03546821],
       [0.0304889 , 0.03100979, 0.04751623, ..., 0.03481447, 1.        ,
        0.03098945],
       [0.0072518 , 0.01521198, 0.01372603, ..., 0.03546821, 0.03098945,
        1.        ]])

In [None]:
similarity_matrix.shape

(4803, 4803)

In [None]:
# Getting the movie name from user
movie_name = input("Enter your favourite movie name: ")
movie_name

Enter your favourite movie name: batman


'batman'

In [None]:
# Creating a list with all the movie names given in the dataset
movie_titles = movies_data['title'].tolist()
movie_titles[0:10]

['Avatar',
 "Pirates of the Caribbean: At World's End",
 'Spectre',
 'The Dark Knight Rises',
 'John Carter',
 'Spider-Man 3',
 'Tangled',
 'Avengers: Age of Ultron',
 'Harry Potter and the Half-Blood Prince',
 'Batman v Superman: Dawn of Justice']

In [None]:
# finding the closest match for the movie name given by the user
close_matches = difflib.get_close_matches(movie_name, movie_titles)
close_matches

['Batman', 'Batman', 'Catwoman']

In [None]:
# difflib.get_close_matches returns an empty list when no title is similar enough,
# so fail with an explicit message instead of a bare IndexError on [0].
if not close_matches:
  raise ValueError(f"No movie title resembling {movie_name!r} was found in the dataset")
closest_match = close_matches[0]
closest_match

'Batman'

In [None]:
# finding the index of the movie
# similarity_matrix rows follow the row order of movies_data, so the lookup key must be the
# row *position* of the first title match. The CSV's 'index' column gives the same number
# only as long as it holds consecutive 0-based values.
movie_index = int(np.flatnonzero((movies_data['title'] == closest_match).to_numpy())[0])
movie_index

1359

In [None]:
# getting a list of similar movies as (row position, similarity to the chosen movie) pairs
similarity_score = list(enumerate(similarity_matrix[movie_index]))
# Display only the first few pairs; the full list has one entry per movie in the dataset.
similarity_score[:10]

[(0, 0.030860719934350737),
 (1, 0.05456461098791574),
 (2, 0.025420994912788583),
 (3, 0.2338052098853376),
 (4, 0.0358579417889144),
 (5, 0.06480779316059049),
 (6, 0.025328125342328544),
 (7, 0.031193482768275965),
 (8, 0.07596121965157163),
 (9, 0.13026811298484625),
 (10, 0.07833585908102393),
 (11, 0.03140815118288081),
 (12, 0.046352500640976665),
 (13, 0.020981937983779262),
 (14, 0.08730563650625815),
 (15, 0.045390479826427535),
 (16, 0.04552566681084561),
 (17, 0.05419186846387077),
 (18, 0.05218562648148115),
 (19, 0.04606592690189089),
 (20, 0.0525972351819297),
 (21, 0.051347408495229015),
 (22, 0.02839994503747764),
 (23, 0.019972687270657253),
 (24, 0.03409065291411044),
 (25, 0.03537669989730546),
 (26, 0.03301451878795811),
 (27, 0.03862483049353923),
 (28, 0.009394777698689412),
 (29, 0.027117087927202004),
 (30, 0.11255005372745663),
 (31, 0.02385466070349696),
 (32, 0.05433211054681876),
 (33, 0.013086526824339497),
 (34, 0.008338950414900109),
 (35, 0.043018081030

In [None]:
len(similarity_score)

4803

In [None]:
# Sorting the movies based on the similarity score values (highest first)
sorted_similar_movies = sorted(similarity_score, key=lambda x:x[1], reverse=True)
# Display only the best few pairs instead of dumping the whole list.
sorted_similar_movies[:10]

[(1359, 1.0),
 (428, 0.2617904941926373),
 (119, 0.24169629159349437),
 (210, 0.23658744402159052),
 (3, 0.2338052098853376),
 (65, 0.19739154749412363),
 (3854, 0.17189148066390786),
 (299, 0.17131588130636344),
 (9, 0.13026811298484625),
 (30, 0.11255005372745663),
 (72, 0.11188219858913873),
 (1469, 0.10632393464294962),
 (41, 0.09937847357289423),
 (813, 0.09831507541115998),
 (1512, 0.09722485277470566),
 (2381, 0.09504139941592654),
 (1390, 0.09212763468361622),
 (4759, 0.09119404017361916),
 (1001, 0.09045240143423047),
 (870, 0.0901958374513215),
 (1017, 0.08998086162656344),
 (2858, 0.08816994503073253),
 (14, 0.08730563650625815),
 (1247, 0.08650870548084974),
 (753, 0.08580678946933294),
 (1421, 0.08488904988897164),
 (2365, 0.08348512466950818),
 (3819, 0.08346778604898233),
 (3297, 0.08312337849316276),
 (4183, 0.08234811260685254),
 (3068, 0.08148501232489744),
 (1803, 0.08148108286462766),
 (2029, 0.08124850902763674),
 (4392, 0.08059207903276856),
 (800, 0.0804322822924

In [None]:
# printing the name of similar movies based on the index
print("Movies suggested for you:- ")

# The first entry of the sorted list is skipped (it is normally the chosen movie itself,
# with similarity 1.0). Slicing to the next 10 entries avoids a DataFrame lookup for
# every one of the remaining rows that would never be printed.
# The first element of each pair is a row position, so the title is read with .iloc.
for i, (index, _score) in enumerate(sorted_similar_movies[1:11], start=1):
  movie_title = movies_data['title'].iloc[index]
  print(i, ".", movie_title)



Movies suggested for you:- 
1 . Batman Returns
2 . Batman Begins
3 . Batman & Robin
4 . The Dark Knight Rises
5 . The Dark Knight
6 . Batman: The Dark Knight Returns, Part 2
7 . Batman Forever
8 . Batman v Superman: Dawn of Justice
9 . Spider-Man 2
10 . Suicide Squad


#### Movie Recommendation System

In [None]:
# End-to-end recommendation: read a title, fuzzy-match it, and print the 10 most similar movies.
movie_name = input("Enter your favourite movie name: ")
movie_titles = movies_data['title'].tolist()
close_matches = difflib.get_close_matches(movie_name, movie_titles)

if not close_matches:
  # get_close_matches returns [] when nothing is similar enough; report it
  # instead of failing with an IndexError on close_matches[0].
  print("No close match found for", repr(movie_name))
else:
  closest_match = close_matches[0]
  # Row position of the first movie with the matched title; similarity_matrix rows
  # follow the row order of movies_data.
  movie_index = int(np.flatnonzero((movies_data['title'] == closest_match).to_numpy())[0])
  similarity_score = list(enumerate(similarity_matrix[movie_index]))
  sorted_similar_movies = sorted(similarity_score, key=lambda x:x[1], reverse=True)
  print("Movies suggested for you:- ")

  # Skip the first entry (normally the chosen movie itself) and look up only the
  # 10 titles that are actually printed.
  for i, (index, _score) in enumerate(sorted_similar_movies[1:11], start=1):
    movie_title = movies_data['title'].iloc[index]
    print(i, ".", movie_title)

Enter your favourite movie name: batman
Movies suggested for you:- 
1 . Batman Returns
2 . Batman Begins
3 . Batman & Robin
4 . The Dark Knight Rises
5 . The Dark Knight
6 . Batman: The Dark Knight Returns, Part 2
7 . Batman Forever
8 . Batman v Superman: Dawn of Justice
9 . Spider-Man 2
10 . Suicide Squad


In [None]:
!pip install streamlit pyngrok

Collecting streamlit
  Downloading streamlit-1.40.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.1-py3-none-any.whl.metadata (8.3 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<6,>=2.1.5 (from streamlit)
  Downloading watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.9/41.9 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Downloading streamlit-1.40.0-py2.py3-none-any.whl (8.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m53.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.1-py3-none-any.whl (22 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m72.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-5.0.3-py3-none-manylinux2014_x86_64

In [None]:
import os

# Locations of the per-user Streamlit settings directory and its config file
streamlit_dir = os.path.expanduser("~/.streamlit")
config_path = os.path.expanduser("~/.streamlit/config.toml")

# Theme section written verbatim to config.toml (values as double-quoted strings)
theme_config = """
    [theme]
    primaryColor = "#e50914"  # Netflix-style red accent
    backgroundColor = "#121212"  # Dark charcoal background
    secondaryBackgroundColor = "#1e1e1e"  # Slightly lighter for sidebars or sections
    textColor = "#ffffff"  # White for readability
    font = "sans serif"  # Clean, modern font
    """

# Create the directory if needed, then write the file (mode "w" replaces existing content)
os.makedirs(streamlit_dir, exist_ok=True)
with open(config_path, "w") as config_file:
    config_file.write(theme_config)


In [None]:
import os

# Theme section for config.toml, this time with single-quoted values
THEME_TOML = """
    [theme]
    primaryColor = '#e50914'  # Netflix-style red accent
    backgroundColor = '#121212'  # Dark charcoal background
    secondaryBackgroundColor = '#1e1e1e'  # Slightly lighter for sidebars or sections
    textColor = '#ffffff'  # White for readability
    font = 'sans serif'  # Clean, modern font
    """

# Make sure the .streamlit directory exists, then (re)write config.toml;
# opening with mode "w" discards whatever the file contained before.
os.makedirs(os.path.expanduser("~/.streamlit"), exist_ok=True)
config_file_path = os.path.expanduser("~/.streamlit/config.toml")
with open(config_file_path, "w") as handle:
    handle.write(THEME_TOML)


In [None]:
%%writefile app.py
import difflib
import html
import time

import numpy as np
import pandas as pd
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Custom CSS injected into the page to recolor backgrounds, text, buttons and headers.
# NOTE(review): the .css-* selectors look like generated class names - confirm they
# still match the elements of the installed Streamlit version.
CUSTOM_CSS = """
    <style>
    /* Background colors */
    .css-18e3th9 {background-color: #121212;} /* Main background */
    .css-1d391kg {background-color: #1e1e1e;} /* Sidebar background */

    /* Text color */
    .css-10trblm {color: #ffffff;} /* General text color */

    /* Primary button color */
    .css-1cpxqw2 {background-color: #e50914; color: #ffffff;} /* Button background and text color */

    /* Font styling */
    .css-12oz5g7 {font-family: sans-serif;} /* General font */

    /* Title styling */
    h1, h2, h3, h4, h5, h6 {color: #e50914;} /* Header colors to match the primary color */

    /* Customize other elements as needed */
    </style>
    """
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

# Loading movie dataset
@st.cache_data
def load_data():
    """Read the movies CSV from the mounted Drive folder and return it as a DataFrame.

    The function is wrapped in st.cache_data.
    """
    dataset_path = "/content/drive/MyDrive/Colab Notebooks/Projects/Movie Recommendation Project/movies.csv"
    return pd.read_csv(dataset_path)

# Load data
movies_data = load_data()

Selected_features = ['genres', 'keywords', 'overview', 'tagline', 'cast', 'director']

# Missing text is replaced with a single space so the concatenation below never yields NaN
for feature in Selected_features:
  movies_data[feature] = movies_data[feature].fillna(" ")


# Streamlit re-executes this script on every widget interaction, so the expensive
# TF-IDF + pairwise cosine-similarity step is cached rather than recomputed each time.
@st.cache_resource
def build_similarity_matrix(_movies):
    """Return the pairwise cosine-similarity matrix of the movies' combined text.

    Args:
        _movies: DataFrame containing the 'genres', 'keywords', 'overview', 'tagline',
            'cast' and 'director' columns with missing values already filled.
            The leading underscore tells Streamlit not to hash this argument,
            so the result is computed once and then reused.

    Returns:
        Square array in which entry [i, j] is the cosine similarity between the
        TF-IDF vectors of rows i and j of `_movies`.
    """
    # Combining all the relevant feature
    combined_features = _movies['genres'] +' '+ _movies['keywords'] +' '+ _movies['overview'] +' '+ _movies['tagline'] +' '+ _movies['cast'] +' '+ _movies['director']

    # converting the combined feature into numerical vectors
    vectorizer = TfidfVectorizer()
    numerical_vectors = vectorizer.fit_transform(combined_features)

    # creating the similarity matrix using cosine similarity
    return cosine_similarity(numerical_vectors)


similarity_matrix = build_similarity_matrix(movies_data)

movie_titles = movies_data['title'].tolist()

# Function to get similar movies based on movie title
def recommend_movies_by_title(input_title, top_n=11):
    """Return the movies most similar to the dataset title closest to `input_title`.

    Args:
        input_title: Free-text movie name; it is fuzzy-matched against `movie_titles`
            with difflib.get_close_matches and the best match is used.
        top_n: Number of rows to return, best score first. The first row is normally
            the matched movie itself (a movie has similarity 1.0 with itself), so the
            default of 11 yields the matched movie plus 10 recommendations.
            Previously this argument was ignored and 10 rows were always returned.

    Returns:
        DataFrame with columns 'Title', 'Genre' and 'Description' and a 0-based index,
        ordered by decreasing similarity, or None when no title in the dataset is
        close enough to `input_title`.
    """
    # Getting the most matching movie title
    close_matches = difflib.get_close_matches(input_title, movie_titles)
    if not close_matches:
        # Nothing similar enough: signal it to the caller instead of raising IndexError
        return None
    closest_match = close_matches[0]

    # Row position of the first movie carrying that title; similarity_matrix rows
    # follow the row order of movies_data.
    movie_index = int(np.flatnonzero((movies_data['title'] == closest_match).to_numpy())[0])

    similarity_score = enumerate(similarity_matrix[movie_index])
    sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)

    # Keep only the rows that will be returned, then fetch them in a single positional
    # lookup instead of filtering the whole DataFrame three times per movie.
    top_positions = [position for position, _score in sorted_similar_movies[:max(top_n, 0)]]
    recommended_movies = (
        movies_data.iloc[top_positions][['title', 'genres', 'overview']]
        .rename(columns={'title': 'Title', 'genres': 'Genre', 'overview': 'Description'})
        .reset_index(drop=True)
    )

    return recommended_movies



# Streamlit App
st.title("🎬 Movie Recommendation System")
st.write("Enter the name of a movie you like, and we'll recommend similar movies!")

# User input for movie title
st.sidebar.header("Find Similar Movies")
user_movie = st.sidebar.text_input("Enter a Movie Title", "The Matrix")

# Button to get recommendations
if st.sidebar.button("Get Recommendations"):
    # Getting recommendations while a spinner is shown
    with st.spinner('Operation in progress. Please wait...'):
        recommended_movies = recommend_movies_by_title(user_movie)
    st.subheader("Recommended Movies")
    st.write("---")

    if recommended_movies is None or recommended_movies.empty:
        # Tell the user explicitly instead of rendering an empty section
        st.warning("No matching movie was found. Please try a different title.")
    else:
        # One section per recommended movie: title, genres, description, separator
        for _, row in recommended_movies.iterrows():
            # The title comes from the dataset and is embedded in raw HTML, so escape it first
            safe_title = html.escape(str(row['Title']))
            title_html = f"<h3 style='font-size: 24px; margin-bottom: 5px;'>{safe_title}</h3>"
            st.markdown(title_html, unsafe_allow_html=True)
            st.write(f"_Genres:_ {row['Genre']}")
            st.write(row['Description'])
            st.markdown("<hr style='border:1px solid #cccccc;'>", unsafe_allow_html=True)

Overwriting app.py


In [None]:
!pip install pyngrok
!ngrok config add-authtoken 2kSH6BFKso3rv4m41lr5NlyJHJr_6hCuZWtwTUarcVSoVXdt3

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
# NOTE(review): no TOKEN argument is passed here, so ngrok only prints its usage text.
# The authtoken itself is configured in the previous cell; this cell looks like a leftover.
!ngrok config add-authtoken

add-authtoken - save authtoken to configuration file

USAGE:
  ngrok config add-authtoken TOKEN [flags]

AUTHOR:
  ngrok - <support@ngrok.com>

COMMANDS: 
  config          update or migrate ngrok's configuration file
  http            start an HTTP tunnel
  tcp             start a TCP tunnel
  tunnel          start a tunnel for use with a tunnel-group backend

EXAMPLES: 
  ngrok http 80                                                 # secure public URL for port 80 web server
  ngrok http --url baz.ngrok.dev 8080                           # port 8080 available at baz.ngrok.dev
  ngrok tcp 22                                                  # tunnel arbitrary TCP traffic to port 22
  ngrok http 80 --oauth=google --oauth-allow-email=foo@foo.com  # secure your app with oauth

Paid Features: 
  ngrok http 80 --url mydomain.com                              # run ngrok with your own custom domain
  ngrok http 80 --cidr-allow 2600:8c00::a03c:91ee:fe69:9695/32  # run ngrok with IP policy rest

In [None]:
from pyngrok import ngrok

# Port the tunnel forwards to (the Streamlit app port)
STREAMLIT_PORT = 8501

# Open an HTTP ngrok tunnel to that port and show the resulting public address
public_url = ngrok.connect(STREAMLIT_PORT, "http")
print("Streamlit app URL:", public_url)


Streamlit app URL: NgrokTunnel: "https://ae26-34-27-168-81.ngrok-free.app" -> "http://localhost:8501"


In [None]:
!streamlit run app.py &>/dev/null&