In [3]:
pip install pandas scikit-learn numpy

Defaulting to user installation because normal site-packages is not writeable
Looking in links: /usr/share/pip-wheels
Note: you may need to restart the kernel to use updated packages.


In [86]:
import pandas as pd

# Read the movie catalogue from the CSV file (path is relative to the
# notebook's working directory).
df = pd.read_csv("movies.csv")

# Preview the first five rows to confirm the file loaded as expected.
print(df.head(5))


                                         Poster_Link  \
0  https://m.media-amazon.com/images/M/MV5BMDFkYT...   
1  https://m.media-amazon.com/images/M/MV5BM2MyNj...   
2  https://m.media-amazon.com/images/M/MV5BMTMxNT...   
3  https://m.media-amazon.com/images/M/MV5BMWMwMG...   
4  https://m.media-amazon.com/images/M/MV5BMWU4N2...   

               Series_Title Released_Year Certificate  Runtime  \
0  The Shawshank Redemption          1994           A  142 min   
1             The Godfather          1972           A  175 min   
2           The Dark Knight          2008          UA  152 min   
3    The Godfather: Part II          1974           A  202 min   
4              12 Angry Men          1957           U   96 min   

                  Genre  IMDB_Rating  \
0                 Drama          9.3   
1          Crime, Drama          9.2   
2  Action, Crime, Drama          9.0   
3          Crime, Drama          9.0   
4          Crime, Drama          9.0   

                         

In [88]:
# Reduce the frame to the four fields used later in the notebook, rename two
# of them to shorter labels, and discard every row that has a missing value.
df = (
    df.loc[:, ['Series_Title', 'Overview', 'Genre', 'IMDB_Rating']]
    .rename(columns={'Series_Title': 'Title', 'Overview': 'Description'})
    .dropna()
)

# Preview the cleaned frame.
print(df.head())


                      Title  \
0  The Shawshank Redemption   
1             The Godfather   
2           The Dark Knight   
3    The Godfather: Part II   
4              12 Angry Men   

                                         Description                 Genre  \
0  Two imprisoned men bond over a number of years...                 Drama   
1  An organized crime dynasty's aging patriarch t...          Crime, Drama   
2  When the menace known as the Joker wreaks havo...  Action, Crime, Drama   
3  The early life and career of Vito Corleone in ...          Crime, Drama   
4  A jury holdout attempts to prevent a miscarria...          Crime, Drama   

   IMDB_Rating  
0          9.3  
1          9.2  
2          9.0  
3          9.0  
4          9.0  


In [90]:
import re

def preprocess_text(text):
    """Normalise a value into lowercase text with single-space separators.

    The value is converted with ``str()`` and lowercased. Every non-word
    character (anything other than a letter, digit or underscore) is turned
    into a space, runs of whitespace are collapsed to one space, and leading
    and trailing whitespace is removed.

    Args:
        text: Value to clean; non-string values are stringified first.

    Returns:
        The cleaned string (possibly empty).
    """
    cleaned = str(text).lower()
    # Order matters: punctuation becomes spaces first, then the resulting
    # whitespace runs are squeezed down to a single space.
    for pattern in (r'\W', r'\s+'):
        cleaned = re.sub(pattern, ' ', cleaned)
    return cleaned.strip()

# Clean every description element-wise and keep the result in its own column,
# leaving the original 'Description' text untouched.
df['processed_description'] = df['Description'].map(preprocess_text)

# Preview titles next to their cleaned descriptions.
preview_columns = ['Title', 'processed_description']
print(df[preview_columns].head())


                      Title                              processed_description
0  The Shawshank Redemption  two imprisoned men bond over a number of years...
1             The Godfather  an organized crime dynasty s aging patriarch t...
2           The Dark Knight  when the menace known as the joker wreaks havo...
3    The Godfather: Part II  the early life and career of vito corleone in ...
4              12 Angry Men  a jury holdout attempts to prevent a miscarria...


In [92]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Build the TF-IDF vocabulary and document matrix, ignoring English stop words.
# The vectoriser is fitted on the cleaned 'processed_description' column (created
# with preprocess_text) rather than the raw 'Description' text, so the corpus goes
# through the same normalisation that the parameterised get_recommendations applies
# to queries before vectorising them. Row i of tfidf_matrix corresponds to the
# i-th row (by position) of df.
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['processed_description'])


In [94]:
from sklearn.metrics.pairwise import cosine_similarity

def get_recommendations(user_input, top_n=5):
    """Return the movies whose descriptions are most similar to a text query.

    Uses the notebook-level ``vectorizer``, ``tfidf_matrix`` and ``df`` objects.
    NOTE: a later cell redefines ``get_recommendations`` with explicit
    ``df``/``vectorizer``/``tfidf_matrix`` parameters; once that cell has run,
    this two-argument form is shadowed.

    Args:
        user_input: Free-text description of what the user wants to watch.
        top_n: Maximum number of movies to return. Values <= 0 yield an
            empty result.

    Returns:
        A DataFrame with columns Title, Genre, IMDB_Rating and Description,
        ordered from most to least similar. It may contain fewer than
        ``top_n`` rows, because movies with zero similarity to the query are
        not reported.
    """
    columns = ['Title', 'Genre', 'IMDB_Rating', 'Description']

    # A slice such as [-0:] selects *every* row, so non-positive sizes must be
    # handled explicitly instead of falling through to the slicing below.
    if top_n <= 0:
        return df.iloc[[]][columns]

    user_vector = vectorizer.transform([user_input])  # Convert input into TF-IDF vector
    similarities = cosine_similarity(user_vector, tfidf_matrix).flatten()  # One score per movie

    # argsort is ascending: take the last top_n positions and reverse them.
    top_indices = similarities.argsort()[-top_n:][::-1]

    # A score of 0 means the query shares no vocabulary with the description;
    # such rows are arbitrary picks, not recommendations.
    top_indices = top_indices[similarities[top_indices] > 0]

    # tfidf_matrix rows line up with df by position, hence iloc.
    return df.iloc[top_indices][columns]

# Quick check of the recommender with a sample free-text request
user_query = "I love thrilling action movies set in space."
print(get_recommendations(user_input=user_query))


                         Title                         Genre  IMDB_Rating  \
378            The Incredibles  Animation, Action, Adventure          8.0   
692  The Man Who Would Be King       Adventure, History, War          7.8   
826                Barton Fink       Comedy, Drama, Thriller          7.7   
106                     Aliens     Action, Adventure, Sci-Fi          8.3   
24         Saving Private Ryan                    Drama, War          8.6   

                                           Description  
378  A family of undercover superheroes, while tryi...  
692  Two British former soldiers decide to set them...  
826  A renowned New York playwright is enticed to C...  
106  Fifty-seven years after surviving an apocalypt...  
24   Following the Normandy Landings, a group of U....  


In [96]:
from sklearn.metrics.pairwise import cosine_similarity

def get_recommendations(user_input, df, vectorizer, tfidf_matrix, top_n=5):
    """Return the movies whose descriptions are most similar to a text query.

    Args:
        user_input: Free-text description of what the user wants to watch.
            It is cleaned with ``preprocess_text`` before vectorising.
        df: DataFrame holding at least the columns Title, Description, Genre
            and IMDB_Rating; its rows must line up by position with the rows
            of ``tfidf_matrix``.
        vectorizer: Fitted vectoriser exposing ``transform``.
        tfidf_matrix: Document matrix produced by ``vectorizer`` for ``df``.
        top_n: Maximum number of movies to return. Values <= 0 yield an
            empty result.

    Returns:
        A DataFrame with columns Title, Description, Genre and IMDB_Rating,
        ordered from most to least similar. It may contain fewer than
        ``top_n`` rows, because movies with zero similarity to the query are
        not reported.
    """
    columns = ['Title', 'Description', 'Genre', 'IMDB_Rating']

    # A slice such as [-0:] selects *every* row, so non-positive sizes must be
    # handled explicitly instead of falling through to the slicing below.
    if top_n <= 0:
        return df.iloc[[]][columns]

    user_input = preprocess_text(user_input)  # Preprocess input text
    user_vector = vectorizer.transform([user_input])  # Convert input to TF-IDF vector

    # Compute cosine similarity between the user input and all dataset items
    similarities = cosine_similarity(user_vector, tfidf_matrix).flatten()

    # argsort is ascending: take the last top_n positions and reverse them
    top_indices = similarities.argsort()[-top_n:][::-1]

    # A score of 0 means the query shares no vocabulary with the description;
    # such rows are arbitrary picks, not recommendations.
    top_indices = top_indices[similarities[top_indices] > 0]

    # Positional lookup: matrix row i corresponds to the i-th row of df
    recommendations = df.iloc[top_indices][columns]

    return recommendations


In [100]:
# Sample free-text request used to exercise the recommender
user_query = "I love thrilling action movies set in space."

# Rank the catalogue against the request (top_n is left at its default)
recommendations = get_recommendations(
    user_input=user_query,
    df=df,
    vectorizer=vectorizer,
    tfidf_matrix=tfidf_matrix,
)

# Show the ranked table
print("Top Recommended Movies:\n", recommendations)


Top Recommended Movies:
                          Title  \
378            The Incredibles   
692  The Man Who Would Be King   
826                Barton Fink   
106                     Aliens   
24         Saving Private Ryan   

                                           Description  \
378  A family of undercover superheroes, while tryi...   
692  Two British former soldiers decide to set them...   
826  A renowned New York playwright is enticed to C...   
106  Fifty-seven years after surviving an apocalypt...   
24   Following the Normandy Landings, a group of U....   

                            Genre  IMDB_Rating  
378  Animation, Action, Adventure          8.0  
692       Adventure, History, War          7.8  
826       Comedy, Drama, Thriller          7.7  
106     Action, Adventure, Sci-Fi          8.3  
24                     Drama, War          8.6  


In [102]:
!pip install ipywidgets


Defaulting to user installation because normal site-packages is not writeable
Looking in links: /usr/share/pip-wheels


In [103]:
import ipywidgets as widgets
from IPython.display import display

# Create a text input box
text_input = widgets.Text(
    placeholder="Describe a movie you like...",
    layout=widgets.Layout(width="50%")
)

# Create a button
button = widgets.Button(description="Get Recommendations")

# Dedicated output area: anything printed/displayed from a widget callback is
# only reliably shown in the notebook when it is captured by an Output widget.
output = widgets.Output()

# Function to display recommendations
def on_button_click(b):
    """Show recommendations for the text currently in ``text_input``.

    Args:
        b: The Button instance that was clicked (unused; supplied by ipywidgets).
    """
    user_query = text_input.value.strip()  # Get user input

    # Drop the results of the previous click so outputs do not pile up.
    # Called before entering the `with` block so the capture context is not nested.
    output.clear_output()

    with output:
        if not user_query:
            # Nothing to match against; ask for input instead of ranking.
            print("Please describe a movie first.")
            return
        recommendations = get_recommendations(user_query, df, vectorizer, tfidf_matrix)
        print("\n Top Recommended Movies:\n")
        display(recommendations)

# Attach function to button click
button.on_click(on_button_click)

# Display the UI (input, button, and the area where results are rendered)
display(text_input, button, output)


Text(value='', layout=Layout(width='50%'), placeholder='Describe a movie you like...')

Button(description='Get Recommendations', style=ButtonStyle())