In [None]:
! pip install pandas
! pip install scikit-learn

In [None]:
# Section for imports

import pandas as pd
import sys
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
def load_data(filepath):
    """
    Read the movie dataset stored as a CSV file at ``filepath``.

    Returns the parsed pandas DataFrame. If reading fails for any reason,
    execution is stopped through ``sys.exit`` with a message that names the
    file and the underlying error.
    """
    try:
        return pd.read_csv(filepath)
    except Exception as err:
        sys.exit(f"Error loading {filepath}: {err}")

In [None]:
def build_tfidf(corpus):
    """
    Fit a TF-IDF vectorizer on ``corpus`` with English stop words removed.

    Returns a ``(vectorizer, tfidf_matrix)`` pair: the fitted vectorizer and
    the TF-IDF representation of ``corpus`` it produced while fitting.
    """
    tfidf = TfidfVectorizer(stop_words='english')
    return tfidf, tfidf.fit_transform(corpus)

In [None]:
def get_recommendations(user_input, vectorizer, tfidf_matrix, data, top_n=5):
    """
    Rank the rows of ``data`` by cosine similarity to ``user_input``.

    Args:
        user_input: Free-text query; transformed with ``vectorizer``.
        vectorizer: Fitted vectorizer exposing ``transform``.
        tfidf_matrix: Matrix whose i-th row corresponds to the i-th row of ``data``.
        data: DataFrame the recommendations are taken from (not modified).
        top_n: Maximum number of rows to return. Values <= 0 yield an empty
            result; values larger than ``len(data)`` return every row.

    Returns:
        A copy of the selected rows, most similar first, with an added
        ``similarity`` column. Selection is purely positional in the ranking,
        so rows with a similarity of 0 can be included when fewer than
        ``top_n`` rows match the query.
    """
    user_tfidf = vectorizer.transform([user_input])
    cosine_sim = cosine_similarity(user_tfidf, tfidf_matrix).flatten()

    # Clamp before slicing: the previous ``argsort()[-top_n:]`` form returned
    # *every* row for top_n == 0 (``[-0:]`` is the whole array) and an
    # unrelated slice for negative values.
    limit = max(top_n, 0)

    # Reverse first, then take the head; for limit >= 1 this selects the same
    # indices in the same order as ``argsort()[-limit:][::-1]``.
    top_indices = cosine_sim.argsort()[::-1][:limit]

    recommendations = data.iloc[top_indices].copy()
    recommendations['similarity'] = cosine_sim[top_indices]
    return recommendations

In [None]:

def run(query, csv_path='movies.csv', top_n=5):
    """
    Load the movie CSV, build a TF-IDF index over its genres and print the
    movies most similar to ``query``.

    Args:
        query: Free-text description of what the user wants to watch.
        csv_path: Path to a CSV file that has ``title`` and ``genres`` columns.
        top_n: Maximum number of recommendations to print.

    Returns:
        None. The recommendations are printed as a plain-text table.
    """
    data = load_data(csv_path)

    # Genres are '|'-separated; turn the separators into spaces so each genre
    # becomes its own token. ``regex=False`` makes the literal replacement
    # explicit instead of relying on the pandas-version-dependent default, and
    # missing genres become empty strings because the TF-IDF vectorizer
    # rejects NaN documents.
    data['genres'] = data['genres'].fillna('').str.replace('|', ' ', regex=False)

    vectorizer, tfidf_matrix = build_tfidf(data['genres'])
    recommendations = get_recommendations(
        query, vectorizer, tfidf_matrix, data, top_n=top_n
    )

    print("\nTop recommendations based on query:")
    print(recommendations[['title', 'genres', 'similarity']].to_string(index=False))

In [None]:
# Example query. `run` is called without `csv_path`, so it reads its default
# 'movies.csv', and the text is matched against the dataset's `genres` column.
query = "I love action movies"
run(query)