In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# The catalogue is written one record per movie so that all fields of a title
# sit on the same line; the records are pivoted into the column-oriented
# mapping that the DataFrame is built from.
_movie_fields = ("movie_name", "genre", "director", "rating", "description")
_movie_records = [
    ("Vikram", "Action", "Lokesh Kanagaraj", 9.0,
     "undercover cop action packed thriller with multiple villains and twists"),
    ("Master", "Action", "Lokesh Kanagaraj", 8.5,
     "college action drama about professor fighting drug lord inside campus"),
    ("Beast", "Action", "Nelson", 7.0,
     "raw action movie about beast rescuing hostages from terrorists in mall"),
    ("Valimai", "Action", "H Vinoth", 7.5,
     "police officer chases dangerous biker gang across highways nonstop action"),
    ("Thunivu", "Action", "H Vinoth", 8.0,
     "heist thriller about robbing black money from corrupt bank officials"),
    ("Jailer", "Action", "Nelson", 8.5,
     "retired jailer fights powerful gangster to protect his family action"),
    ("Leo", "Action", "Lokesh Kanagaraj", 8.5,
     "action thriller about a mysterious man with a dangerous violent past"),
    ("Varisu", "Drama", "Vamshi Paidipally", 7.5,
     "family drama about corporate heir returning home to manage family business"),
    ("Ponniyin Selvan", "Drama", "Mani Ratnam", 8.8,
     "epic historical drama about chola kingdom power politics and war"),
    ("Vikram Vedha", "Thriller", "Pushkar Gayathri", 8.7,
     "cat and mouse thriller between cop and dangerous criminal reforming"),
    ("96", "Romance", "C Prem Kumar", 9.2,
     "beautiful love story between school friends reuniting after many years"),
    ("Premam", "Romance", "Alphonse Puthren", 9.0,
     "sweet innocent college romance about falling in love naturally"),
    ("Mersal", "Action", "Atlee", 8.5,
     "action drama about doctor fighting corrupt politicians for justice"),
    ("Bigil", "Action", "Atlee", 8.0,
     "football coach leads slum team to victory while fighting corruption"),
    ("Sarkar", "Action", "AR Murugadoss", 7.8,
     "action drama about common man becoming chief minister fighting corruption"),
    ("Kaithi", "Thriller", "Lokesh Kanagaraj", 8.8,
     "single night action thriller about ex cop delivering prisoner safely"),
    ("Soorarai Pottru", "Drama", "Sudha Kongara", 9.0,
     "inspiring drama about common man building low cost airline against odds"),
    ("Jai Bhim", "Drama", "TJ Gnanavel", 9.3,
     "powerful legal drama about tribal student fighting for justice against police"),
    ("Ratchasan", "Thriller", "Ram Kumar", 8.5,
     "dark thriller about serial killer hunting young women across city"),
    ("Irudhi Suttru", "Drama", "Sudha Kongara", 8.8,
     "sports drama about female boxer trained by strict coach winning championship"),
    ("Pariyerum Perumal", "Drama", "Mari Selvaraj", 9.0,
     "emotional drama about lower caste student facing discrimination in college"),
    ("Karnan", "Drama", "Mari Selvaraj", 9.1,
     "powerful drama about village youth fighting caste oppression and injustice"),
    ("Asuran", "Drama", "Vetrimaaran", 9.0,
     "raw emotional drama about farmer protecting family from upper caste violence"),
    ("Super Deluxe", "Drama", "Thiagarajan Kumararaja", 9.2,
     "anthology drama with interconnected stories about identity morality and life"),
    ("Mandela", "Comedy", "Madonne Ashwin", 8.5,
     "comedy drama about small village election and an independent candidate"),
    ("Doctor", "Comedy", "Nelson", 8.0,
     "comedy action about doctor outsmarting dangerous organ harvesting gang"),
    ("Maanagaram", "Thriller", "Lokesh Kanagaraj", 8.7,
     "urban thriller about four strangers connected in one night in Chennai"),
    ("Thani Oruvan", "Thriller", "Mohan Raja", 8.8,
     "investigative thriller about scientist uncovering dangerous political conspiracy"),
    ("Anjaan", "Action", "Lingusamy", 7.5,
     "stylish action about gangster protecting girl from powerful enemies"),
    ("Enthiran", "Fantasy", "Shankar", 8.5,
     "sci fi fantasy about robot falling in love and turning dangerous"),
]

# Transpose records -> {column name: list of values}, keeping the field order.
movies_data = {
    field: list(column)
    for field, column in zip(_movie_fields, zip(*_movie_records))
}

df = pd.DataFrame(movies_data)
print(f"Dataset created with {len(df)} movies!")
print(df.head())

Dataset created with 30 movies!
  movie_name   genre          director  rating  \
0     Vikram  Action  Lokesh Kanagaraj     9.0   
1     Master  Action  Lokesh Kanagaraj     8.5   
2      Beast  Action            Nelson     7.0   
3    Valimai  Action          H Vinoth     7.5   
4    Thunivu  Action          H Vinoth     8.0   

                                         description  
0  undercover cop action packed thriller with mul...  
1  college action drama about professor fighting ...  
2  raw action movie about beast rescuing hostages...  
3  police officer chases dangerous biker gang acr...  
4  heist thriller about robbing black money from ...  


In [3]:
# Text used for matching: the genre label followed by the plot description.
df["combined_features"] = df["genre"].str.cat(df["description"], sep=" ")

# TF-IDF vectors of the combined text (English stop words removed), then the
# pairwise cosine similarity of every movie against every other movie.
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(df["combined_features"])
cosine_sim = cosine_similarity(tfidf_matrix)  # second argument omitted: compared with itself

print("Recommendation engine ready!")
print(f"Total movies: {len(df)}")

Recommendation engine ready!
Total movies: 30


In [4]:
def recommend_movies(movie_name, num_recommendations=5, candidate_pool=14,
                     movies=None, similarity=None):
    """Print the movies recommended for someone who liked ``movie_name``.

    The ``candidate_pool`` titles most similar to the query are shortlisted
    (the query itself is always excluded) and then re-ranked by a blend of
    70% similarity and 30% rating, with the rating scaled from 0-10 to 0-1.
    Because of the rating term, a shortlisted title with low similarity can
    still rank highly on rating alone.

    Parameters
    ----------
    movie_name : str
        Exact, case-sensitive title to look up in the ``movie_name`` column.
        If several rows share the title, the first one is used.
    num_recommendations : int, default 5
        Number of titles to print. Negative values are treated as zero.
    candidate_pool : int, default 14
        Number of nearest neighbours considered before re-ranking. It is
        raised to ``num_recommendations`` when smaller, so asking for more
        results is not silently truncated.
    movies : pandas.DataFrame, optional
        Frame with ``movie_name``, ``genre``, ``director`` and ``rating``
        columns. Defaults to the notebook-level ``df``.
    similarity : 2-D array-like, optional
        ``similarity[i][j]`` is the similarity between row positions ``i``
        and ``j`` of ``movies``. Defaults to the notebook-level ``cosine_sim``.

    Returns
    -------
    None
        Results (or a "not found" message listing the available titles) are
        printed to stdout.
    """
    movies = df if movies is None else movies
    similarity = cosine_sim if similarity is None else similarity

    # Resolve the query to a row *position*: the similarity matrix and .iloc
    # are positional, so index labels would be wrong on a non-default index.
    positions = np.flatnonzero((movies["movie_name"] == movie_name).to_numpy())
    if positions.size == 0:
        print(f"Sorry! '{movie_name}' not found in our database.")
        print("Available movies:", list(movies["movie_name"].values))
        return
    query_pos = int(positions[0])

    num_recommendations = max(num_recommendations, 0)
    pool_size = max(candidate_pool, num_recommendations)

    # Drop the query explicitly instead of assuming it sorts first; a title
    # with identical features would otherwise be able to displace it.
    scored = [
        (pos, score)
        for pos, score in enumerate(similarity[query_pos])
        if pos != query_pos
    ]
    # list.sort is stable, so equally similar titles keep their row order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    shortlist = scored[:pool_size]

    recommended = movies.iloc[[pos for pos, _ in shortlist]].copy()
    recommended["similarity"] = [score for _, score in shortlist]
    recommended["final_score"] = (recommended["similarity"] * 0.7) + (recommended["rating"] / 10 * 0.3)

    top_movies = recommended.sort_values("final_score", ascending=False).head(num_recommendations)

    print(f"\n🎬 Because you liked '{movie_name}', we recommend:\n")
    print("-" * 60)
    for rank, (_, row) in enumerate(top_movies.iterrows(), 1):
        print(f"{rank}. {row['movie_name']}")
        print(f"   Genre: {row['genre']} | Rating: {row['rating']}/10 | Director: {row['director']}")
        print()

In [5]:
# Demo: recommendations for "Vikram" with the default number of results.
recommend_movies(movie_name="Vikram")


🎬 Because you liked 'Vikram', we recommend:

------------------------------------------------------------
1. Kaithi
   Genre: Thriller | Rating: 8.8/10 | Director: Lokesh Kanagaraj

2. Vikram Vedha
   Genre: Thriller | Rating: 8.7/10 | Director: Pushkar Gayathri

3. Leo
   Genre: Action | Rating: 8.5/10 | Director: Lokesh Kanagaraj

4. Mersal
   Genre: Action | Rating: 8.5/10 | Director: Atlee

5. Jailer
   Genre: Action | Rating: 8.5/10 | Director: Nelson



In [6]:
# Demo: recommendations for "96" with the default number of results.
recommend_movies(movie_name="96")


🎬 Because you liked '96', we recommend:

------------------------------------------------------------
1. Premam
   Genre: Romance | Rating: 9.0/10 | Director: Alphonse Puthren

2. Enthiran
   Genre: Fantasy | Rating: 8.5/10 | Director: Shankar

3. Vikram
   Genre: Action | Rating: 9.0/10 | Director: Lokesh Kanagaraj

4. Ponniyin Selvan
   Genre: Drama | Rating: 8.8/10 | Director: Mani Ratnam

5. Vikram Vedha
   Genre: Thriller | Rating: 8.7/10 | Director: Pushkar Gayathri



In [7]:
# Leaderboard: the ten rows with the highest rating, best first.
print("Top 10 Highest Rated Tamil Movies in our Database:\n")
print("-" * 50)
top_rated = df.sort_values("rating", ascending=False).head(10)
for rank, movie in enumerate(top_rated.itertuples(index=False), start=1):
    print(f"{rank}. {movie.movie_name} ({movie.genre}) - {movie.rating}/10 - Dir: {movie.director}")

Top 10 Highest Rated Tamil Movies in our Database:

--------------------------------------------------
1. Jai Bhim (Drama) - 9.3/10 - Dir: TJ Gnanavel
2. 96 (Romance) - 9.2/10 - Dir: C Prem Kumar
3. Super Deluxe (Drama) - 9.2/10 - Dir: Thiagarajan Kumararaja
4. Karnan (Drama) - 9.1/10 - Dir: Mari Selvaraj
5. Premam (Romance) - 9.0/10 - Dir: Alphonse Puthren
6. Vikram (Action) - 9.0/10 - Dir: Lokesh Kanagaraj
7. Pariyerum Perumal (Drama) - 9.0/10 - Dir: Mari Selvaraj
8. Asuran (Drama) - 9.0/10 - Dir: Vetrimaaran
9. Soorarai Pottru (Drama) - 9.0/10 - Dir: Sudha Kongara
10. Irudhi Suttru (Drama) - 8.8/10 - Dir: Sudha Kongara
