In [14]:
import pandas as pd
import ast
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import requests
import os

# Load datasets
movies_df = pd.read_csv('../data/movies.csv')
credits_df = pd.read_csv('../data/credits.csv')

# Parse genres (JSON string to list of names)
movies_df['genres'] = movies_df['genres'].apply(lambda x: ' '.join([d['name'] for d in ast.literal_eval(x)]) if pd.notna(x) else '')

# Parse cast and crew (JSON string to space-separated names)
credits_df['cast'] = credits_df['cast'].apply(lambda x: ' '.join([d['name'] for d in ast.literal_eval(x)[:3]]) if pd.notna(x) else '')
credits_df['crew'] = credits_df['crew'].apply(lambda x: ' '.join([d['name'] for d in ast.literal_eval(x) if d['job'] == 'Director']) if pd.notna(x) else '')

# Merge datasets on title
merged_df = pd.merge(movies_df, credits_df, on='title', how='left')

# Filter for Indian movies (based on language or country)
indian_languages = ['hi', 'te', 'ta', 'ml', 'kn', 'mr', 'pa', 'bn']
merged_df = merged_df[
    merged_df['original_language'].isin(indian_languages) |
    merged_df['production_countries'].str.contains('India', na=False)
]

# Function to get poster URL with error handling
def get_poster(title, year, api_key):
    if not api_key or api_key == "YOUR_ACTUAL_TMDB_API_KEY":
        return f"https://via.placeholder.com/200x300?text={title.replace(' ', '+')}"
    try:
        # Test API key validity with a simple request
        test_response = requests.get(f"https://api.themoviedb.org/3/configuration?api_key={api_key}", timeout=5)
        test_response.raise_for_status()
        response = requests.get(
            f"https://api.themoviedb.org/3/search/movie?api_key={api_key}&query={title}&year={year}",
            timeout=5
        )
        response.raise_for_status()
        data = response.json()
        if data.get('results') and len(data['results']) > 0 and data['results'][0].get('poster_path'):
            return f"https://image.tmdb.org/t/p/w200{data['results'][0]['poster_path']}"
        print(f"No poster found for {title}")
        return f"https://via.placeholder.com/200x300?text={title.replace(' ', '+')}"
    except requests.exceptions.RequestException as e:
        print(f"API request failed for {title}: {e}")
        return f"https://via.placeholder.com/200x300?text={title.replace(' ', '+')}"

# Add poster URLs
tmdb_api_key = "YOUR_ACTUAL_TMDB_API_KEY"  # Replace with your TMDB API key
merged_df['poster'] = merged_df.apply(lambda row: get_poster(row['title'], row['release_date'][:4], tmdb_api_key), axis=1)

# Save merged dataset
merged_df.to_csv('../data/merged_indian_movies.csv', index=False)
print(f"Merged dataset saved with {len(merged_df)} Indian movies.")

Merged dataset saved with 56 Indian movies.


In [17]:
import pandas as pd

# Load the movies dataset
movies_df = pd.read_csv('../data/movies.csv')

# Generate image links based on movie titles
movies_df['image_link'] = movies_df['title'].apply(lambda x: f"https://via.placeholder.com/200x450?text={x.replace(' ', '+')}")

# Select only title and image_link columns
image_links_df = movies_df[['title', 'image_link']]

# Save to a new CSV file
image_links_df.to_csv('../data/image_links.csv', index=False)
print(f"Generated image_links.csv with {len(image_links_df)} entries.")

Generated image_links.csv with 4803 entries.


In [20]:
dfff=pd.read_csv('./TeluguMovies_dataset.csv')
print(dfff.columns)


Index(['Unnamed: 0', 'Movie', 'Year', 'Certificate', 'Genre', 'Overview',
       'Runtime', 'Rating', 'No.of.Ratings'],
      dtype='object')


In [25]:
import pandas as pd
import requests
import time

# Load the Telugu dataset
telugu_df = pd.read_csv("./TeluguMovies_dataset.csv")

# Drop unnecessary column if it's just an index
telugu_df = telugu_df.drop(columns=['Unnamed: 0']) if 'Unnamed: 0' in telugu_df.columns else telugu_df

# Initial manual mappings (for known titles to speed up process)
title_to_tmdb_id = {
    "Bahubali: The Beginning": 271726,
    "Baahubali 2: The Conclusion": 325561,
    "RRR": 615656,
    "Pushpa: The Rise": 610149,
    "Magadheera": 345013,
    "Arjun Reddy": 521077,
    "Ala Vaikunthapurramuloo": 658915,
    "Sye Raa Narasimha Reddy": 543099,
    "Jersey": 562974,
    "1 - Nenokkadine": 134374,
    "Dhoom: 3": 100302,
    "Ra.One": 56703,
    "Dhoom: 2": 1887,
    "Eega": 108063,
    "Krrish 3": 184315,
    "Bharat Ane Nenu": 488015,
    "Theri": 399177,
    "Dookudu": 101863,
    "Athadu": 125962
}

# Map titles to TMDB IDs dynamically
api_key = "8265bd1679663a7ea12ac168da84d2e8"
unmapped_titles = []

for index, row in telugu_df.iterrows():
    title = row['Movie']
    if title in title_to_tmdb_id:
        telugu_df.at[index, 'id'] = title_to_tmdb_id[title]
    else:
        search_url = f"https://api.themoviedb.org/3/search/movie?api_key={api_key}&query={title}&language=en-US"
        try:
            response = requests.get(search_url, timeout=10)
            response.raise_for_status()
            data = response.json()
            if data['results']:
                tmdb_id = data['results'][0]['id']
                telugu_df.at[index, 'id'] = tmdb_id
                title_to_tmdb_id[title] = tmdb_id  # Update dictionary for future use
                print(f"Mapped {title} to TMDB ID {tmdb_id}")
            else:
                telugu_df.at[index, 'id'] = 100000 + index  # Fallback ID
                unmapped_titles.append(title)
                print(f"No TMDB match found for {title}, using fallback ID {100000 + index}")
        except requests.exceptions.RequestException as e:
            telugu_df.at[index, 'id'] = 100000 + index  # Fallback on error
            unmapped_titles.append(title)
            print(f"Error searching TMDB for {title}: {e}, using fallback ID {100000 + index}")
        time.sleep(0.5)  # Avoid hitting TMDB rate limits (max 40 requests per 10 seconds)

# Map columns
telugu_df = telugu_df.rename(columns={
    'Movie': 'title',
    'Genre': 'genres',
    'Overview': 'overview'
})
telugu_df['original_language'] = 'te'
telugu_df['poster'] = None

# Fetch posters using TMDB IDs
for index, row in telugu_df.iterrows():
    movie_id = row['id']
    url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={api_key}&language=en-US"
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        data = response.json()
        poster_path = data.get('poster_path')
        if poster_path:
            telugu_df.at[index, 'poster'] = f"https://image.tmdb.org/t/p/w500/{poster_path}"
        else:
            print(f"No poster path found for ID {movie_id} ({row['title']})")
    except (KeyError, requests.exceptions.RequestException) as e:
        print(f"Error fetching poster for ID {movie_id} ({row['title']}): {e}")

# Save the enhanced dataset
telugu_df.to_csv("./TeluguMovies_enhanced.csv", index=False)
print(telugu_df.head())
print(f"Unmapped titles: {unmapped_titles}")

Mapped Dhoom:3 to TMDB ID 44977
Mapped Dhoom:2 to TMDB ID 14194
Mapped Rangasthalam to TMDB ID 461126
Mapped War to TMDB ID 10431
Mapped Saaho to TMDB ID 454292
Mapped Pokiri to TMDB ID 23381
Mapped Sarkar to TMDB ID 624102
Mapped The Ghazi Attack to TMDB ID 442405
Mapped Kabali to TMDB ID 366080
Mapped MSG: The Messenger of God to TMDB ID 318848
Mapped Nanban to TMDB ID 69537
Mapped Srimanthudu to TMDB ID 319817
No TMDB match found for Veer - Vivegam, using fallback ID 100024
Mapped Billa 2 to TMDB ID 116185
Mapped Manam to TMDB ID 253533
Mapped 7 Aum Arivu to TMDB ID 86718
Mapped Bigil to TMDB ID 563309
Mapped Business Man to TMDB ID 366683
Mapped Geetha Govindam to TMDB ID 540468
Mapped Mahanati to TMDB ID 459713
Mapped Spyder to TMDB ID 337608
Mapped Nannaku Prematho to TMDB ID 365187
Mapped Dabangg 3 to TMDB ID 496339
Mapped MSG 2 the Messenger to TMDB ID 373195
Mapped Manikarnika: The Queen of Jhansi to TMDB ID 491622
Mapped Race Gurram to TMDB ID 262227
Mapped Okkadu to TMDB ID 

Mapped Komaram Puli to TMDB ID 199800
Mapped Rebel to TMDB ID 123156
Mapped iSmart Shankar to TMDB ID 609108
Mapped Anukokunda Oka Roju to TMDB ID 80862
Mapped Bheeshma to TMDB ID 674779
Mapped Karthikeya to TMDB ID 894762
Mapped Desamuduru to TMDB ID 117058
Mapped Gunde Jaari Gallanthayyinde to TMDB ID 181929
Mapped Pailwan to TMDB ID 607294
Mapped World Famous Lover to TMDB ID 659674
Mapped Theeya Velai Seiyyanum Kumaru to TMDB ID 190663
Mapped Raja The Great to TMDB ID 456431
Mapped Kadaram Kondan to TMDB ID 561295
Mapped Sammohanam to TMDB ID 530198
Mapped Simha to TMDB ID 54404
Mapped Aithe to TMDB ID 1357662
Mapped Takkari Donga to TMDB ID 82727
Mapped Kadhalil Sodhappuvadhu Yeppadi to TMDB ID 101793
Mapped Pammal K. Sambandam to TMDB ID 66344
Mapped Kanchana 2 to TMDB ID 330425
Mapped Saroja to TMDB ID 803757
Mapped Vel to TMDB ID 63274
Mapped Pandavulu Pandavulu Thummeda to TMDB ID 253047
Mapped Doosukeltha to TMDB ID 241120
Mapped Gundamma Katha to TMDB ID 86146
Mapped Oohalu 

No TMDB match found for Yamajaathakudu, using fallback ID 100449
Mapped Jaanu to TMDB ID 849173
Mapped Neevevaro to TMDB ID 544681
No TMDB match found for Power Unlimited 2, using fallback ID 100452
Mapped Guru to TMDB ID 604108
Mapped Kee to TMDB ID 489763
Mapped Jagadam to TMDB ID 81062
Mapped Daruvu to TMDB ID 152875
Mapped Damarukam to TMDB ID 145335
Mapped Super to TMDB ID 45132
No TMDB match found for Krishna: The Power of Indrakeeladri, using fallback ID 100459
Mapped Chakram to TMDB ID 132437
Mapped Size Zero to TMDB ID 1311149
Mapped Bhadra to TMDB ID 80924
No TMDB match found for Erra Buss, using fallback ID 100463
Mapped Lakshmi's NTR to TMDB ID 590753
Mapped Dhoni to TMDB ID 1355466
Mapped Appatlo Okadundevadu to TMDB ID 434295
Mapped Akhil to TMDB ID 370986
Mapped Daana Veera Soora Karna to TMDB ID 86137
Mapped Aadhi Bhagavan to TMDB ID 69564
Mapped Devudu Chesina Manushulu to TMDB ID 199544
Mapped Falaknuma Das to TMDB ID 590162
Mapped Raju Maharaju to TMDB ID 308298
No T

Mapped Devadasu to TMDB ID 307448
Mapped Kodama Simham to TMDB ID 278182
Mapped April 1 Vidudala to TMDB ID 40906
Mapped Lankeshwarudu to TMDB ID 279128
Mapped Kondaveeti Donga to TMDB ID 103227
Mapped O Pitta Katha to TMDB ID 681657
Mapped Mutamestri to TMDB ID 279121
Mapped Subbu to TMDB ID 463736
Mapped Pilla Nuvvu Leni Jeevitham to TMDB ID 303732
No TMDB match found for Sri Ramulayya, using fallback ID 100671
Mapped Boss to TMDB ID 228355
Mapped Anaganaga Oka Roju to TMDB ID 38887
Mapped Raghavendra to TMDB ID 202080
Mapped Kondaveeti Simhasanam to TMDB ID 1142476
Mapped Cinema Chupista Maava to TMDB ID 369215
No TMDB match found for Sri Jagadguru Adi Shankara, using fallback ID 100677
Mapped Hawaa to TMDB ID 725741
Mapped Toofan to TMDB ID 228979
Mapped Ongole Githa to TMDB ID 200013
Mapped Chukkallo Chandrudu to TMDB ID 117051
Mapped Indrudu Chandrudu to TMDB ID 66364
Mapped Sankranthi to TMDB ID 279459
Mapped Aa Okkati Adakku to TMDB ID 1282465
Mapped Eeshwar to TMDB ID 117085
M

Mapped Souryam to TMDB ID 73140
Mapped Blade Babji to TMDB ID 80943
Mapped Devi to TMDB ID 690617
Mapped Gulabi to TMDB ID 1275137
No TMDB match found for Rajendrudru Gajendrudru, using fallback ID 100874
Mapped Lovely to TMDB ID 122783
No TMDB match found for Adhirindhi Alludu, using fallback ID 100876
Mapped Andhrudu to TMDB ID 80836
Mapped Bluff Master to TMDB ID 571435
Mapped Aatagadharaa Siva to TMDB ID 567884
Mapped Rechipo to TMDB ID 23773
Mapped Kadhal Sadugudu to TMDB ID 69667
Mapped Ra Ra Krishnayya to TMDB ID 304791
No TMDB match found for Nuvvu Vasthavani, using fallback ID 100884
Mapped Prathigna to TMDB ID 987601
Mapped Life Before Wedding to TMDB ID 85307
Mapped Silly Fellows to TMDB ID 548382
No TMDB match found for Donga Police, using fallback ID 100888
Mapped Sundara Kanda to TMDB ID 279489
Mapped Chitram Bhalare Vichitram to TMDB ID 81000
Mapped Ninne Premistha to TMDB ID 307920
Mapped Sahasa Veerudu Sagara Kanya to TMDB ID 279479
Mapped Bhookailas to TMDB ID 155177


Mapped Kobbari Matta to TMDB ID 622602
No TMDB match found for Dhruvanakshatram, using fallback ID 101072
Mapped Per Sollum Pillai to TMDB ID 66372
Mapped Vivaha Bhojanambu to TMDB ID 402354
Mapped Shanti Kranti to TMDB ID 577977
Mapped Sukumarudu to TMDB ID 307624
Mapped Fitting Master to TMDB ID 81975
Mapped Operation Duryodhana to TMDB ID 360847
Mapped Prema to TMDB ID 341895
Mapped Mooga Manasulu to TMDB ID 307768
Mapped Ontari Poratam to TMDB ID 279499
Mapped Tandra Paparayudu to TMDB ID 1430398
Mapped Nireekshana to TMDB ID 450178
No TMDB match found for Premaabhishekam, using fallback ID 101084
Mapped Savaari to TMDB ID 716035
Mapped Surya IPS to TMDB ID 66409
Mapped Maharasan to TMDB ID 66358
Mapped Chakravarthy to TMDB ID 279287
Mapped Lover to TMDB ID 1219158
Mapped Jadoogadu to TMDB ID 400761
Mapped Allari Alludu to TMDB ID 500614
Mapped Pandava Vanavasamu to TMDB ID 86147
Mapped Mechanic Alludu to TMDB ID 279119
Mapped Kondaveeti Simham to TMDB ID 86136
Mapped Malini 22 to 

Mapped Abbaigaru to TMDB ID 279488
No TMDB match found for Ammo Okato Tariku, using fallback ID 101277
Mapped Raksha to TMDB ID 278200
No TMDB match found for Stoovertpuram Police Station, using fallback ID 101279
Mapped Lakshmi Raave Maa Intiki to TMDB ID 331316
Mapped Mr. Theertha to TMDB ID 995368
Mapped Ee Abbai Chala Manchodu to TMDB ID 117076
Mapped Yemaindi Ee Vela to TMDB ID 420380
Mapped Aadu Puli Attam to TMDB ID 66333
Mapped Subhakankshalu to TMDB ID 82752
No TMDB match found for Samsaaram Oka Chadarangam, using fallback ID 101287
No TMDB match found for Intinti Bhagavatham, using fallback ID 101288
Mapped Driver Ramudu to TMDB ID 994631
Mapped Saradaga Kasepu to TMDB ID 200305
Mapped Snehithuda to TMDB ID 109369
Mapped Kaasi to TMDB ID 1388768
Mapped Prema Pipasi to TMDB ID 800797
No TMDB match found for Uyarntha Ullam, using fallback ID 101294
Mapped Pekata Paparao to TMDB ID 442354
No TMDB match found for Praja pratinidhi, using fallback ID 101296
Mapped Meghasandesam to 

Error fetching poster for ID 100452.0 (Power Unlimited 2): 404 Client Error: Not Found for url: https://api.themoviedb.org/3/movie/100452.0?api_key=8265bd1679663a7ea12ac168da84d2e8&language=en-US
No poster path found for ID 1311149.0 (Size Zero)
No poster path found for ID 590753.0 (Lakshmi's NTR)
Error fetching poster for ID 100473.0 (Adhipathi): 404 Client Error: Not Found for url: https://api.themoviedb.org/3/movie/100473.0?api_key=8265bd1679663a7ea12ac168da84d2e8&language=en-US
Error fetching poster for ID 100492.0 (Sambo Siva Sambho): 404 Client Error: Not Found for url: https://api.themoviedb.org/3/movie/100492.0?api_key=8265bd1679663a7ea12ac168da84d2e8&language=en-US
No poster path found for ID 261764.0 (Uthama Puthiran)
Error fetching poster for ID 100506.0 (Okka Magadu): 404 Client Error: Not Found for url: https://api.themoviedb.org/3/movie/100506.0?api_key=8265bd1679663a7ea12ac168da84d2e8&language=en-US
Error fetching poster for ID 100508.0 (Tappu Chesi Pappu Koodu): 404 Cli

No poster path found for ID 866604.0 (Alludu Diddina Kapuram)
No poster path found for ID 1144820.0 (Kanchana)
No poster path found for ID 1178416.0 (Mayuri)
Error fetching poster for ID 100994.0 (Seetharamula Kalyanam Lankalo): 404 Client Error: Not Found for url: https://api.themoviedb.org/3/movie/100994.0?api_key=8265bd1679663a7ea12ac168da84d2e8&language=en-US
No poster path found for ID 82316.0 (Yuvakudu)
No poster path found for ID 341310.0 (Jayammu Nischayammu Raa!)
No poster path found for ID 126206.0 (Manchi Donga)
No poster path found for ID 307957.0 (Soggadi Pellam)
No poster path found for ID 117035.0 (Bumper Offer)
No poster path found for ID 279132.0 (Yudda Bhoomi)
Error fetching poster for ID 101026.0 (Rayalaseema Love Story): 404 Client Error: Not Found for url: https://api.themoviedb.org/3/movie/101026.0?api_key=8265bd1679663a7ea12ac168da84d2e8&language=en-US
No poster path found for ID 1090083.0 (Swargam Narakam)
Error fetching poster for ID 101038.0 (Idhi Mamulu Prema

No poster path found for ID 101372.0 (Mosagalaku Mosagaadu)
Error fetching poster for ID 101374.0 (Chinnabbayi): 404 Client Error: Not Found for url: https://api.themoviedb.org/3/movie/101374.0?api_key=8265bd1679663a7ea12ac168da84d2e8&language=en-US
Error fetching poster for ID 101375.0 (Ugranarasimham): 404 Client Error: Not Found for url: https://api.themoviedb.org/3/movie/101375.0?api_key=8265bd1679663a7ea12ac168da84d2e8&language=en-US
No poster path found for ID 1017675.0 (Chal Mohana Ranga)
No poster path found for ID 77037.0 (Maavichiguru)
No poster path found for ID 137364.0 (Money Money More Money)
No poster path found for ID 28887.0 (Evaraina Eppudaina)
No poster path found for ID 262994.0 (9 Nelalu)
                         title    Year Certificate  \
0      Bahubali: The Beginning  2015.0          UA   
1  Baahubali 2: The Conclusion  2017.0          UA   
2              1 - Nenokkadine  2014.0          UA   
3                      Dhoom:3  2013.0          UA   
4          

In [26]:
print(self.movies_with_credits[['cast', 'crew']].head())
self.movies_with_credits['cast'] = self.movies_with_credits['cast'].astype(str).fillna('')
self.movies_with_credits['crew'] = self.movies_with_credits['crew'].astype(str).fillna('')

NameError: name 'self' is not defined

In [28]:
import pandas as pd

# Load datasets
movies_df = pd.read_csv("../data/movies.csv")
credits_df = pd.read_csv("../data/credits.csv")
telugu_movies_df = pd.read_csv("../data/TeluguMovies_enhanced.csv")

# Check genres in movies_df
print("Movies genres issues:")
print(movies_df[movies_df['genres'].apply(lambda x: not pd.isna(x) and not x.startswith('['))]['genres'].head())

# Check cast and crew in credits_df
print("\nCredits cast issues:")
print(credits_df[credits_df['cast'].apply(lambda x: not pd.isna(x) and not x.startswith('['))]['cast'].head())
print("\nCredits crew issues:")
print(credits_df[credits_df['crew'].apply(lambda x: not pd.isna(x) and not x.startswith('['))]['crew'].head())

# Check genres in telugu_movies_df
print("\nTelugu movies genres issues:")
print(telugu_movies_df[telugu_movies_df['genres'].apply(lambda x: not pd.isna(x) and not x.startswith('['))]['genres'].head())

Movies genres issues:
Series([], Name: genres, dtype: object)

Credits cast issues:
Series([], Name: cast, dtype: object)

Credits crew issues:
Series([], Name: crew, dtype: object)

Telugu movies genres issues:
0                Action, Drama            
1                Action, Drama            
2             Action, Thriller            
3             Action, Thriller            
4    Action, Adventure, Sci-Fi            
Name: genres, dtype: object
