In [2]:
import requests
import pandas as pd

API_KEY = '24024bccf5f0cd1f8d60f9bacba3cb6d'
movie_list = [
    "KGF: Chapter 2",
    "Inception",
    "Interstellar",
    "The Dark Knight",
    "Avatar",
    "Pushpa: The Rise"
]

headers = {
    "User-Agent": "Mozilla/5.0"
}

all_movies = []

for movie_name in movie_list:
    print(f"\n🔍 Searching for: {movie_name}")
    search_url = f"https://api.themoviedb.org/3/search/movie?api_key={API_KEY}&query={movie_name}"

    try:
        search_response = requests.get(search_url, headers=headers).json()

        if search_response['results']:
            movie_id = search_response['results'][0]['id']

            details_url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={API_KEY}"
            details = requests.get(details_url, headers=headers).json()

            title = details.get('title', 'N/A')
            overview = details.get('overview', 'N/A')
            rating = details.get('vote_average', 'N/A')
            release_date = details.get('release_date', 'N/A')
            genres = ", ".join([genre['name'] for genre in details.get('genres', [])])

            budget = details.get('budget', 0)
            revenue = details.get('revenue', 0)

            # Convert to Crores (divide by 10 million)
            budget_cr = round(budget / 1e7, 2)
            revenue_cr = round(revenue / 1e7, 2)

            # Determine verdict
            if revenue == 0:
                verdict = "❓ Unknown"
            elif revenue < budget:
                verdict = "❌ Flop"
            elif revenue <= 1.5 * budget:
                verdict = "✅ Average"
            elif revenue <= 2 * budget:
                verdict = "🎯 Hit"
            else:
                verdict = "🌟 Blockbuster"

            # Print details
            print(f"🎬 Title: {title}")
            print(f"📅 Release Date: {release_date}")
            print(f"⭐ TMDb Rating: {rating}")
            print(f"🎭 Genres: {genres}")
            print(f"📝 Overview: {overview}")
            print(f"💰 Budget: ₹{budget_cr} Cr")
            print(f"📈 Revenue: ₹{revenue_cr} Cr")
            print(f"🏆 Verdict: {verdict}")

            all_movies.append({
                'Title': title,
                'Release Date': release_date,
                'TMDb Rating': rating,
                'Genres': genres,
                'Overview': overview,
                'Budget (Cr)': budget_cr,
                'Revenue (Cr)': revenue_cr,
                'Verdict': verdict
            })

        else:
            print("❌ Movie not found on TMDb.")

    except requests.exceptions.ConnectionError as e:
        print("⚠️ Network Error:", e)
        print("🔁 Try again later or check your internet connection.")

# Save to CSV
if all_movies:
    df = pd.DataFrame(all_movies)
    df.to_csv("movie_data.csv", index=False)
    print("\n✅ movie_data.csv file saved successfully!")

    # Download in Google Colab
    from google.colab import files
    files.download("movie_data.csv")



🔍 Searching for: KGF: Chapter 2
🎬 Title: K.G.F: Chapter 2
📅 Release Date: 2022-04-14
⭐ TMDb Rating: 7.471
🎭 Genres: Action, Thriller, Adventure, Crime
📝 Overview: The blood-soaked land of Kolar Gold Fields (KGF) has a new overlord now - Rocky, whose name strikes fear in the heart of his foes. His allies look up to Rocky as their Savior, the government sees him as a threat to law and order; enemies are clamoring for revenge and conspiring for his downfall. Bloodier battles and darker days await as Rocky continues on his quest for unchallenged supremacy.
💰 Budget: ₹1.3 Cr
📈 Revenue: ₹9.04 Cr
🏆 Verdict: 🌟 Blockbuster

🔍 Searching for: Inception
🎬 Title: Inception
📅 Release Date: 2010-07-15
⭐ TMDb Rating: 8.369
🎭 Genres: Action, Science Fiction, Adventure
📝 Overview: Cobb, a skilled thief who commits corporate espionage by infiltrating the subconscious of his targets is offered a chance to regain his old life as payment for a task considered to be impossible: "inception", the implantation

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [3]:
import pandas as pd

# Load the movie data from CSV
df = pd.read_csv("movie_data.csv")

# Show the first few rows
df.head()


Unnamed: 0,Title,Release Date,TMDb Rating,Genres,Overview,Budget (Cr),Revenue (Cr),Verdict
0,K.G.F: Chapter 2,2022-04-14,7.471,"Action, Thriller, Adventure, Crime",The blood-soaked land of Kolar Gold Fields (KG...,1.3,9.04,🌟 Blockbuster
1,Inception,2010-07-15,8.369,"Action, Science Fiction, Adventure","Cobb, a skilled thief who commits corporate es...",16.0,83.9,🌟 Blockbuster
2,Interstellar,2014-11-05,8.456,"Adventure, Drama, Science Fiction",The adventures of a group of explorers who mak...,16.5,74.66,🌟 Blockbuster
3,The Dark Knight,2008-07-16,8.521,"Drama, Action, Crime, Thriller",Batman raises the stakes in his war on crime. ...,18.5,100.46,🌟 Blockbuster
4,Avatar,2009-12-15,7.59,"Action, Adventure, Fantasy, Science Fiction","In the 22nd century, a paraplegic Marine is di...",23.7,292.37,🌟 Blockbuster


In [4]:
print(df.columns.tolist())


['Title', 'Release Date', 'TMDb Rating', 'Genres', 'Overview', 'Budget (Cr)', 'Revenue (Cr)', 'Verdict']


In [5]:
print("✅ Rows after cleaning:", df.shape[0])


✅ Rows after cleaning: 6


In [6]:
print(df.head())
print(df.columns.tolist())  # full list of column names


              Title Release Date  TMDb Rating  \
0  K.G.F: Chapter 2   2022-04-14        7.471   
1         Inception   2010-07-15        8.369   
2      Interstellar   2014-11-05        8.456   
3   The Dark Knight   2008-07-16        8.521   
4            Avatar   2009-12-15        7.590   

                                        Genres  \
0           Action, Thriller, Adventure, Crime   
1           Action, Science Fiction, Adventure   
2            Adventure, Drama, Science Fiction   
3               Drama, Action, Crime, Thriller   
4  Action, Adventure, Fantasy, Science Fiction   

                                            Overview  Budget (Cr)  \
0  The blood-soaked land of Kolar Gold Fields (KG...          1.3   
1  Cobb, a skilled thief who commits corporate es...         16.0   
2  The adventures of a group of explorers who mak...         16.5   
3  Batman raises the stakes in his war on crime. ...         18.5   
4  In the 22nd century, a paraplegic Marine is di...       

In [7]:
df.columns = df.columns.str.strip()
print(df.columns.tolist())  # Confirm cleaned column names


['Title', 'Release Date', 'TMDb Rating', 'Genres', 'Overview', 'Budget (Cr)', 'Revenue (Cr)', 'Verdict']


In [8]:
# Save cleaned file to CSV
df.to_csv("cleaned_movies.csv", index=False)

# Download the file to your computer
files.download("cleaned_movies.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [9]:
import pandas as pd

# Load your cleaned CSV file
df = pd.read_csv("cleaned_movies.csv")

# Show first few rows
df.head()


Unnamed: 0,Title,Release Date,TMDb Rating,Genres,Overview,Budget (Cr),Revenue (Cr),Verdict
0,K.G.F: Chapter 2,2022-04-14,7.471,"Action, Thriller, Adventure, Crime",The blood-soaked land of Kolar Gold Fields (KG...,1.3,9.04,🌟 Blockbuster
1,Inception,2010-07-15,8.369,"Action, Science Fiction, Adventure","Cobb, a skilled thief who commits corporate es...",16.0,83.9,🌟 Blockbuster
2,Interstellar,2014-11-05,8.456,"Adventure, Drama, Science Fiction",The adventures of a group of explorers who mak...,16.5,74.66,🌟 Blockbuster
3,The Dark Knight,2008-07-16,8.521,"Drama, Action, Crime, Thriller",Batman raises the stakes in his war on crime. ...,18.5,100.46,🌟 Blockbuster
4,Avatar,2009-12-15,7.59,"Action, Adventure, Fantasy, Science Fiction","In the 22nd century, a paraplegic Marine is di...",23.7,292.37,🌟 Blockbuster


In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression


In [11]:
df = pd.read_csv("cleaned_movies.csv")
df.head()


Unnamed: 0,Title,Release Date,TMDb Rating,Genres,Overview,Budget (Cr),Revenue (Cr),Verdict
0,K.G.F: Chapter 2,2022-04-14,7.471,"Action, Thriller, Adventure, Crime",The blood-soaked land of Kolar Gold Fields (KG...,1.3,9.04,🌟 Blockbuster
1,Inception,2010-07-15,8.369,"Action, Science Fiction, Adventure","Cobb, a skilled thief who commits corporate es...",16.0,83.9,🌟 Blockbuster
2,Interstellar,2014-11-05,8.456,"Adventure, Drama, Science Fiction",The adventures of a group of explorers who mak...,16.5,74.66,🌟 Blockbuster
3,The Dark Knight,2008-07-16,8.521,"Drama, Action, Crime, Thriller",Batman raises the stakes in his war on crime. ...,18.5,100.46,🌟 Blockbuster
4,Avatar,2009-12-15,7.59,"Action, Adventure, Fantasy, Science Fiction","In the 22nd century, a paraplegic Marine is di...",23.7,292.37,🌟 Blockbuster


In [12]:
df = df.drop(['Title', 'Release Date', 'Overview', 'Verdict'], axis=1)


In [13]:
# First split the Genres column into a list
df['Genres'] = df['Genres'].apply(lambda x: [i.strip() for i in x.split(',')])

# Use MultiLabelBinarizer to one-hot encode the genres
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()
genre_df = pd.DataFrame(mlb.fit_transform(df['Genres']), columns=mlb.classes_)

# Drop the original Genres column and add new one-hot genre columns
df = pd.concat([df.drop(['Genres'], axis=1), genre_df], axis=1)


In [14]:
X = df.drop(['Revenue (Cr)'], axis=1)
y = df['Revenue (Cr)']


In [15]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# 1. Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42
)

# 2. Train model
model = LinearRegression()
model.fit(X_train, y_train)


In [35]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)


In [31]:
model = LinearRegression()
model.fit(X_train, y_train)


In [24]:
from sklearn.metrics import mean_squared_error, r2_score


In [36]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("📉 Mean Squared Error:", round(mse, 2))
print("📈 R² Score:", round(r2, 4))


📉 Mean Squared Error: 1103.43
📈 R² Score: 0.1676


In [51]:
# First, get the exact feature order from the model
required_features = model.feature_names_in_

# Create a dictionary with all features, defaulting to 0 if not specified
movie_data = {
    'Budget (Cr)': 100,
    'TMDb Rating': 8.5,
    'Action': 1,
    # All other features go here
    'Adventure': 2,
    'Fantasy': 0,
    'Science Fiction': 0,
    'Thriller': 0,
    'Drama': 1,
    'Crime': 0
    # Add any other features that appear in model.feature_names_in_
}

# Create DataFrame with EXACTLY the right features in the right order
new_movie = pd.DataFrame(
    data=[[movie_data[feature] for feature in required_features]],
    columns=required_features
)

# Make prediction
predicted_revenue = model.predict(new_movie)

# Print result (with fixed syntax)
print(f"Predicted Revenue: ${round(predicted_revenue[0]):,}")

Predicted Revenue: $949
