# AI Project - Filmception
## Submitted By:
### Tehreem Zafar & Umar Murtaza

In [1]:
# Imports
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sentence_transformers import SentenceTransformer

# Optional: Mount drive if using Google Drive (skip in VS Code)
# from google.colab import drive
# drive.mount('/content/drive')

  from .autonotebook import tqdm as notebook_tqdm


Cleaning Data (plot_summaries.txt, movie.metadata.tsv)

In [2]:
import pandas as pd
import re
import nltk
nltk.download('stopwords')
#nltk.download('punkt') # not needed
nltk.download('wordnet')
nltk.download('punkt_tab') # download resource
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Read the plot summaries: one "<movie_id>\t<summary>" record per line
with open('plot_summaries.txt', encoding='utf-8') as summaries_file:
    lines = summaries_file.readlines()
summaries = {}
for line in lines:
    movie_key, plot_text = line.strip().split('\t', 1)
    summaries[movie_key] = plot_text

# Read the metadata, keeping only column 0 (movie id) and column 8 (genres)
meta = (
    pd.read_csv('movie.metadata.tsv', sep='\t', header=None)
    .loc[:, [0, 8]]
    .rename(columns={0: 'movie_id', 8: 'genres'})
)

# Turn the summaries into a frame and inner-join it with the metadata.
# The ids are read as strings from the text file, so cast them to int
# before joining on the metadata's movie_id column.
df = (
    pd.DataFrame.from_dict(summaries, orient='index', columns=['summary'])
    .rename_axis('movie_id')
    .reset_index()
    .astype({'movie_id': int})
    .merge(meta, on='movie_id')
)

# Shared resources used by clean_text
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def clean_text(text):
    """Normalise a summary for feature extraction.

    Drops every character that is not an ASCII letter or whitespace,
    lower-cases the result, tokenises it with NLTK, removes tokens found in
    the module-level ``stop_words`` set and lemmatises the rest with the
    module-level ``lemmatizer``. Returns the surviving tokens joined by
    single spaces.
    """
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text).lower()
    kept_tokens = []
    for token in nltk.word_tokenize(letters_only):
        if token in stop_words:
            continue
        kept_tokens.append(lemmatizer.lemmatize(token))
    return ' '.join(kept_tokens)

# Store the normalised text next to the raw summary
df['clean_summary'] = df['summary'].map(clean_text)

# Genres arrive as a stringified mapping; keep only its values as a list
import ast


def _genre_values(raw_genres):
    """Return the values of the stringified genre mapping, or [] when it is missing."""
    if pd.isna(raw_genres):
        return []
    return list(ast.literal_eval(raw_genres).values())


df['genres'] = df['genres'].map(_genre_values)

# Persist the cleaned frame for the later cells
df.to_csv('cleaned_movies.csv', index=False)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt_tab.zip.


Step 2: Translation & Text-to-Speech

Installing required libraries:

In [3]:
%pip install googletrans==4.0.0-rc1 gTTS

Collecting googletrans==4.0.0-rc1Note: you may need to restart the kernel to use updated packages.

  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting gTTS
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2025.1.1-py3-none-any.whl.metadata (2.1 kB)
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting rfc3986<2,>=1.3 (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting htt

  DEPRECATION: Building 'googletrans' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to the source tree of 'googletrans'. Discussion can be found at https://github.com/pypa/pip/issues/6334
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gradio 5.29.0 requires httpx>=0.24.1, but you have httpx 0.13.3 which is incompatible.
gradio-client 1.10.0 requires httpx>=0.24.1, but you have httpx 0.13.3 which is incompatible.


Import and load cleaned data

In [5]:
import pandas as pd
from googletrans import Translator
from gtts import gTTS
import os

# Load the cleaned summaries and keep only the first 60 rows
df = pd.read_csv('cleaned_movies.csv').head(60)

Translate summaries

In [6]:
%pip install deep-translator

Collecting deep-translator
  Downloading deep_translator-1.11.4-py3-none-any.whl.metadata (30 kB)
Collecting beautifulsoup4<5.0.0,>=4.9.1 (from deep-translator)
  Using cached beautifulsoup4-4.13.4-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>1.2 (from beautifulsoup4<5.0.0,>=4.9.1->deep-translator)
  Using cached soupsieve-2.7-py3-none-any.whl.metadata (4.6 kB)
Downloading deep_translator-1.11.4-py3-none-any.whl (42 kB)
Using cached beautifulsoup4-4.13.4-py3-none-any.whl (187 kB)
Using cached soupsieve-2.7-py3-none-any.whl (36 kB)
Installing collected packages: soupsieve, beautifulsoup4, deep-translator

   ---------------------------------------- 0/3 [soupsieve]
   ------------- -------------------------- 1/3 [beautifulsoup4]
   ------------- -------------------------- 1/3 [beautifulsoup4]
   ------------- -------------------------- 1/3 [beautifulsoup4]
   ------------- -------------------------- 1/3 [beautifulsoup4]
   ------------- -------------------------- 1/3 [beautifu

In [7]:
from deep_translator import GoogleTranslator
import pandas as pd
import time

def _split_text_chunks(text, chunk_size):
    """Split ``text`` into pieces of at most ``chunk_size`` characters, cutting at spaces when possible."""
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    chunks = []
    pos, total = 0, len(text)
    while pos < total:
        end = min(pos + chunk_size, total)
        # Back up to the last space so a word is not cut in half; a single
        # word longer than chunk_size is still hard-split.
        if end < total and not text[end].isspace():
            cut = text.rfind(' ', pos + 1, end)
            if cut != -1:
                end = cut
        piece = text[pos:end].strip()
        if piece:
            chunks.append(piece)
        pos = end
        # Skip the separator(s) so the next chunk starts on a word
        while pos < total and text[pos].isspace():
            pos += 1
    return chunks


def translate_text(text, lang, chunk_size=4500):
    """Translate ``text`` into ``lang`` with GoogleTranslator, chunk by chunk.

    Args:
        text: Text to translate. Non-strings and blank strings yield "".
        lang: Target language code passed to ``GoogleTranslator``.
        chunk_size: Maximum characters per request (the default stays under
            the 5000-character limit the original comment refers to).

    Returns:
        The translated chunks joined by single spaces, or "" when the input
        is empty or any error occurs (the error is printed, not raised).
    """
    if not isinstance(text, str) or text.strip() == "":
        return ""
    try:
        # One translator instance is enough for all chunks of this text
        translator = GoogleTranslator(source='auto', target=lang)
        translated_chunks = [
            translator.translate(chunk)
            for chunk in _split_text_chunks(text, chunk_size)
        ]
        # Ignore empty/None results instead of letting join() raise and
        # throw away the chunks that did translate.
        return " ".join(piece for piece in translated_chunks if piece)
    except Exception as e:
        print(f"Translation error for lang '{lang}': {e}")
        return ""

# Reload the cleaned data, drop rows without a cleaned summary and renumber
# the rows so the index is 0..n-1; keep the first 60 rows for testing.
df = (
    pd.read_csv('cleaned_movies.csv')
    .dropna(subset=['clean_summary'])
    .reset_index(drop=True)
    .head(60)
)

translated_ar, translated_ur, translated_ko = [], [], []
translated_columns = {
    'summary_ar': translated_ar,
    'summary_ur': translated_ur,
    'summary_ko': translated_ko,
}

for idx, summary in enumerate(df['clean_summary']):
    ar = translate_text(summary, 'ar')
    ur = translate_text(summary, 'ur')
    ko = translate_text(summary, 'ko')

    translated_ar.append(ar)
    translated_ur.append(ur)
    translated_ko.append(ko)

    # Report progress, then pause to avoid rate limiting
    print(f"Translated index {idx}")
    time.sleep(1)  # 1 sec delay per row (adjustable)

    # Checkpoint every 10th row; rows not translated yet are left as NaN
    # because the partial Series is aligned on the 0..n-1 index.
    if idx % 10 == 0:
        for column, values in translated_columns.items():
            df[column] = pd.Series(values)
        df.to_csv('translated_checkpoints.csv', index=False)

# Final save with every row translated
for column, values in translated_columns.items():
    df[column] = values
df.to_csv('translated_checkpoints.csv', index=False)

print("✅ Translations complete and saved!")

Translated index 0
Translated index 1
Translated index 2
Translated index 3
Translated index 4
Translated index 5
Translated index 6
Translated index 7
Translated index 8
Translated index 9
Translated index 10
Translated index 11
Translated index 12
Translated index 13
Translated index 14
Translated index 15
Translated index 16
Translated index 17
Translated index 18
Translated index 19
Translated index 20
Translated index 21
Translated index 22
Translated index 23
Translated index 24
Translated index 25
Translated index 26
Translated index 27
Translated index 28
Translated index 29
Translated index 30
Translated index 31
Translated index 32
Translated index 33
Translated index 34
Translated index 35
Translated index 36
Translated index 37
Translated index 38
Translated index 39
Translated index 40
Translated index 41
Translated index 42
Translated index 43
Translated index 44
Translated index 45
Translated index 46
Translated index 47
Translated index 48
Translated index 49
Translated

Audio Conversion

In [8]:
%pip install gTTS

Note: you may need to restart the kernel to use updated packages.


In [10]:
import os
import time
import json
from gtts import gTTS
from gtts.tts import gTTSError

# Load translated data
df = pd.read_csv('translated_checkpoints.csv')

# Languages to convert and their respective columns
lang_map = {
    'ar': 'summary_ar',
    'ur': 'summary_ur',
    'ko': 'summary_ko'
}

# Directory to save audio
audio_dir = 'audio'
os.makedirs(audio_dir, exist_ok=True)

# Dictionary to store audio file paths: {movie_id: {lang: filename}}
audio_files = {}

max_attempts = 3

# TTS conversion loop
for idx, row in df.iterrows():
    movie_id = str(row['movie_id'])
    audio_files[movie_id] = {}

    for lang, col in lang_map.items():
        text = row.get(col, '')
        filename = f"{movie_id}_{lang}.mp3"
        filepath = os.path.join(audio_dir, filename)

        # Skip if already exists
        if os.path.exists(filepath):
            print(f"Already exists: {filename}")
            audio_files[movie_id][lang] = filename
            continue

        # A failed translation is stored as "" and read back from the CSV as
        # NaN; passing that to gTTS raises something other than gTTSError and
        # would abort the whole loop, so skip it explicitly.
        if not isinstance(text, str) or not text.strip():
            print(f"⚠️ Skipping {filename}: no translated text")
            continue

        # Retry mechanism
        success = False
        for attempt in range(max_attempts):
            try:
                tts = gTTS(text=text, lang=lang)
                tts.save(filepath)
                print(f"✅ Saved: {filename}")
                audio_files[movie_id][lang] = filename
                success = True
                break
            except gTTSError as e:
                print(f"❌ Error for movie_id {movie_id} lang {lang} (Attempt {attempt+1}/3): {e}")
                # Only wait when another attempt follows
                if attempt + 1 < max_attempts:
                    time.sleep(15)  # Wait before retry

        if not success:
            print(f"❌ Failed after 3 attempts: {filename}")

        time.sleep(10)  # Delay between conversions

# Save JSON index of the generated files
json_path = os.path.join(audio_dir, 'audio_files.json')
with open(json_path, 'w') as f:
    json.dump(audio_files, f, indent=2)

print(f"\n🎉 Audio processing complete. Index saved to {json_path}")

❌ Error for movie_id 23890098 lang ar (Attempt 1/3): 429 (Too Many Requests) from TTS API. Probable cause: Unknown


KeyboardInterrupt: 

Genre Prediction Model

1. Load the cleaned data

In [11]:
# Show which columns the current frame carries
print(list(df.columns))

['movie_id', 'summary', 'genres', 'clean_summary', 'summary_ar', 'summary_ur', 'summary_ko']


In [12]:
import pandas as pd

# Reload the cleaned data and keep only rows that have both a cleaned
# summary and a genres value.
df = pd.read_csv('cleaned_movies.csv').dropna(subset=['clean_summary', 'genres'])

2. Feature Extraction

In [14]:
from sentence_transformers import SentenceTransformer
import torch

# Use the GPU when torch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
model = SentenceTransformer('all-MiniLM-L6-v2', device=device)

# Embed every cleaned summary in a single encode() call
summary_texts = df['clean_summary'].tolist()
embeddings = model.encode(summary_texts, show_progress_bar=True, device=device)

Batches: 100%|██████████| 1319/1319 [20:35<00:00,  1.07it/s]


Multi-Label Genre Classification: Since each movie can belong to multiple genres, we treat genre prediction as a multi-label classification problem.

1. Prepare Genre Labels

In [15]:
from sklearn.preprocessing import MultiLabelBinarizer

# The CSV round-trip turns genre lists into strings such as
# "['Action', 'Comedy']"; parse those back into real lists.
import ast


def _as_genre_list(value):
    """Parse a stringified list; return any non-string value unchanged."""
    if isinstance(value, str):
        return ast.literal_eval(value)
    return value


df['genres'] = df['genres'].map(_as_genre_list)

# One indicator column per genre
mlb = MultiLabelBinarizer()
y = mlb.fit_transform(df['genres'])

# Keep the learned genre names for later use
genre_classes = mlb.classes_

2. Train/Test Split

In [16]:
# Features and labels must have the same number of rows
print(f"{len(embeddings)} {len(y)}")

42207 42207


In [17]:
# Drop rows where either 'clean_summary' or 'genres' is NaN.
# .copy() gives an independent frame, so the column assignment below never
# writes through to (or warns about) the original `df`.
df_cleaned = df.dropna(subset=['clean_summary', 'genres']).copy()

# Make sure the genres are real lists (parse them if they are still strings)
df_cleaned['genres'] = df_cleaned['genres'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

# Recompute embeddings and labels from the same frame so their rows line up
embeddings = model.encode(df_cleaned['clean_summary'].tolist(), show_progress_bar=True, device=device)
y = mlb.fit_transform(df_cleaned['genres'])

# Check the lengths again
print(len(embeddings), len(y))


Batches: 100%|██████████| 1319/1319 [21:31<00:00,  1.02it/s]


42207 42207


Proceeding with train/test split after the length has been aligned (80/20)

In [11]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
split_parts = train_test_split(embeddings, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

NameError: name 'embeddings' is not defined

In [9]:
"""from sklearn.decomposition import PCA

# Reduce embedding dimensions (optional, but helps with noise)
pca = PCA(n_components=128, random_state=42)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)"""


'from sklearn.decomposition import PCA\n\n# Reduce embedding dimensions (optional, but helps with noise)\npca = PCA(n_components=128, random_state=42)\nX_train_pca = pca.fit_transform(X_train)\nX_test_pca = pca.transform(X_test)'

Sklearn Model: OneVsRestClassifier trains one binary classifier per genre, which suits multi-label classification like this

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

# One logistic-regression model per genre column of y_train
base_estimator = LogisticRegression(max_iter=1000)
classifier = OneVsRestClassifier(base_estimator)
classifier.fit(X_train, y_train)

NameError: name 'X_train' is not defined

Evaluation (Hamming Loss, Accuracy, Precision, Recall, F1)

In [21]:
from sklearn.metrics import hamming_loss, accuracy_score, precision_score, recall_score, f1_score

# Predict the genre indicator matrix for the held-out rows
y_pred = classifier.predict(X_test)

# Label-wise error rate and exact-match accuracy
hamming = hamming_loss(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Micro-averaged precision / recall / F1
precision, recall, f1 = (
    metric(y_test, y_pred, average='micro')
    for metric in (precision_score, recall_score, f1_score)
)

# Report everything except Hamming loss as a percentage
print(f"Hamming Loss: {hamming:.4f}")
for label, value in (
    ("Accuracy Score", accuracy),
    ("Precision (micro)", precision),
    ("Recall (micro)", recall),
    ("F1 Score (micro)", f1),
):
    print(f"{label}: {value * 100:.2f}%")


Hamming Loss: 0.0089
Accuracy Score: 7.00%
Precision (micro): 65.12%
Recall (micro): 21.95%
F1 Score (micro): 32.83%


Using the Model for Predictions on New Summaries

1. Create a Prediction Function: Use the trained model to predict genres for new movie summaries

In [4]:
def predict_genres(summary_text, model, encoder_model, pca_model=None, threshold=0.5):
    """Predict genre names for one movie summary.

    Args:
        summary_text: The summary to classify.
        model: Fitted classifier exposing ``decision_function``.
        encoder_model: Sentence encoder exposing ``encode``.
        pca_model: Optional fitted reducer exposing ``transform``. When it is
            ``None`` (the default) the raw embedding is used.
        threshold: Minimum decision score for a genre to be reported.
            NOTE(review): this is compared against ``decision_function``
            scores, not probabilities — confirm that 0.5 is the intended
            cut-off for the classifier in use.

    Returns:
        A comma-separated string of genre names, "No genre predicted" when no
        score reaches the threshold, or "Error: ..." when encoding/reduction
        fails.

    Relies on the module-level ``mlb`` binarizer to map indicator rows back
    to genre names.
    """
    try:
        print(f"Input summary: {summary_text[:50]}...")
        # Encode once and force a (1, n_features) shape for the classifier
        embedding = encoder_model.encode(
            [summary_text], show_progress_bar=False, convert_to_numpy=True
        ).reshape(1, -1)
        print(f"Original embedding shape: {embedding.shape}")

        # Only reduce dimensionality when a PCA model was actually supplied;
        # previously the default of None always ended in an error string.
        if pca_model is not None:
            embedding = pca_model.transform(embedding)
            print(f"PCA embedding shape: {embedding.shape}")
    except Exception as e:
        print(f"ERROR: {str(e)}")
        return f"Error: {str(e)}"

    scores = model.decision_function(embedding)
    preds = (scores >= threshold).astype(int)

    predicted_genres = mlb.inverse_transform(preds)
    return ", ".join(predicted_genres[0]) if predicted_genres and predicted_genres[0] else "No genre predicted"



Testing the genre prediction:

Save the model to reuse later

In [12]:
import os

import joblib

# joblib.dump does not create missing directories, so make sure the target
# folder exists before writing the artefacts.
os.makedirs('models', exist_ok=True)

joblib.dump(classifier, 'models/genre_classifier.pkl')
joblib.dump(mlb, 'models/genre_binarizer.pkl')
"""joblib.dump(pca, 'models/pca_model.pkl')"""


"joblib.dump(pca, 'models/pca_model.pkl')"

GUI

In [33]:
%pip install gradio
%pip install --upgrade httpx
%pip install --upgrade gradio

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [36]:
import joblib
import numpy as np
import os
import gradio as gr
from sentence_transformers import SentenceTransformer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.preprocessing import MultiLabelBinarizer
from gtts import gTTS

# ------------------------
# MODEL LOADING & SETUP
# ------------------------

# Create audio directory if it doesn't exist
os.makedirs("audio", exist_ok=True)

# Locations of the artefacts written by the training cells
model_path = 'models/genre_classifier.pkl'
binarizer_path = 'models/genre_binarizer.pkl'


def _build_dummy_models():
    """Return a placeholder (classifier, binarizer) pair so the UI can start.

    The classifier is never fitted, so prediction calls on it are expected to
    fail; the binarizer is fitted on a fixed list of placeholder genres.
    """
    dummy_genres = ['Action', 'Adventure', 'Comedy', 'Drama', 'Horror', 'Romance', 'Sci-Fi', 'Thriller']
    dummy_classifier = OneVsRestClassifier(LinearSVC())
    dummy_binarizer = MultiLabelBinarizer()
    dummy_binarizer.fit([dummy_genres])
    return dummy_classifier, dummy_binarizer


try:
    if os.path.exists(model_path) and os.path.exists(binarizer_path):
        try:
            # NOTE: joblib.load unpickles the files, which can execute
            # arbitrary code - only load artefacts produced by this project.
            classifier = joblib.load(model_path)
            mlb = joblib.load(binarizer_path)
            print("✅ Loaded existing models")
        except Exception as e:
            print(f"Error loading models: {str(e)}")
            print("Creating dummy classifier for testing purposes...")
            classifier, mlb = _build_dummy_models()
    else:
        print("Model files not found. Creating dummy models for testing interface...")
        classifier, mlb = _build_dummy_models()
except Exception as e:
    print(f"Error during model initialization: {str(e)}")
    raise

# Load encoder model (must match the encoder used to train the classifier)
try:
    encoder_model = SentenceTransformer('all-MiniLM-L6-v2')
    print("✅ Loaded encoder model")
except Exception as e:
    print(f"Error loading encoder model: {str(e)}")
    raise

# ------------------------
# PREDICTION FUNCTIONS
# ------------------------

def predict_genres(summary_text):
    """Predict movie genres from a summary using the loaded classifier.

    Uses the module-level ``encoder_model``, ``classifier`` and ``mlb``.

    Args:
        summary_text: Summary entered in the UI.

    Returns:
        A user-facing string: a prompt when the input is blank,
        "Predicted genres: ..." when the classifier selects at least one
        genre, the result of ``keyword_based_prediction`` when the classifier
        selects nothing or raises, or "Error: ..." when encoding fails.
    """
    if not summary_text or summary_text.strip() == "":
        return "Please enter a movie summary"

    try:
        # Encode the text directly without PCA
        embedding = encoder_model.encode([summary_text], convert_to_numpy=True)
        print(f"Embedding shape: {embedding.shape}")

        # Try to use the classifier, or use keyword-based fallback if it fails
        try:
            # Use the classifier's own decision rule - the same rule the
            # evaluation metrics were computed with. Thresholding raw
            # decision_function scores at 0.5 is stricter than that rule and
            # silently drops genres the model would otherwise predict.
            preds = classifier.predict(embedding)
            predicted_genres = mlb.inverse_transform(preds)

            if predicted_genres and len(predicted_genres[0]) > 0:
                genres = ", ".join(predicted_genres[0])
                return f"Predicted genres: {genres}"
            else:
                # Fall back to keyword-based prediction if no genres were predicted
                return keyword_based_prediction(summary_text)

        except Exception as e:
            print(f"Classifier error: {str(e)}")
            print("Falling back to keyword-based genre prediction...")
            return keyword_based_prediction(summary_text)

    except Exception as e:
        print(f"ERROR during prediction: {str(e)}")
        return f"Error: {str(e)}"
        
def keyword_based_prediction(text):
    """Guess genres from keywords; used as a fallback when the ML model fails.

    A genre is reported when any of its keywords occurs at the start of a
    word in ``text`` (case-insensitive), so "fight" matches "fighting" but
    "war" no longer matches "toward". Genres are listed in the fixed order of
    the keyword table, so the same input always yields the same string.

    Args:
        text: The movie summary.

    Returns:
        "Predicted genres (keyword-based): ..." or a message saying that no
        genre could be inferred.
    """
    import re

    text = text.lower()

    # Dictionary of genres and their associated keywords
    genre_keywords = {
        'Action': ['fight', 'explosion', 'battle', 'combat', 'warrior', 'war', 'gun', 'weapon', 'martial', 'mission'],
        'Adventure': ['journey', 'quest', 'expedition', 'explore', 'discover', 'voyage', 'travel', 'treasure'],
        'Comedy': ['funny', 'laugh', 'humor', 'hilarious', 'joke', 'comic', 'comedy', 'comical', 'witty'],
        'Drama': ['emotional', 'relationship', 'family', 'struggle', 'life', 'conflict', 'serious', 'tragic'],
        'Fantasy': ['magic', 'wizard', 'spell', 'mythical', 'dragon', 'fairy', 'enchanted', 'kingdom', 'supernatural'],
        'Horror': ['scary', 'fear', 'terror', 'monster', 'killer', 'ghost', 'haunt', 'blood', 'curse', 'evil', 'demon'],
        'Romance': ['love', 'relationship', 'romantic', 'passion', 'kiss', 'date', 'marriage', 'couple', 'affair'],
        'Sci-Fi': ['space', 'future', 'alien', 'technology', 'planet', 'robot', 'science', 'futuristic', 'spaceship'],
        'Thriller': ['suspense', 'mystery', 'crime', 'detective', 'investigation', 'serial', 'killer', 'murder', 'tension']
    }

    # One pattern per genre: any keyword anchored at a word start.
    # Iterating the dict keeps the result order stable (no set() shuffle).
    found_genres = [
        genre
        for genre, keywords in genre_keywords.items()
        if re.search(r'\b(?:' + '|'.join(re.escape(k) for k in keywords) + ')', text)
    ]

    if found_genres:
        return f"Predicted genres (keyword-based): {', '.join(found_genres)}"
    else:
        return "No specific genre predicted using keywords. Try a more detailed summary."

# Audio conversion function
def convert_to_audio(summary_text, language):
    """Convert summary text to an MP3 narration in the given language.

    Args:
        summary_text: Text to speak.
        language: gTTS language code (e.g. "en", "ar").

    Returns:
        ``(filename, status_message)`` on success, or ``(None, message)``
        when the input is blank or the conversion fails.
    """
    import hashlib

    if not summary_text or summary_text.strip() == "":
        return None, "Please enter a movie summary"

    try:
        # Derive the filename from a stable digest of language + text.
        # The built-in hash() is randomised per process and the previous
        # name ignored the language, so different requests could overwrite
        # each other's audio.
        digest = hashlib.sha256(f"{language}\n{summary_text}".encode("utf-8")).hexdigest()[:16]
        filename = f"audio/summary_{language}_{digest}.mp3"

        # Generate audio file
        tts = gTTS(text=summary_text, lang=language)
        tts.save(filename)

        print(f"✅ Generated audio file: {filename}")
        return filename, f"Audio generated successfully in {language}"
    except Exception as e:
        print(f"ERROR during audio conversion: {str(e)}")
        return None, f"Error: Unable to convert to audio: {str(e)}"

# ------------------------
# EXAMPLE DATA
# ------------------------

# Example movie summaries for quick testing
example_summaries = [
    "A young farm boy joins a rebellion against an evil galactic empire after discovering his connection to a mystical power.",
    "A billionaire tech genius builds a powered suit of armor to escape captivity and becomes a superhero fighting against those who misuse his company's weapons.",
    "A group of friends embark on a terrifying journey when they discover a cursed videotape that kills its viewers seven days after watching it.",
    "Two teenagers from rival high school cliques fall in love and navigate their forbidden relationship despite the objections of their friends and families.",
    "An FBI trainee must seek the help of an imprisoned cannibalistic serial killer to catch another murderer who skins his victims."
]

# Language codes mapping for gTTS
language_options = {
    "English": "en",
    "Arabic": "ar",
    "Korean": "ko", 
    "Urdu": "ur",
    "Spanish": "es",
    "French": "fr",
    "German": "de",
    "Italian": "it",
    "Japanese": "ja",
    "Chinese": "zh-CN",
    "Hindi": "hi",
    "Portuguese": "pt"
}

# ------------------------
# CSS STYLING
# ------------------------

css = """
.filmception-container {
    border-radius: 10px;
    padding: 20px;
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
}

.filmception-title {
    color: #010013;
    text-align: center;
    font-weight: 800;
    letter-spacing: 1px;
    margin-bottom: 10px;
}

.filmception-subtitle {
    color: #ffffff;
    font-size: 1.2em;
    text-align: center;
    margin-bottom: 10px;
}

.primary-btn {
    background: linear-gradient(90deg, #01b4e4, #90cea1) !important;
    border: none !important;
    color: white !important;
    font-weight: bold !important;
    padding: 8px 16px !important;
    border-radius: 5px !important;
    transition: all 0.3s ease !important;
    width: 100% !important;
    height: 45px !important;
}

.primary-btn:hover {
    box-shadow: 0 4px 10px rgba(1, 180, 228, 0.3) !important;
    transform: translateY(-2px) !important;
}

.feature-card {
    background-color: rgba(255, 255, 255, 0.05);
    border: 1px solid rgba(144, 206, 161, 0.2);
    border-radius: 8px;
    padding: 15px;
    margin-bottom: 15px;
}

.feature-title {
    color: #01b4e4;
    font-size: 1.1em;
    font-weight: bold;
    margin-bottom: 10px;
}

.tab-selected {
    background-color: rgba(1, 180, 228, 0.1) !important;
    border-bottom: 3px solid #01b4e4 !important;
    font-weight: bold !important;
}

.footer {
    text-align: center;
    color: #90cea1;
    font-size: 0.9em;
    padding-top: 20px;
    border-top: 1px solid rgba(144, 206, 161, 0.2);
    margin-top: 20px;
}

.examples-section {
    background-color: rgba(1, 180, 228, 0.05);
    border-radius: 8px;
    padding: 10px;
    margin-top: 10px;
}
#header-img{
    width: 100%;
    height: 300px;
    border-radius: 10px;
    margin-bottom: 20px;
}
"""

# ------------------------
# GRADIO UI DEFINITION
# ------------------------

# Build the Gradio interface: a shared summary input, example summaries, and
# two feature tabs (audio narration, genre prediction) wired to the functions
# defined above.
with gr.Blocks(css=css, title="Filmception") as demo:
    with gr.Column(elem_classes="filmception-container"):
        # Header image is loaded from a remote URL
        gr.Image(value="https://img.freepik.com/premium-photo/flying-popcorn-3d-glasses-film-reel-clapboard-yellow-background-cinema-movie-concept-3d_989822-1302.jpg?semt=ais_hybrid&w=740", show_label=False, elem_id="header-img")
        gr.Markdown("# 🎬 FILMCEPTION", elem_classes="filmception-title")
        gr.Markdown("An AI-powered Multilingual movie summary translator and genre classifier", elem_classes="filmception-subtitle")
        
        # Input Section
        with gr.Column(elem_classes="feature-card"):
            gr.Markdown("### Enter Your Movie Summary", elem_classes="feature-title")
            # Single textbox shared by both feature tabs
            input_text = gr.Textbox(
                placeholder="Enter a movie summary here or select from the examples below...", 
                label="",
                lines=5
            )
        
        # Examples Section
        with gr.Column(elem_classes="examples-section"):
            gr.Markdown("#### 📚 Example Summaries", elem_classes="feature-title")
            # The examples target input_text
            example_buttons = gr.Examples(
                examples=example_summaries,
                inputs=input_text,
                label="",
                examples_per_page=5
            )
                
        # Feature Tabs
        with gr.Tabs(selected=0) as tabs:
            # Tab 1: Audio Conversion
            with gr.Tab("🔊 Audio Narration", elem_classes="tab-selected"):
                with gr.Column(elem_classes="feature-card"):
                    gr.Markdown("Convert your movie summary to spoken audio", elem_classes="feature-title")
                    
                    with gr.Row():
                        # Shows the display names; the click handler below maps
                        # the chosen name to its language code
                        language_dropdown = gr.Dropdown(
                            choices=list(language_options.keys()),
                            value="English",
                            label="Select Language"
                        )
                    
                    with gr.Row():
                        audio_btn = gr.Button("Generate Audio Narration", elem_classes="primary-btn")
                    
                    with gr.Row():
                        audio_output = gr.Audio(label="")
                    
                    audio_status = gr.Textbox(label="Status", interactive=False)
            
            # Tab 2: Genre Prediction
            with gr.Tab("🎭 Genre Prediction"):
                with gr.Column(elem_classes="feature-card"):
                    gr.Markdown("Predict movie genres from your summary", elem_classes="feature-title")
                    
                    with gr.Row():
                        genre_btn = gr.Button("Predict Genres", elem_classes="primary-btn")
                    
                    genre_output = gr.Textbox(label="", interactive=False)
        
        # Footer
        with gr.Row(elem_classes="footer"):
            gr.Markdown("Filmception - Analyze movie summaries through AI")
    
    # Connect the functions
    # convert_to_audio returns (file path or None, status message), matching
    # the two outputs listed here
    audio_btn.click(
        fn=lambda text, lang: convert_to_audio(text, language_options[lang]),
        inputs=[input_text, language_dropdown],
        outputs=[audio_output, audio_status]
    )
    
    # predict_genres returns a single string shown in genre_output
    genre_btn.click(
        fn=predict_genres,
        inputs=input_text,
        outputs=genre_output
    )

# Launch the application
if __name__ == "__main__":
    # NOTE(review): share=True requests a public URL for this app; anyone
    # with the link can call the model and TTS functions - confirm that is
    # intended before sharing the notebook.
    demo.launch(share=True)  # Set share=True to create a public link
    print("✨ Filmception is running! Open the URL above in your browser.")

✅ Loaded existing models
✅ Loaded encoder model
* Running on local URL:  http://127.0.0.1:7875
* Running on public URL: https://0dd2f2cbcb99154fc7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


✨ Filmception is running! Open the URL above in your browser.
