In [6]:
import pdfplumber

def extract_text_from_pdf(pdf_path):
    with pdfplumber.open(pdf_path) as pdf:
        text = ""
        for page in pdf.pages:
            text += page.extract_text()
    return text

# Input PDF file
input_pdf = "Leap_Frog.pdf"
script_text = extract_text_from_pdf(input_pdf)


In [7]:
import re

def segment_script(script_text):
    scene_pattern = r"(EXT\.|INT\.)\s.*?\s-\s(DAY|NIGHT)"
    matches = list(re.finditer(scene_pattern, script_text))

    scenes = []
    for i, match in enumerate(matches):
        scene_heading = match.group(0)
        start_idx = match.end()
        end_idx = matches[i + 1].start() if i + 1 < len(matches) else len(script_text)
        scene_content = script_text[start_idx:end_idx].strip()
        scenes.append({"Scene": scene_heading, "Content": scene_content})

    return scenes

scenes = segment_script(script_text)


In [8]:
from transformers import pipeline

# Load the NER pipeline
ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", grouped_entities=True)

def analyze_scene(scene):
    action, dialogue = [], []
    props, set_design = [], []
    lines = scene.split("\n")

    for line in lines:
        if re.match(r"^[A-Z]+(\s\([A-Z]+\))?:", line.strip()):  # Dialogue detection
            dialogue.append(line.strip())
        else:
            action.append(line.strip())  # Treat as action or description
    
    # Additional props and set design logic using NER
    ner_results = ner_pipeline(" ".join(action))
    for entity in ner_results:
        if entity["entity_group"] == "MISC":
            props.append(entity["word"])
        elif entity["entity_group"] == "LOC":
            set_design.append(entity["word"])

    return {
        "Action": " ".join(action),
        "Dialogue": "\n".join(dialogue),
        "Props": ", ".join(set(props)),
        "Set Design": ", ".join(set(set_design))
    }

# Process all scenes
enhanced_scenes = []
for scene in scenes:
    breakdown = analyze_scene(scene["Content"])
    breakdown["Scene"] = scene["Scene"]
    enhanced_scenes.append(breakdown)


Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


In [12]:
from fpdf import FPDF

def sanitize_text(text):
    """
    Replace or remove unsupported characters for FPDF.
    """
    return text.encode('latin-1', 'replace').decode('latin-1')

def create_breakdown_pdf(scenes, output_path):
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    
    for scene in scenes:
        pdf.cell(200, 10, txt=sanitize_text(f"Scene: {scene['Scene']}"), ln=True)
        pdf.multi_cell(200, 10, txt=sanitize_text(f"Action: {scene.get('Action', 'N/A')}"))
        pdf.multi_cell(200, 10, txt=sanitize_text(f"Dialogue: {scene.get('Dialogue', 'N/A')}"))
        pdf.multi_cell(200, 10, txt=sanitize_text(f"Props: {scene.get('Props', 'N/A')}"))
        pdf.multi_cell(200, 10, txt=sanitize_text(f"Set Design: {scene.get('Set Design', 'N/A')}"))
        pdf.cell(200, 10, txt="="*80, ln=True)
    
    pdf.output(output_path)
    print(f"PDF saved to {output_path}")

# Save the PDF
output_pdf = "LEAP_FROG_BREAKDOWN_Output_2.pdf"
create_breakdown_pdf(enhanced_scenes, output_pdf)


PDF saved to LEAP_FROG_BREAKDOWN_Output_2.pdf


In [53]:
from transformers import pipeline
import re

# Load the NER pipeline
ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", grouped_entities=True)

def analyze_scene(scene):
    """
    Analyze a scene to extract action, dialogue, props, set design, and additional categories.
    """
    action, dialogue = [], []
    props, set_design, extras = [], [], []
    hair_makeup_costume, special_effects = [], []

    lines = scene.split("\n")

    for line in lines:
        # Dialogue Detection
        if re.match(r"^[A-Z]+(\s\([A-Z]+\))?:", line.strip()):
            dialogue.append(line.strip())
        else:
            action.append(line.strip())  # Treat as action or description

    # NER Analysis
    action_text = " ".join(action)
    ner_results = ner_pipeline(action_text)

    for entity in ner_results:
        word = entity["word"].strip()
        label = entity["entity_group"]

        if label == "MISC":
            props.append(word)
        elif label == "LOC":
            set_design.append(word)
        elif label == "PER":
            extras.append(word)  # Extras (generic characters)

    # Custom keyword-based extraction for specific categories
    keywords_hair_makeup = ["wig", "costume", "makeup", "dress"]
    keywords_special_effects = ["explosion", "fire", "CGI", "visual effect"]

    for word in action_text.split():
        if word.lower() in keywords_hair_makeup:
            hair_makeup_costume.append(word)
        if word.lower() in keywords_special_effects:
            special_effects.append(word)

    return {
        "Action": " ".join(action),
        "Dialogue": "\n".join(dialogue),
        "Props": ", ".join(set(props)),
        "Set Design": ", ".join(set(set_design)),
        "Extras": ", ".join(set(extras)),
        "Hair/Makeup/Costume": ", ".join(set(hair_makeup_costume)),
        "Special Effects": ", ".join(set(special_effects)),
    }

# Example Scene for Testing
example_scene = """
INT. LIVING ROOM - NIGHT
John enters the room, wearing a red wig. The room is dimly lit with candles.
He looks at the broken chair and sighs. Suddenly, an explosion shakes the house.
MARY (O.S.)
What was that?
"""

# Test the refined NER logic
result = analyze_scene(example_scene)
print(result)


{'Action': ' INT. LIVING ROOM - NIGHT John enters the room, wearing a red wig. The room is dimly lit with candles. He looks at the broken chair and sighs. Suddenly, an explosion shakes the house. MARY (O.S.) What was that? ', 'Dialogue': '', 'Props': '', 'Set Design': '', 'Extras': 'John', 'Hair/Makeup/Costume': '', 'Special Effects': 'explosion'}


In [54]:
def analyze_scene(scene):
    action, dialogue = [], []
    props, set_design = [], []
    hair_makeup_costume, special_effects = [], []
    extras = set()

    lines = scene.split("\n")

    for line in lines:
        if re.match(r"^[A-Z]+(\s\([A-Z]+\))?:", line.strip()):  # Dialogue detection
            dialogue.append(line.strip())
        else:
            action.append(line.strip())  # Treat as action or description

    # Keyword-based extraction
    keywords_props = ["chair", "candles", "table", "glass"]
    keywords_set_design = ["dimly lit", "room", "decor", "scenery"]
    keywords_hair_makeup = ["wig", "makeup", "costume", "mask"]
    keywords_special_effects = ["explosion", "fire", "thunder", "gunshot"]

    for word in " ".join(action).split():
        if any(kw in word.lower() for kw in keywords_props):
            props.append(word)
        if any(kw in word.lower() for kw in keywords_set_design):
            set_design.append(word)
        if any(kw in word.lower() for kw in keywords_hair_makeup):
            hair_makeup_costume.append(word)
        if any(kw in word.lower() for kw in keywords_special_effects):
            special_effects.append(word)

    # NER-based extraction
    ner_results = ner_pipeline(" ".join(action))
    for entity in ner_results:
        if entity["entity_group"] == "PER":
            extras.add(entity["word"])
    
    return {
        "Action": " ".join(action),
        "Dialogue": "\n".join(dialogue),
        "Props": ", ".join(set(props)),
        "Set Design": ", ".join(set(set_design)),
        "Extras": ", ".join(extras),
        "Hair/Makeup/Costume": ", ".join(set(hair_makeup_costume)),
        "Special Effects": ", ".join(set(special_effects)),
    }


In [55]:
# Process all scenes
enhanced_scenes = []
for scene in scenes:
    breakdown = analyze_scene(scene["Content"])
    breakdown["Scene"] = scene["Scene"]
    enhanced_scenes.append(breakdown)

# Display the enhanced scenes
for idx, scene in enumerate(enhanced_scenes, start=1):
    print(f"Scene {idx}:")
    print(f"Scene Heading: {scene['Scene']}")
    print(f"Action: {scene['Action']}")
    print(f"Dialogue: {scene['Dialogue']}")
    print(f"Props: {scene['Props']}")
    print(f"Set Design: {scene['Set Design']}")
    print(f"Extras: {scene['Extras']}")
    print(f"Hair/Makeup/Costume: {scene['Hair/Makeup/Costume']}")
    print(f"Special Effects: {scene['Special Effects']}")
    print("=" * 80)

# Optional: Save to a JSON file for further use
import json
output_file = "enhanced_scenes.json"
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(enhanced_scenes, f, indent=4)

print(f"Enhanced scenes saved to {output_file}")


Scene 1:
Scene Heading: EXT. HANNAH’S HOUSE - DAY
Action: LEAP FROG Written by Kaiden Creagh 6600 Sage Ave, Summerland BC 2503289008 HANNAH (9) perched on the back porch of a ranch house extremely invested on her brand new Leapfrog. Some sound effects play and she thrusts her arms into the air with excitement and relief. HANNAH Yesssss. FATHER (OS) Dinner time. Hannah’s expression changes to despondent. A new frog walks out of the water. HANNAH Ok.
Dialogue: 
Props: 
Set Design: 
Extras: Hannah, Kaiden Creagh, HANNAH, H
Hair/Makeup/Costume: 
Special Effects: 
Scene 2:
Scene Heading: INT. HANNAH’S HOUSE KITCHEN - DAY
Action: Hannah, her Mother (31) and Father (35) sit at a circular wooden table, it’s awkward. FATHER So, Hannah what do you want to be when your older? Hannah shrugs. FATHER (CONT’D) Well if you don’t know, what do you like? Cmon you have to like something? Hannah contemplates and then quickly looks at the Leap Frog on her lap. She point the blank black screen at her face m

In [57]:
from transformers import pipeline
import re

def refined_ner(scene_content):
    """
    Refine NER logic to identify missing categories.
    """
    ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", grouped_entities=True)

    # Initial NER processing
    ner_results = ner_pipeline(scene_content)
    
    # Initialize categories
    characters, props, locations, hair_makeup_costume, special_effects = set(), set(), set(), set(), set()

    # Keyword-based rules for manual tagging
    hair_makeup_keywords = ["wig", "makeup", "costume", "hat", "glasses"]
    special_effects_keywords = ["explosion", "fire", "rain", "fog", "stunt", "sound effect"]

    # Process NER results
    for entity in ner_results:
        word = entity["word"].strip()
        label = entity["entity_group"]

        if label == "PER":
            characters.add(word)
        elif label == "LOC":
            locations.add(word)
        elif label == "MISC":
            props.add(word)

    # Rule-based tagging for additional categories
    for line in scene_content.split("\n"):
        for keyword in hair_makeup_keywords:
            if keyword in line.lower():
                hair_makeup_costume.add(keyword)

        for keyword in special_effects_keywords:
            if keyword in line.lower():
                special_effects.add(keyword)

    return {
        "Characters": ", ".join(characters) or "N/A",
        "Props": ", ".join(props) or "N/A",
        "Locations": ", ".join(locations) or "N/A",
        "Hair/Makeup/Costume": ", ".join(hair_makeup_costume) or "N/A",
        "Special Effects": ", ".join(special_effects) or "N/A"
    }

# Example usage
scene_content = "INT. LIVING ROOM - NIGHT\nJohn enters the room, wearing a red wig. The room is dimly lit with candles. An explosion shakes the house."
refined_results = refined_ner(scene_content)

print("Refined Results:")
for key, value in refined_results.items():
    print(f"{key}: {value}")

Refined Results:
Characters: John
Props: N/A
Locations: N/A
Hair/Makeup/Costume: wig
Special Effects: explosion


In [61]:
from transformers import pipeline
import re

# Load NER pipeline
ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", grouped_entities=True)

def analyze_scene(scene):
    """Refine analysis to extract additional categories."""
    action, dialogue = [], []
    props, set_design, extras, costumes, special_effects = set(), set(), set(), set(), set()
    
    # Split scene content into lines
    lines = scene.split("\n")

    for line in lines:
        line = line.strip()
        if re.match(r"^[A-Z]+(\s\([A-Z]+\))?:", line):  # Dialogue detection
            dialogue.append(line)
        else:
            action.append(line)  # Treat as action or description

    # Perform NER on action lines
    ner_results = ner_pipeline(" ".join(action))
    for entity in ner_results:
        label = entity["entity_group"]
        word = entity["word"].strip()
        if label == "LOC":
            set_design.add(word)
        elif label == "MISC":
            props.add(word)
        elif label == "PER":
            extras.add(word)

    # Custom rules for costumes and special effects
    for line in action:
        if "wearing" in line or "dressed in" in line:
            costumes.add(line)
        if any(effect in line for effect in ["explosion", "fire", "smoke", "lightning"]):
            special_effects.add(line)

    return {
        "Action": " ".join(action),
        "Dialogue": "\n".join(dialogue),
        "Props": ", ".join(props) if props else "N/A",
        "Set Design": ", ".join(set_design) if set_design else "N/A",
        "Extras": ", ".join(extras) if extras else "N/A",
        "Hair/Makeup/Costume": ", ".join(costumes) if costumes else "N/A",
        "Special Effects": ", ".join(special_effects) if special_effects else "N/A",
    }

# Test updated logic with sample scenes
refined_scenes = []
for scene in scenes:
    breakdown = analyze_scene(scene["Content"])
    breakdown["Scene"] = scene["Scene"]
    refined_scenes.append(breakdown)


In [62]:
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.lib import colors

def create_polished_pdf(scenes, output_path):
    # Create a PDF document
    doc = SimpleDocTemplate(output_path, pagesize=letter)

    # Define styles
    styles = getSampleStyleSheet()
    title_style = ParagraphStyle(
        "TitleStyle",
        parent=styles["Heading2"],
        fontName="Helvetica-Bold",
        fontSize=14,
        textColor=colors.darkblue,
        spaceAfter=10,
    )
    section_style = ParagraphStyle(
        "SectionStyle",
        parent=styles["Normal"],
        fontName="Helvetica-Bold",
        fontSize=12,
        textColor=colors.black,
        spaceAfter=5,
    )
    content_style = ParagraphStyle(
        "ContentStyle",
        parent=styles["Normal"],
        fontSize=10,
        leading=12,
        textColor=colors.black,
        spaceAfter=8,
    )
    divider_style = ParagraphStyle(
        "DividerStyle",
        parent=styles["Normal"],
        fontSize=10,
        textColor=colors.grey,
        spaceBefore=10,
        spaceAfter=10,
    )

    elements = []

    for scene in scenes:
        # Add scene heading
        elements.append(Paragraph(f"Scene: {scene['Scene']}", title_style))

        # Add sections
        elements.append(Paragraph("Action:", section_style))
        elements.append(Paragraph(scene.get("Action", "N/A"), content_style))

        elements.append(Paragraph("Dialogue:", section_style))
        elements.append(Paragraph(scene.get("Dialogue", "N/A"), content_style))

        elements.append(Paragraph("Characters:", section_style))
        elements.append(Paragraph(scene.get("Extras", "N/A"), content_style))

        elements.append(Paragraph("Props:", section_style))
        elements.append(Paragraph(scene.get("Props", "N/A"), content_style))

        elements.append(Paragraph("Set Design:", section_style))
        elements.append(Paragraph(scene.get("Set Design", "N/A"), content_style))

        elements.append(Paragraph("Hair/Makeup/Costume:", section_style))
        elements.append(Paragraph(scene.get("Hair/Makeup/Costume", "N/A"), content_style))

        elements.append(Paragraph("Special Effects:", section_style))
        elements.append(Paragraph(scene.get("Special Effects", "N/A"), content_style))

        # Add divider
        elements.append(Paragraph("=" * 80, divider_style))
        elements.append(Spacer(1, 12))

    # Build the PDF
    doc.build(elements)
    print(f"PDF saved to {output_path}")

# Save the polished PDF
output_pdf_path = "LEAP_FROG_BREAKDOWN_Refined.pdf"
create_polished_pdf(refined_scenes, output_pdf_path)


PDF saved to LEAP_FROG_BREAKDOWN_Refined.pdf
