## FEW-SHOT PROMPTING

In [None]:
import pandas as pd

# Load the working dataset. The same file is also used as the checkpoint that
# the batch loop rewrites, so this cell must be safe to re-run on a file that
# already contains partial results.
movies = pd.read_csv("movies_sub.csv")

# Create the bookkeeping columns only when they are absent; unconditionally
# resetting them would throw away summaries saved by an earlier checkpoint.
if "Llama Summary Few Shot" not in movies.columns:
    movies["Llama Summary Few Shot"] = ""
if "Generated" not in movies.columns:
    movies["Generated"] = False

# 'Unnamed: 0' only exists when the CSV was written with its index. Files
# written with index=False do not have it, so a missing column is not an error.
movies.drop(columns=["Unnamed: 0"], errors="ignore", inplace=True)

In [49]:
from langchain_groq import ChatGroq
import os
import pandas as pd
import time


# SECURITY: the Groq API key used to be hard-coded on this line in plain text.
# Treat that key as compromised and revoke it. The key is now taken from the
# GROQ_API_KEY environment variable, and only requested interactively (without
# echoing it) when the variable is not set.
if not os.environ.get("GROQ_API_KEY"):
    import getpass

    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your Groq API key: ")

# Shared chat model; generate_summary() calls llm.invoke() on it.
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0,   # least-random sampling, for repeatable summaries
    max_tokens=150,  # upper bound on the length of each generated summary
    max_retries=2,   # retry count passed to the client
)

def generate_summary(movie_name: str, subtitle_transcript) -> str:
    """Generate a few-shot movie summary from a trailer subtitle transcript.

    Builds a system + human prompt (with three fixed IMDb synopsis examples)
    and sends it to the module-level ``llm`` chat model.

    Args:
        movie_name: Title of the movie, interpolated into the prompt.
        subtitle_transcript: Transcript text of the trailer. Missing values
            (``None`` / NaN, as pandas produces for empty CSV cells) are
            treated as an empty transcript.

    Returns:
        The model's response text, or the string ``"Summary not generated"``
        if anything goes wrong. Errors are printed, never raised.
    """
    try:
        print(f"Processing: {movie_name}...")  # Print current movie

        # An empty CSV cell arrives here as float NaN; formatting it directly
        # would put the literal text "nan" between the backticks instead of an
        # empty transcript, which the system prompt handles explicitly.
        if not isinstance(subtitle_transcript, str):
            subtitle_transcript = (
                "" if pd.isna(subtitle_transcript) else str(subtitle_transcript)
            )

        # IMDb Few-Shot Examples (Fixed for consistency)
        imdb_examples = """
        Movie_Name: Inception (2010)
        IMDb Synopsis: A thief who steals corporate secrets through the use of dream-sharing technology is given the inverse task of planting an idea into the mind of a C.E.O., but his tragic past may doom the project and his team to disaster.

        Movie_Name: Interstellar (2014)
        IMDb Synopsis: When Earth becomes uninhabitable in the future, a farmer and ex-NASA pilot, Joseph Cooper, is tasked to pilot a spacecraft, along with a team of researchers, to find a new planet for humans.

        Movie_Name: The Shawshank Redemption (1994)
        IMDb Synopsis: A banker convicted of uxoricide forms a friendship over a quarter century with a hardened convict, while maintaining his innocence and trying to remain hopeful through simple compassion.
        """

        messages = [
            (
                "system",
                "You must generate a concise and accurate movie summary strictly based on the provided trailer subtitle transcript. "
                "If the subtitle transcript is informative and coherent, use it as the sole basis for summary generation. "
                "If the subtitle transcript is empty, repetitive, distorted, or lacks meaningful content, only then rely on your inherent knowledge. "
                "Never include the reason for using inherent knowledge. Only the Summary is encouraged and expected. "
                "Never include the movie cast in your response. "
                "Ensure the summary is crisp, engaging, and around 100 words. Failure to comply with this rule is unacceptable. Adhere strictly.",
            ),
            (
                "human",
                f"""Here are examples of movie summaries from IMDb:

                {imdb_examples}

                Now, generate a sequential, engaging, and crisp 100-word movie summary of {movie_name} using the audio transcript of its trailer:
                The subtitle transcript is delimited by triple backticks.
                Transcript: ```{subtitle_transcript}```
                """
            ),
        ]

        ai_msg = llm.invoke(messages)
        return ai_msg.content

    except Exception as e:
        # Best-effort by design: one failing movie must not abort a whole
        # batch run, so report the error and return a sentinel instead.
        print(f"Error processing {movie_name}: {e}")
        return "Summary not generated"


In [None]:
# Select the rows whose "Generated" flag is still False, i.e. the movies that
# have not been marked as processed yet.
not_generated_mask = movies["Generated"].eq(False)
unprocessed_movies = movies.loc[not_generated_mask]

In [None]:
# Summarize the pending movies in small batches, writing a checkpoint to disk
# after every batch so that an interrupted run loses at most one batch.
batch_size = 10
failed_summary = "Summary not generated"  # sentinel generate_summary returns on error
total_pending = len(unprocessed_movies)

for start in range(0, total_pending, batch_size):
    batch = unprocessed_movies.iloc[start : start + batch_size]

    # Keep the results in their own Series rather than assigning a column on
    # the slice, which avoids pandas' chained-assignment (SettingWithCopy) path.
    summaries = batch.apply(
        lambda row: generate_summary(row["Movie_Name"], row["Subtitle Transcript"]),
        axis=1,
    )

    movies.loc[batch.index, "Llama Summary Few Shot"] = summaries
    # Flag only the rows that actually produced a summary; failed rows stay
    # unflagged so a later run picks them up again.
    movies.loc[batch.index, "Generated"] = summaries != failed_summary
    movies.to_csv("movies_sub.csv", index=False)

    # The last batch may be shorter than batch_size, so cap the running count.
    processed_so_far = min(start + batch_size, total_pending)
    print(f"Checkpoint saved. Processed {processed_so_far} movies.")
    time.sleep(1)  # brief pause between batches

print("Processing completed!")

In [59]:
# Final save of the full table. index=True is the pandas default, spelled out
# here: the row index is written as an extra header-less first column, which
# read_csv loads back as 'Unnamed: 0'.
# NOTE(review): the checkpoint saves inside the batch loop use index=False;
# consider aligning the two once the loading cell tolerates a missing
# 'Unnamed: 0' column.
movies.to_csv("movies_sub.csv", index=True)