In [None]:
import pandas as pd
import openai
import os
from dotenv import load_dotenv
import json

In [None]:
# Initialize OpenAI API
openai.api_key = os.getenv("OPENAI_API_KEY")

# Load your dataset
# For the sake of example, let's assume your dataset is a CSV file
df = pd.read_csv("rt_reviews_cleaned_with_names.csv")  # Replace with your actual dataset

# Group reviews by movie
grouped_reviews = df.groupby('movie_title')['cleaned_review'].apply(list).reset_index()


In [None]:
def summarize_reviews(reviews):
    # Combine all reviews into a single text block
    reviews_text = ' '.join(reviews)
    
    # Use OpenAI API to summarize the reviews
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": (
            "Summarize the following movie reviews no more than 200 words."
            "First, state what the majority of viewers are saying. "
            "Second, mention what the minority of viewers are saying. "
            "Finally, provide the overall sentiment of the reviews:\n\n"
            f"{reviews_text}"
        )}
    ]
    
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages,
        max_tokens=200
    )
    
    summary = response.choices[0].message['content'].strip()
    return summary

In [None]:
# Create a dictionary to store summaries
movie_summaries = {}

# Iterate over each movie and summarize the reviews
for index, row in grouped_reviews.iterrows():
    movie_title = row['movie_title']
    reviews = row['cleaned_review']
    
    summary = summarize_reviews(reviews)
    movie_summaries[movie_title] = summary
    print(f"Summarized reviews for {movie_title}")

# Save the summaries to a JSON file
with open('movie_summaries.json', 'w') as f:
    json.dump(movie_summaries, f)

print("Summarization complete. Summaries saved to movie_summaries.json")

In [12]:
print(movie_summaries)

{'10 Cloverfield Lane': 'The majority of viewers praised "Cloverfield Lane" for its tense, thrilling, and unpredictable storyline. They found the performances, especially that of John Goodman, to be top-notch and greatly contributing to the suspenseful atmosphere of the film. Many viewers appreciated that the movie kept them on the edge of their seats with its shifting tones and cleverly crafted mystery. They found the direction to be well-executed and admired the claustrophobic environment that was created within just one setting. They also lauded the smart script and psychological elements.\n\nA minority of viewers, however, found the film\'s connection to the original "Cloverfield" to be weak and believed that the movie could have stood on its own without this link. Some detractors thought'}
