In [None]:
import pandas as pd

# Read the IMDB dataset CSV into a DataFrame
file_path = '/mnt/data/IMDB Dataset.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the leading five rows as the cell's output
df.head(n=5)


In [None]:
import re

# Clean the text data
def clean_text(text):
    """Normalize a raw review string.

    Steps, in order:
      1. HTML line-break tags (``<br>``, ``<br/>``, ``<br />``, any case) are
         replaced with a single space.
      2. Every character that is neither a word character (``\\w``) nor
         whitespace is deleted.
      3. The result is lower-cased.

    Args:
        text: Raw review text (a ``str``).

    Returns:
        The cleaned, lower-cased string. Runs of whitespace are left as-is.
    """
    # Handle <br> tags before stripping punctuation: removing only "<", "/"
    # and ">" would leave a stray "br" glued onto the preceding word
    # ("movie.<br />The" -> "moviebr the").
    text = re.sub(r'<br\s*/?>', ' ', text, flags=re.IGNORECASE)
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation (underscores count as \w and stay)
    text = text.lower()  # Convert to lowercase
    return text

# Store the cleaned text in its own column so the raw 'review' stays untouched
df['cleaned_review'] = df['review'].apply(clean_text)

# Encode target labels (assuming 'sentiment' column exists and contains 'positive'/'negative' labels).
# Already-encoded 1/0 values map to themselves, so re-running this cell keeps
# the labels instead of turning every row into NaN. Any other value still
# becomes NaN, exactly as before.
df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0, 1: 1, 0: 0})


In [None]:
import google.generativeai as palm
from dotenv import load_dotenv
import os

# Pull in environment variables through python-dotenv
load_dotenv()

# Configure the Google PaLM client with the key stored under PALM_API_KEY
palm.configure(api_key=os.environ.get("PALM_API_KEY"))

# Send a piece of text to PaLM text generation and return the generated output
def extract_features_with_palm(text):
    """Run ``text`` through ``palm.generate_text`` and return the first candidate.

    The text is passed verbatim as the prompt with ``temperature=0.0``.
    NOTE(review): no instruction (e.g. "classify the sentiment") is added to
    the prompt, so the output is whatever the model generates for the bare
    review text; confirm this is intended.

    Args:
        text: Prompt text, here a cleaned review.

    Returns:
        The whitespace-stripped ``output`` of the first candidate, or ``None``
        when the input is not a non-blank string, the response reports
        filters, no candidate is returned, or the call raises.
    """
    # Blank or missing input (NaN, None, or a review that was nothing but
    # punctuation before cleaning) cannot yield a useful completion, so skip
    # the API round-trip entirely.
    if not isinstance(text, str) or not text.strip():
        return None

    try:
        response = palm.generate_text(prompt=text, temperature=0.0)

        # Check if the response was blocked
        if response.filters:
            print(f"Blocked reason: {response.filters[0]['reason']}")
            return None

        # Nothing was generated, so there is nothing to extract
        if not response.candidates:
            return None

        return response.candidates[0]['output'].strip()

    except Exception as e:
        # Best-effort by design: one failing row is reported and yields None
        # rather than aborting the whole DataFrame pass.
        print(f"Error with PaLM model: {e}")
        return None

# Run every cleaned review through the PaLM helper, one call per row, and
# keep the results in a new 'palm_output' column
df['palm_output'] = df['cleaned_review'].map(extract_features_with_palm)
