<a href="https://colab.research.google.com/github/ranwiththecode/fantasy_lit_sent_an/blob/main/sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os

import nltk
import pandas as pd
from google.cloud import bigquery
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from tqdm import tqdm

# Initialize NLTK: fetch the VADER lexicon resource first, then build the
# analyzer. `sid` is the module-level analyzer used by analyze_sentiment().
nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()

# Pipeline settings (UPDATE THESE before running)
CONFIG = dict(
    # BigQuery destination: <project_id>.<dataset_id>.<table_id>
    project_id="stable-course-461105-k9",  # Case-sensitive
    dataset_id="google_imports",
    table_id="pubs_2000_2005",
    # Input CSV: directory and file name are joined by process_and_update()
    drive_path="/content/drive/MyDrive/Goodreads_Data/",
    input_file="eragon_paolini_clean.csv",
)

def analyze_sentiment(text):
    """Return the VADER compound score for ``text``.

    Anything that is not a string, and any string that is empty or only
    whitespace, yields ``None`` instead of a score.
    """
    has_content = isinstance(text, str) and bool(text.strip())
    if has_content:
        scores = sid.polarity_scores(text)
        return scores['compound']
    return None

def process_and_update():
    """Score every review with VADER and load the scores into BigQuery.

    Steps:
      1. Read the CSV at ``CONFIG['drive_path']`` / ``CONFIG['input_file']``.
      2. Add a ``sentiment`` column by applying ``analyze_sentiment`` to the
         ``clean_text`` column (with a tqdm progress bar).
      3. Load the ``id`` and ``sentiment`` columns into the table
         ``<project_id>.<dataset_id>.<table_id>`` taken from ``CONFIG``.

    Failures while reading the CSV or scoring propagate to the caller.
    Failures during the BigQuery load are printed, not re-raised.
    """
    # Initialize BigQuery client
    bq_client = bigquery.Client(project=CONFIG['project_id'])

    # 1. Load cleaned data
    input_path = os.path.join(CONFIG['drive_path'], CONFIG['input_file'])
    print(f"\nLoading data from: {input_path}")
    df = pd.read_csv(input_path)

    # 2. Calculate sentiment (with progress bar)
    print("Analyzing sentiment...")
    tqdm.pandas(desc="Processing reviews")  # registers Series.progress_apply
    df['sentiment'] = df['clean_text'].progress_apply(analyze_sentiment)

    # 3. Prepare for BigQuery update
    table_ref = f"{CONFIG['project_id']}.{CONFIG['dataset_id']}.{CONFIG['table_id']}"
    update_df = df[['id', 'sentiment']]  # Only these two columns are uploaded

    # 4. Execute BigQuery load
    try:
        print(f"\nUpdating BigQuery table: {table_ref}")
        # NOTE(review): WRITE_TRUNCATE overwrites the destination table's data
        # with update_df, which holds only `id` and `sentiment`. Any other
        # columns already in the table would presumably be lost -- confirm this
        # is intended, or load into a staging table and MERGE instead.
        job_config = bigquery.LoadJobConfig(
            write_disposition="WRITE_TRUNCATE",
            create_disposition="CREATE_IF_NEEDED"
        )

        job = bq_client.load_table_from_dataframe(
            update_df,
            table_ref,
            job_config=job_config
        )
        job.result()  # Wait for completion

        print(f"✅ Success! Updated {len(update_df)} rows")
        print(f"Sample updated data:\n{update_df.head(3)}")

    except Exception as e:
        # Best-effort reporting: the failure is printed, not re-raised.
        print(f"❌ Error: {str(e)}")
        # `errors` may be absent, None, or hold non-dict entries depending on
        # the exception type; never let detail printing raise and mask `e`.
        for error in getattr(e, 'errors', None) or []:
            if isinstance(error, dict):
                detail = error.get('message', error)
            else:
                detail = error
            print(f"Detail: {detail}")

# Run the pipeline when this code is executed as the main module
# (it is skipped when the file is imported from elsewhere).
if __name__ == "__main__":
    process_and_update()