### Step 0: Ensure All Files are in the Same Directory

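##### Place the following .py files together in one directory that Python can import from (the first code cell adds `./src` to the import path):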
- config.py

- main.py

- batch_processing.py

- run_pipeline.py

In [1]:
# Sentiment_Analysis.ipynb - Notebook to run the full pipeline

# Import necessary libraries and modules
import sys
import os
import pandas as pd

# Make modules under ./src (relative to the current working directory) importable
_src_dir = os.path.join(os.getcwd(), 'src')
sys.path.append(_src_dir)

# Import core functions from the main module
from main import analyze_sentiment, detect_sarcasm, generate_gemma_response
from batch_processing import batch_process
from config import CONFIG

In [2]:
# Step 1: Locate the cleaned dataset under ./datasets in the current working directory
DATASET_PATH = os.path.join(
    os.getcwd(),
    os.path.join("datasets", "UScomments_final_cleaned.csv"),
)

In [3]:
# Step 2: Read the cleaned dataset into a DataFrame.
# low_memory=False asks pandas not to parse the file in internal chunks,
# which avoids mixed-type inference within a column.
data = pd.read_csv(filepath_or_buffer=DATASET_PATH, low_memory=False)

In [4]:
# Step 3: Pull the pre-labeled sentiment and the comment text out of the dataset
pre_labeled_sentiments = data["sentiment"].tolist()
# The comment column is cast to str before being converted to a plain list.
_comment_text = data["cleaned_comment"].astype(str)
comments = _comment_text.tolist()

In [5]:
# Step 4: Start with an empty accumulator for the sarcasm detection results
sarcasm_labels = list()

In [None]:
# Step 5: Run sarcasm detection batch by batch and collect every result
# NOTE(review): each batch result is passed through .to(CONFIG["device"]) before
# being stored, so batch_process presumably yields tensor-like objects — confirm
# against batch_processing.py, and check whether results should instead be moved
# to the CPU / converted to plain Python values before going into the DataFrame.
for batch_output in batch_process(comments, CONFIG["batch_size"], detect_sarcasm):
    moved_batch = batch_output.to(CONFIG["device"])
    sarcasm_labels.extend(moved_batch)

In [None]:
# Step 6: Ask the Gemma-2b-it model for a response to every comment, in order
responses = list(map(generate_gemma_response, comments))

In [None]:
# Step 7: Assemble one results table with a column per pipeline output
_result_columns = {
    "Comment": comments,
    "Sentiment": pre_labeled_sentiments,  # taken from the dataset, not re-computed here
    "Sarcasm": sarcasm_labels,
    "Response": responses,
}
df_results = pd.DataFrame(_result_columns)

In [None]:
# Step 8: Save the results to a CSV file under ./outputs
OUTPUT_PATH = os.path.join(os.getcwd(), "outputs", "Processed_Comments.csv")
# to_csv does not create missing parent folders, so make sure the output
# directory exists first; exist_ok=True keeps re-running this cell harmless.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
# index=False leaves the DataFrame's row index out of the written file.
df_results.to_csv(OUTPUT_PATH, index=False)
print(f"Processing complete. Results saved to '{OUTPUT_PATH}'.")

In [None]:
# Step 9: Preview the first five rows of the final results
df_results.head(n=5)