### Step 0: Ensure All Source Files Are in the `src/` Directory

##### Place the following .py files in the `src/` directory:
- config.py

- main.py

- batch_processing.py

- run_pipeline.py

#### 1. Add src/ Directory to Python Path

In [None]:
import sys
import os

# Add the 'src' directory (relative to the current working directory) to the
# Python path. Notebook cells are frequently re-run, so only append when the
# entry is not already present; a blind append would add a duplicate each time.
src_dir = os.path.join(os.getcwd(), 'src')
if src_dir not in sys.path:
    sys.path.append(src_dir)

#### 2. Enable Auto-Reloading for Code Changes

In [None]:
# Enable autoreload to automatically load any code changes
%load_ext autoreload
%autoreload 2

#### 3. Import Functions and Modules from src/

In [2]:
# Project-local imports from the src/ package.
# NOTE(review): the output recorded for this cell shows
# "FileNotFoundError: ... 'UScomments.csv'", so one of these imports
# presumably reads the CSV through a bare relative path at import time;
# confirm which module does so and whether it should point into datasets/.
from src.config import CONFIG
from src.main import (
    analyze_sentiment, detect_sarcasm,
    contains_offensive_language, generate_responses
)
from src.batch_processing import batch_process
import src.run_pipeline as pipeline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


FileNotFoundError: [Errno 2] No such file or directory: 'UScomments.csv'

#### 4. Load and Validate the Dataset

In [None]:
import pandas as pd
import os

# Define the dataset path (resolved against the current working directory)
DATASET_PATH = os.path.join(os.getcwd(), "datasets", "UScomments.csv")

# Load the dataset; malformed rows are skipped instead of aborting the load.
try:
    data = pd.read_csv(DATASET_PATH, on_bad_lines="skip")
except FileNotFoundError as err:
    # Re-raise with the full path in the message, chaining the original error
    # so the traceback reports it as the direct cause.
    raise FileNotFoundError(f"Dataset not found at: {DATASET_PATH}") from err

# Validate that the necessary column exists
if 'comment_text' not in data.columns:
    raise ValueError("The 'comment_text' column is missing from the dataset.")

# NOTE: astype(str) converts missing values (NaN) into the literal string
# "nan", so such rows are kept and treated as ordinary comments downstream.
comments = data['comment_text'].astype(str).tolist()
print(f"Loaded {len(comments)} comments from the dataset.")

#### 5. Process Sentiment in Batches

In [None]:
# Run sentiment analysis batch by batch and flatten the per-batch results
# into one list, preserving the order in which batches are yielded.
sentiments = [
    label
    for batch in batch_process(comments, CONFIG["batch_size"], analyze_sentiment)
    for label in batch
]

print(f"Processed sentiment for {len(sentiments)} comments.")

#### 6. Process Sarcasm Detection in Batches

In [None]:
# Run sarcasm detection batch by batch and flatten the per-batch results
# into one list, preserving the order in which batches are yielded.
sarcasm_labels = [
    label
    for batch in batch_process(comments, CONFIG["batch_size"], detect_sarcasm)
    for label in batch
]

print(f"Processed sarcasm detection for {len(sarcasm_labels)} comments.")

#### 7. Detect Offensive Language

In [None]:
# Apply the offensive-language check to every comment, one flag per comment
offensive_flags = list(map(contains_offensive_language, comments))

print(f"Identified offensive language in {sum(offensive_flags)} comments.")

#### 8. Generate Responses for Non-Neutral Comments

In [None]:
# Generate responses based on sentiment and comments
responses = generate_responses(sentiments, comments)

# Count the truthy (non-empty) responses with a generator instead of building
# a throwaway list just to take its length.
non_empty_responses = sum(1 for response in responses if response)
print(f"Generated responses for {non_empty_responses} non-neutral comments.")

#### 9. Combine All Results into a DataFrame

In [None]:
# Assemble the per-comment results into one table: each column name is paired
# with the list holding that column's values, in the order shown.
column_names = ("Comment", "Sentiment", "Sarcasm", "Offensive_Flag", "Response")
column_values = (comments, sentiments, sarcasm_labels, offensive_flags, responses)
df_results = pd.DataFrame(dict(zip(column_names, column_values)))

# Preview the first five rows
print(df_results.head(5))

#### 10. Save the Results to a CSV File

In [None]:
# Define the output path (resolved against the current working directory)
OUTPUT_PATH = os.path.join(os.getcwd(), "outputs", "Processed_Comments.csv")

# to_csv does not create missing parent directories, so make sure the
# 'outputs' folder exists before writing.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

# Save results to CSV, omitting the DataFrame index column
df_results.to_csv(OUTPUT_PATH, index=False)
print(f"Results saved to '{OUTPUT_PATH}'.")