In [13]:
import os
import pandas as pd
from tqdm import tqdm
import requests
import json

# --- Ollama endpoint and model ---------------------------------------------
OLLAMA_URL = "http://localhost:11434/api/generate"
MODEL_NAME = "llama3.2:1b"

# --- Input settings ----------------------------------------------------------
file_path = "data/reviews.csv"
review_column = "Review"  # column that holds the review text

# Maximum number of rows to classify; zero or a negative value keeps every row.
df_size = 10

# Topics offered to the model; also used to recognise the topic in its reply.
candidate_labels = [
    "Talks about driving experience",
    "Talks about features",
    "Talks about value for money",
    "Talks about issues",
    "Other",
]

# --- Load and validate the dataset -------------------------------------------
# Fail early with a clear message instead of letting pandas raise its own error.
if not os.path.exists(file_path):
    raise FileNotFoundError(f"Input file not found at '{file_path}'")

df = pd.read_csv(file_path)

if review_column not in df.columns:
    raise ValueError(f"Column '{review_column}' not found in the dataset!")

# Result columns start out empty and are filled in row by row later on.
df["talks_about"] = df["sentiment"] = ""

# Keep only the first df_size rows when a positive limit is configured.
if df_size > 0:
    df = df.head(df_size)

# Classification logic
def classify_review(review_text):
    """Classify the topic and sentiment of one review via the Ollama server.

    Builds a prompt listing ``candidate_labels``, posts it to ``OLLAMA_URL``
    for ``MODEL_NAME`` and scans the generated text for a label and for the
    word "positive".

    Args:
        review_text: The review to classify, as a string.

    Returns:
        A ``(category, sentiment)`` tuple. ``category`` is the first entry of
        ``candidate_labels`` found in the reply ("Other" if none is found);
        ``sentiment`` is "Positive" if the reply mentions it, otherwise
        "Negative". When the request or the reply parsing fails, the tuple is
        ``("Error", <message>)`` instead; no exception is raised for those
        failures.
    """
    prompt = (
        f"The following review needs classification:\n\n'{review_text}'\n\n"
        "Identify the main topic from these categories: "
        f"{', '.join(candidate_labels)}. "
        "Additionally, classify the sentiment as Positive or Negative."
    )

    try:
        response = requests.post(
            OLLAMA_URL,
            # "stream": False asks for a single JSON object. Without it the
            # generate endpoint streams one JSON object per line, which
            # response.json() cannot parse.
            json={"model": MODEL_NAME, "prompt": prompt, "stream": False},
            headers={"Content-Type": "application/json"},
            # Bound the wait so a stalled server cannot hang the whole run;
            # a timeout surfaces as a RequestException handled below.
            timeout=120,
        )
        response.raise_for_status()  # Raise exception for HTTP errors

        try:
            result = response.json()
        except json.JSONDecodeError as e:
            raise ValueError(f"Failed to parse JSON response: {response.text}") from e

        # The generate endpoint returns the generated text under "response".
        if not isinstance(result, dict) or "response" not in result:
            raise ValueError(f"Unexpected response format: {json.dumps(result)}")

        generated_text = str(result["response"])

        # Match case-insensitively: the model does not reliably reproduce the
        # exact capitalisation used in the prompt.
        reply = generated_text.lower()
        category = next(
            (label for label in candidate_labels if label.lower() in reply),
            "Other",
        )
        sentiment = "Positive" if "positive" in reply else "Negative"
        return category, sentiment
    except requests.RequestException as e:
        return "Error", f"Request failed: {str(e)}"
    except ValueError as e:
        return "Error", f"Parsing error: {str(e)}"

# Apply classification with progress bar
print(f"Classifying up to {len(df)} reviews...")
df_progress = tqdm(df.iterrows(), total=len(df), desc="Processing", unit="review")
for idx, row in df_progress:
    raw_review = row[review_column]
    # Empty CSV cells are loaded as NaN, and str(NaN) is the literal text
    # "nan". Skip missing or blank reviews rather than asking the model to
    # classify that placeholder; their output cells are left untouched.
    if pd.isna(raw_review) or not str(raw_review).strip():
        continue
    review_text = str(raw_review).replace("\n", " ")  # Sanitize newlines in the review text
    category, sentiment = classify_review(review_text)
    # On failure classify_review returns ("Error", <message>), so the message
    # ends up in the sentiment column of that row.
    df.at[idx, "talks_about"] = category
    df.at[idx, "sentiment"] = sentiment

# Save the classified data
output_file = "data/reviews_with_sentiments_ollama.csv"
df.to_csv(output_file, index=False)
print(f"Classification complete. Results saved to '{output_file}'.")


Classifying up to 10 reviews...


Processing: 100%|██████████| 10/10 [00:32<00:00,  3.27s/review]

Classification complete. Results saved to 'data/reviews_with_sentiments_ollama.csv'.



