In [1]:
from transformers import pipeline
import pandas as pd
from flair.models import TextClassifier
from flair.data import Sentence
import torch
import os

# Function for BERT sentiment analysis using Hugging Face transformers

# Lazily-built pipeline shared by every call. Constructing the pipeline loads
# the model, which is far too expensive to repeat for each row of a CSV file.
_BERT_ANALYZER = None


def bert_sentiment(text):
    """Classify *text* with the nlptown multilingual BERT star-rating model.

    The model's star-rating label is collapsed to three classes: labels
    containing "1 star" or "2 stars" become "negative", labels containing
    "4 stars" or "5 stars" become "positive", and anything else is "neutral".

    Args:
        text: The text to classify.

    Returns:
        A tuple ``(sentiment, score, "BERT")`` where ``sentiment`` is one of
        "negative", "neutral" or "positive" and ``score`` is the score the
        pipeline reported for its label.

    Raises:
        Any exception raised while building or running the pipeline is
        propagated unchanged so the caller can fall back to another model.
    """
    global _BERT_ANALYZER
    if _BERT_ANALYZER is None:
        # Only assigned on success: a failed load is retried on the next call.
        _BERT_ANALYZER = pipeline(
            "sentiment-analysis",
            model="nlptown/bert-base-multilingual-uncased-sentiment",
        )

    result = _BERT_ANALYZER(text)[0]
    label = result['label'].lower()
    score = result['score']

    # Convert label to positive, negative, or neutral
    if "1 star" in label or "2 stars" in label:
        sentiment = "negative"
    elif "4 stars" in label or "5 stars" in label:
        sentiment = "positive"
    else:
        sentiment = "neutral"
    return sentiment, score, "BERT"

# Function for Flair sentiment analysis

# Lazily-loaded classifier shared by every call, so the model is loaded once
# instead of once per analysed text.
_FLAIR_CLASSIFIER = None


def flair_sentiment(text):
    """Classify *text* with Flair's 'en-sentiment' text classifier.

    Args:
        text: The text to classify.

    Returns:
        A tuple ``(label, score, "Flair")`` where ``label`` is the lowercased
        value of the first predicted label and ``score`` is that label's score.

    Raises:
        Any exception raised while loading the classifier or predicting
        (including ``IndexError`` if no label was produced) is propagated
        unchanged so the caller can fall back to another model.
    """
    global _FLAIR_CLASSIFIER
    if _FLAIR_CLASSIFIER is None:
        # Only assigned on success: a failed load is retried on the next call.
        _FLAIR_CLASSIFIER = TextClassifier.load('en-sentiment')

    sentence = Sentence(text)
    _FLAIR_CLASSIFIER.predict(sentence)  # attaches the predicted labels to `sentence`
    top_label = sentence.labels[0]
    return top_label.value.lower(), top_label.score, "Flair"

# Function for DistilBERT sentiment analysis

# Lazily-built pipeline shared by every call, so the model is loaded once
# instead of once per analysed text.
_DISTILBERT_ANALYZER = None


def distilbert_sentiment(text):
    """Classify *text* with the DistilBERT SST-2 sentiment model.

    Args:
        text: The text to classify.

    Returns:
        A tuple ``(sentiment, score, "DistilBERT")`` where ``sentiment`` is the
        lowercased label reported by the pipeline and ``score`` is its score.

    Raises:
        Any exception raised while building or running the pipeline is
        propagated unchanged so the caller can handle the failure.
    """
    global _DISTILBERT_ANALYZER
    if _DISTILBERT_ANALYZER is None:
        # Only assigned on success: a failed load is retried on the next call.
        _DISTILBERT_ANALYZER = pipeline(
            "sentiment-analysis",
            model="distilbert-base-uncased-finetuned-sst-2-english",
        )

    result = _DISTILBERT_ANALYZER(text)[0]
    sentiment = result['label'].lower()
    score = result['score']
    return sentiment, score, "DistilBERT"

# Function to determine which model to use based on text length
def sentiment_analysis(text):
    """Classify *text*, falling back through the available models on failure.

    The models are tried in this order, and the first one that succeeds wins:

    1. BERT, only when the text has at most 512 whitespace-separated words.
    2. Flair.
    3. DistilBERT.

    Each failure is reported on stdout and the next model is tried. The
    fallback order does not depend on whether a GPU is present.

    Args:
        text: The text to classify.

    Returns:
        The ``(sentiment, score, method)`` tuple of the first model that
        succeeds, or ``("neutral", 0.0, "None")`` if every model fails.
    """
    # Whitespace word count is used as a cheap stand-in for the token length.
    token_length = len(text.split())

    fallbacks = []
    if token_length <= 512:
        fallbacks.append(("BERT", bert_sentiment))
    fallbacks.append(("Flair", flair_sentiment))
    fallbacks.append(("DistilBERT", distilbert_sentiment))

    for model_name, analyze in fallbacks:
        try:
            return analyze(text)
        except Exception as e:
            # Deliberately broad: any failure (model download, tokenizer
            # limits, missing labels, ...) should just move on to the next
            # model rather than abort the whole batch.
            print(f"{model_name} failed with error: {e}")

    # If all models fail, return neutral sentiment
    return "neutral", 0.0, "None"

# Function to process all CSV files in the list
def process_all_csv_files(file_list):
    """Run ``process_csv_file`` on every CSV path listed in a text file.

    The list file contains one path per line. Surrounding whitespace is
    stripped and blank lines are ignored. Entries that do not exist on disk or
    do not end in ``.csv`` are reported on stdout and skipped.

    Args:
        file_list: Path to the text file that lists the CSV files to process.

    Raises:
        FileNotFoundError: If *file_list* itself does not exist.
    """
    # Read the whole list up front so the list file is closed before the
    # long-running per-file processing starts.
    with open(file_list, 'r') as f:
        files = [line.strip() for line in f]

    for file_path in files:
        if not file_path:
            # Blank line (e.g. a trailing newline) - not an entry at all.
            continue
        if os.path.exists(file_path) and file_path.endswith('.csv'):
            print(f"Processing file: {file_path}")
            process_csv_file(file_path)
        else:
            print(f"Skipping invalid or missing file: {file_path}")

# Function to process a single CSV file
def process_csv_file(input_csv):
    """Add sentiment columns to a CSV file of reviews, in place.

    The file must contain 'Title' and 'Review' columns. For each row the two
    are joined with a space and passed to ``sentiment_analysis``. Rows where
    both are missing or empty are labelled ``("neutral", 0.0, "None")``
    without calling any model. The results are stored in the new columns
    'Sentiment', 'Value' and 'Method Used', the file is overwritten with the
    updated table, and the table is printed.

    Args:
        input_csv: Path of the CSV file to read and overwrite.

    Raises:
        KeyError: If the 'Title' or 'Review' column is missing.
    """
    df = pd.read_csv(input_csv)

    # Assuming the review text is in columns named 'Title' and 'Review'.
    # Fill missing values BEFORE converting to str: astype(str) would turn NaN
    # into the literal text "nan", which fillna can no longer detect and which
    # would then be fed to the models as if it were review text.
    df['Title'] = df['Title'].fillna('').astype(str)
    df['Review'] = df['Review'].fillna('').astype(str)

    sentiments = []
    scores = []
    methods = []
    for title, review in zip(df['Title'], df['Review']):
        combined_text = f"{title} {review}".strip()  # Combine title and review text
        if combined_text:  # Ensure combined text is non-empty
            sentiment, score, method = sentiment_analysis(combined_text)
        else:
            sentiment, score, method = "neutral", 0.0, "None"  # Handle empty or invalid combined text
        sentiments.append(sentiment)
        scores.append(score)
        methods.append(method)

    # Adding the sentiment, value, and method columns to the DataFrame
    df['Sentiment'] = sentiments
    df['Value'] = scores
    df['Method Used'] = methods

    # Saving the updated DataFrame to the same CSV file
    df.to_csv(input_csv, index=False)

    # Displaying the updated DataFrame as a table
    print(df)

# Script entry point: specify the text file that lists the CSV files to
# process (one path per line) and run the batch. The list file is opened
# directly, so a wrong path here fails with FileNotFoundError.
file_list = "./TodoSentiment.list"  # Replace with the path to your list file
process_all_csv_files(file_list)


2024-12-04 12:19:43.198991: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-04 12:19:43.247445: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-04 12:19:43.248599: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


FileNotFoundError: [Errno 2] No such file or directory: './TodoSentiment.list'