# Micro and Macro Metrics

In [1]:
def _ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0 when the denominator is 0."""
    return numerator / denominator if denominator > 0 else 0


def _f1(precision, recall):
    """Return the harmonic mean of precision and recall, or 0 when both are 0."""
    total = precision + recall
    return 2 * precision * recall / total if total > 0 else 0


def _precision_recall_f1(true_positives, predicted_total, actual_total):
    """Return ``(precision, recall, f1)`` for the given counts."""
    precision = _ratio(true_positives, predicted_total)
    recall = _ratio(true_positives, actual_total)
    return precision, recall, _f1(precision, recall)


def _column_means(rows):
    """Average each column of ``(precision, recall, f1)`` rows; zeros if empty."""
    if not rows:
        return 0, 0, 0
    return tuple(sum(column) / len(rows) for column in zip(*rows))


def evaluate_aspect_and_sentiment_micro_macro(predictions_list, ground_truth_list):
    """Score aspect and sentiment predictions with micro and macro averages.

    Each element of ``predictions_list`` / ``ground_truth_list`` is the list of
    records for one sample; the two lists are walked in lockstep with ``zip``,
    so surplus samples in the longer list are ignored.  A record is an indexable
    whose item 0 is the aspect term, item 1 the polarity and item 3 the
    category (item 2 is not used here).

    Matching is greedy: every prediction is paired with the first not yet
    matched ground-truth record that has the same aspect term
    (case-insensitive) and the same category.  A sentiment match additionally
    requires equal polarity and is only counted for aspect-matched pairs.

    Sentiment precision is measured against the aspect-matched pairs, while
    sentiment recall is measured against all ground-truth records.

    Micro scores pool the counts of all samples; macro scores are the plain
    mean of the per-sample scores.  Every ratio with a zero denominator is
    reported as 0, including the macro averages when there are no samples.

    Returns:
        ``{"Micro": {...}, "Macro": {...}}`` where each value maps "Aspect" and
        "Sentiment" to a dict with "Precision", "Recall" and "F1"; or ``None``
        when comparing a record raised (the error and both records are
        printed first).
    """
    total_predicted = total_actual = 0
    total_aspect_hits = total_sentiment_hits = 0

    macro_aspect_results = []
    macro_sentiment_results = []

    for predictions, ground_truth in zip(predictions_list, ground_truth_list):
        aspect_hits = 0
        sentiment_hits = 0
        matched_ground_truth_indices = set()

        for pred in predictions:
            for index, truth in enumerate(ground_truth):
                if index in matched_ground_truth_indices:
                    continue  # each ground-truth record can be matched once

                try:
                    same_aspect = (
                        pred[0].lower() == truth[0].lower() and pred[3] == truth[3]
                    )
                    # Polarity is only inspected once the aspect matched.
                    same_sentiment = same_aspect and pred[1] == truth[1]
                except Exception as e:
                    # Best-effort diagnostics kept from the original contract:
                    # report the offending records and give up with None.
                    print(e)
                    print(pred)
                    print(truth)
                    return None

                if same_aspect:
                    aspect_hits += 1
                    matched_ground_truth_indices.add(index)
                    if same_sentiment:
                        sentiment_hits += 1
                    break

        # Pooled counts for the micro scores.
        total_predicted += len(predictions)
        total_actual += len(ground_truth)
        total_aspect_hits += aspect_hits
        total_sentiment_hits += sentiment_hits

        # Per-sample scores for the macro averages.
        macro_aspect_results.append(
            _precision_recall_f1(aspect_hits, len(predictions), len(ground_truth))
        )
        macro_sentiment_results.append(
            _precision_recall_f1(sentiment_hits, aspect_hits, len(ground_truth))
        )

    micro_aspect = _precision_recall_f1(
        total_aspect_hits, total_predicted, total_actual
    )
    micro_sentiment = _precision_recall_f1(
        total_sentiment_hits, total_aspect_hits, total_actual
    )
    macro_aspect = _column_means(macro_aspect_results)
    macro_sentiment = _column_means(macro_sentiment_results)

    def as_report(scores):
        precision, recall, f1 = scores
        return {"Precision": precision, "Recall": recall, "F1": f1}

    return {
        "Micro": {
            "Aspect": as_report(micro_aspect),
            "Sentiment": as_report(micro_sentiment),
        },
        "Macro": {
            "Aspect": as_report(macro_aspect),
            "Sentiment": as_report(macro_sentiment),
        },
    }

# Example usage: two samples whose predictions agree with the ground truth on
# aspect term, polarity and category; only the opinion words differ.
predictions_list = [
    [
        ("food", "positive", "good", "FOOD#QUALITY"),
        ("service", "negative", "bad", "SERVICE#GENERAL"),
    ],
    [
        ("food", "positive", "delicious", "FOOD#QUALITY"),
    ],
]
ground_truth_list = [
    [
        ("food", "positive", "delicious", "FOOD#QUALITY"),
        ("service", "negative", "poor", "SERVICE#GENERAL"),
    ],
    [
        ("food", "positive", "tasty", "FOOD#QUALITY"),
    ],
]

results = evaluate_aspect_and_sentiment_micro_macro(
    predictions_list=predictions_list,
    ground_truth_list=ground_truth_list,
)
print(results)

{'Micro': {'Aspect': {'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0}, 'Sentiment': {'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0}}, 'Macro': {'Aspect': {'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0}, 'Sentiment': {'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0}}}


In [4]:
import json

# Function to read data from JSONL file
def read_jsonl(file_path):
    """Parse a JSON Lines file into a list with one decoded value per line.

    The file is decoded as UTF-8 regardless of the platform default, and
    blank or whitespace-only lines are skipped instead of being handed to the
    JSON parser.

    Args:
        file_path: Path of the JSONL file to read.

    Returns:
        The decoded values in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate empty separator / trailing lines
            data.append(json.loads(line))
    return data

# Convert data to format required for evaluation
def convert_data_for_evaluation(data, name='Quadruples'):
    """Turn each record's quadruple dicts into evaluation tuples.

    Args:
        data: Iterable of dict records, one per sample.
        name: Key under which a record stores its list of quadruple dicts.
            A record where the key is missing or holds ``None`` contributes
            an empty list.

    Returns:
        One list per record, each holding
        ``(aspect, polarity, opinion, category)`` tuples in their stored order.

    Raises:
        KeyError: If a quadruple dict lacks one of the four fields.
    """
    aspect_sentiment_pairs = []
    for item in data:
        # ``or []`` also covers a key that is present but null.
        quadruples = item.get(name) or []
        aspect_sentiment_pairs.append([
            (quad['aspect'], quad['polarity'], quad['opinion'], quad['category'])
            for quad in quadruples
        ])
    return aspect_sentiment_pairs

# JSONL files holding the predictions and the matching ground-truth labels.
output_files = ["metric_train_set.jsonl"]
ground_truth_files = ["combined_train.jsonl"]

# One converted dataset per prediction file (default 'Quadruples' key).
predictions_list = [
    convert_data_for_evaluation(read_jsonl(path)) for path in output_files
]

# One converted dataset per ground-truth file (records keyed by 'labels').
ground_truth_list = [
    convert_data_for_evaluation(read_jsonl(path), name='labels')
    for path in ground_truth_files
]

print("#####Testing Metrics#####")
# Only the first prediction / ground-truth dataset pair is scored.
results = evaluate_aspect_and_sentiment_micro_macro(
    predictions_list[0],
    ground_truth_list[0],
)
print(results)

#####Testing Metrics#####
{'Micro': {'Aspect': {'Precision': 0.5596638655462185, 'Recall': 0.28583690987124466, 'F1': 0.3784090909090909}, 'Sentiment': {'Precision': 0.9309309309309309, 'Recall': 0.26609442060085836, 'F1': 0.41388518024032045}}, 'Macro': {'Aspect': {'Precision': 0.5320595238095239, 'Recall': 0.35135200420494533, 'F1': 0.40559680021560807}, 'Sentiment': {'Precision': 0.7667499999999999, 'Recall': 0.33369092916887033, 'F1': 0.4369640098034061}}}


In [5]:
import json
import csv

def process_jsonl(file_path):
    """Load a UTF-8 JSON Lines file as a list of decoded entries.

    Blank or whitespace-only lines are skipped rather than passed to the JSON
    parser, so stray empty lines do not abort the load.

    Args:
        file_path: Path of the JSONL file to read.

    Returns:
        The decoded entries in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return [json.loads(line) for line in file if line.strip()]

def write_to_csv(data1, data2, csv_file):
    """Write a label-versus-prediction comparison table to a CSV file.

    ``data1[i]`` supplies the review text and its 'labels'; ``data2[i]``
    supplies the 'Quadruples' predicted for the same review.  For every label
    the first still-unused prediction with the same aspect term
    (case-insensitive) is written next to it; labels without such a
    prediction get 'NA' in the predicted columns, and predictions left over
    get 'NA' in the actual columns.  ``review_id`` is the record's position in
    ``data1`` and ``review_len`` is the whitespace-separated token count of
    the text.

    The prediction lists inside ``data2`` are not modified.  A missing or
    ``None`` 'text', 'labels' or 'Quadruples' value is treated as empty.

    Args:
        data1: Sequence of dict records with 'text' and 'labels'.
        data2: Sequence of dict records with 'Quadruples', index-aligned
            with ``data1``.
        csv_file: Path of the CSV file to create or overwrite.

    Raises:
        IndexError: If ``data2`` has fewer records than ``data1``.
    """
    header = [
        'review_id', 'term_actual', 'term_predicted', 'opinion_actual',
        'opinion_predicted', 'aspect_actual', 'aspect_predicted',
        'sentiment_actual', 'sentiment_predicted', 'review_len',
    ]
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(header)
        for review_id, entry in enumerate(data1):
            labels = entry.get('labels') or []
            # Work on a copy so that consuming matched predictions does not
            # strip them from the caller's data2 records.
            unmatched = list(data2[review_id].get('Quadruples') or [])
            review_len = len((entry.get('text') or '').split())

            for label in labels:
                for position, pred in enumerate(unmatched):
                    if label['aspect'].lower() == pred['aspect'].lower():
                        writer.writerow([
                            review_id, label['aspect'], pred['aspect'],
                            label['opinion'], pred['opinion'],
                            label['category'], pred['category'],
                            label['polarity'], pred['polarity'], review_len,
                        ])
                        # Each prediction may be paired with one label only.
                        del unmatched[position]
                        break
                else:
                    writer.writerow([
                        review_id, label['aspect'], 'NA',
                        label['opinion'], 'NA',
                        label['category'], 'NA',
                        label['polarity'], 'NA', review_len,
                    ])

            for pred in unmatched:
                writer.writerow([
                    review_id, 'NA', pred['aspect'],
                    'NA', pred['opinion'],
                    'NA', pred['category'],
                    'NA', pred['polarity'], review_len,
                ])
                

# Inputs: the labelled reviews and the predictions for them; output: the CSV.
data_file1 = "combined_train.jsonl"
data_file2 = "metric_train_set.jsonl"
output_csv = "output.csv"

# Load both JSONL files, labelled data first.
data1 = process_jsonl(data_file1)
data2 = process_jsonl(data_file2)

# Write the label-versus-prediction comparison rows.
write_to_csv(data1, data2, csv_file=output_csv)

print("CSV file created successfully!")


CSV file created successfully!
