In [15]:
import pandas as pd

def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0 when the denominator is 0."""
    return numerator / denominator if denominator > 0 else 0


def calculate_metrics(csv_path):
    """Compute classification metrics from a CSV of labels and predictions.

    The CSV must contain the columns ``LABEL``, ``predictions``, ``BASE`` and
    ``YEAR``. A row is correct when ``LABEL == predictions``. Rows whose label
    is 0 form the negative ("fake") class; every other label is treated as
    the positive ("real") class.

    ``BASE`` is matched case-insensitively against ``'knowledge based'``,
    ``'sentimental'`` (reported as "Textual Based") and ``'common sense'``.
    Rows with any other or a missing ``BASE`` still count towards the overall
    and per-class metrics but towards no category. Rows with a missing
    ``YEAR`` are likewise excluded from the per-year tally only.

    Args:
        csv_path: Path (or anything ``pandas.read_csv`` accepts) of the CSV.

    Returns:
        dict mapping metric name to value. Every ratio is 0 when its
        denominator is 0. ``'Best Year (Most Correct Predictions)'`` is the
        first year seen with the highest number of correct predictions, or
        ``None`` when no row carries a year; in that case
        ``'Best Year Correct Predictions'`` is 0.

    Raises:
        KeyError: If one of the required columns is absent.
    """
    df = pd.read_csv(csv_path)

    total_predictions = len(df)
    true_positives = false_positives = true_negatives = false_negatives = 0

    # Per-category tallies, keyed by the lower-cased BASE value.
    category_counts = {
        'knowledge based': {'correct': 0, 'total': 0},
        'sentimental': {'correct': 0, 'total': 0},
        'common sense': {'correct': 0, 'total': 0},
    }

    # Per-year tallies; insertion order is first appearance in the file.
    year_accuracies = {}

    rows = zip(df['LABEL'], df['predictions'], df['BASE'], df['YEAR'])
    for label, prediction, base, year in rows:
        is_correct = bool(label == prediction)

        # Confusion-matrix cell is decided by the true label and correctness.
        if is_correct:
            if label == 0:
                true_negatives += 1
            else:
                true_positives += 1
        elif label == 0:
            false_positives += 1
        else:
            false_negatives += 1

        # A blank BASE cell is loaded as float NaN, which has no .lower().
        category = category_counts.get(base.lower()) if isinstance(base, str) else None
        if category is not None:
            category['total'] += 1
            if is_correct:
                category['correct'] += 1

        # NaN never equals itself, so as a dict key every missing year would
        # create its own junk entry; leave such rows out of the year tally.
        if pd.isna(year):
            continue
        year_stats = year_accuracies.setdefault(year, {'correct': 0, 'total': 0})
        year_stats['total'] += 1
        if is_correct:
            year_stats['correct'] += 1

    total_correct_predictions = true_positives + true_negatives

    # max() raises ValueError on an empty iterable, so guard the no-year case.
    if year_accuracies:
        best_year, best_stats = max(
            year_accuracies.items(), key=lambda item: item[1]['correct']
        )
        best_year_correct = best_stats['correct']
    else:
        best_year, best_year_correct = None, 0

    knowledge = category_counts['knowledge based']
    textual = category_counts['sentimental']
    common_sense = category_counts['common sense']

    results = {
        'Fake Accuracy': _safe_ratio(true_negatives, true_negatives + false_positives),
        'Real Accuracy': _safe_ratio(true_positives, true_positives + false_negatives),
        'Knowledge Based Accuracy': _safe_ratio(knowledge['correct'], knowledge['total']),
        'Textual Based Accuracy': _safe_ratio(textual['correct'], textual['total']),
        'Common Sense Based Accuracy': _safe_ratio(common_sense['correct'], common_sense['total']),
        'Overall Accuracy': _safe_ratio(total_correct_predictions, total_predictions),
        'Precision': _safe_ratio(true_positives, true_positives + false_positives),
        'Recall': _safe_ratio(true_positives, true_positives + false_negatives),
        'Best Year (Most Correct Predictions)': best_year,
        'Best Year Correct Predictions': best_year_correct
    }

    return results

# Example usage: score one predictions file and print every metric.
# Floats are shown with two decimals; all other values are printed as-is.
csv_path = '/kaggle/input/wertery5u/predictionsDistil - predictions2.csv.csv'  # Point this at your own CSV file
metrics = calculate_metrics(csv_path)
for metric, value in metrics.items():
    print(f"{metric}: {value:.2f}" if isinstance(value, float) else f"{metric}: {value}")


Fake Accuracy: 0.38
Real Accuracy: 1.00
Knowledge Based Accuracy: 0.65
Textual Based Accuracy: 0.76
Common Sense Based Accuracy: 0.68
Overall Accuracy: 0.69
Precision: 0.62
Recall: 1.00
Best Year (Most Correct Predictions): 2022.00
Best Year Correct Predictions: 8
