In [None]:
! pip install rouge_score

In [None]:
import csv
from rouge_score import rouge_scorer
from func_py import *

In [None]:
# Function to read a CSV file and return lists of text and speaker information
def read_csv(file_path):
    """Read a transcript CSV and return its text and speaker columns.

    The first row is treated as a header and skipped. Every following
    non-blank row is read positionally as
    ``start_time, end_time, speaker, text``.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A ``(texts, speakers)`` tuple of two equally long lists of strings,
        in file order. Both lists are empty for an empty or header-only file.

    Raises:
        ValueError: If a start or end time cannot be converted to ``float``.
        IndexError: If a non-blank row has fewer than four columns.
    """
    texts = []
    speakers = []
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires, so quoted fields containing line breaks are
    # parsed correctly.
    with open(file_path, mode='r', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the header row; tolerate a completely empty file
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing newline).
                continue
            # The timings are not returned, but converting them keeps the
            # validation: a malformed time still raises ValueError.
            float(row[0])
            float(row[1])
            speakers.append(row[2])  # Keep speaker as a string
            texts.append(row[3])     # Text content of the segment
    return texts, speakers

# Function to compare speaker segmentation between machine-generated and human-transcribed data
def compare_speakers(machine_speakers, human_speakers):
    """Print the fraction of positions where both speaker labels agree.

    The two lists are compared element by element, so they must have the
    same length. The result is printed with four decimal places; nothing is
    returned. Two empty lists report an accuracy of 0.

    Args:
        machine_speakers: Speaker labels from the machine-generated output.
        human_speakers: Speaker labels from the human transcription.

    Raises:
        ValueError: If the two lists differ in length.
    """
    n_segments = len(machine_speakers)
    if n_segments != len(human_speakers):
        raise ValueError("Length of machine speakers and human speakers lists do not match.")

    # Count the positions where both sources name the same speaker.
    matches = sum(
        1
        for machine_label, human_label in zip(machine_speakers, human_speakers)
        if machine_label == human_label
    )

    if n_segments > 0:
        accuracy = matches / n_segments
    else:
        accuracy = 0  # Nothing to compare; avoid dividing by zero
    print(f"Speaker Segmentation Accuracy: {accuracy:.4f}")

# Function to calculate ROUGE scores and display results
def calculate_rouge(machine_texts, human_texts):
    """Print per-segment and overall average ROUGE-1/2/L F-measures.

    Segments are compared pairwise by position. Each human text is passed to
    the scorer as the first argument (the target/reference) and the machine
    text as the second (the prediction). If the lists differ in length, only
    the first ``min(len(machine_texts), len(human_texts))`` pairs are scored
    and the remaining entries are ignored.

    When there is nothing to compare (either list is empty) the averages are
    reported as 0 instead of raising ``ZeroDivisionError``.

    Args:
        machine_texts: Sequence of machine-generated segment strings.
        human_texts: Sequence of human-transcribed segment strings.

    Returns:
        None. All results are printed to stdout.
    """
    num_comparisons = min(len(machine_texts), len(human_texts))
    total_rouge1, total_rouge2, total_rougeL = 0.0, 0.0, 0.0

    if num_comparisons > 0:
        # Build the scorer only when there is at least one pair to score.
        scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

        for i in range(num_comparisons):
            machine_text = machine_texts[i]
            human_text = human_texts[i]

            # Calculate ROUGE scores for the current segment
            scores = scorer.score(human_text, machine_text)

            # Print ROUGE scores for the current segment
            print(f"Comparison {i+1}:")
            print(f"Machine Text: {machine_text}")
            print(f"Human Text: {human_text}")
            print(f"ROUGE-1: {scores['rouge1'].fmeasure:.4f}")
            print(f"ROUGE-2: {scores['rouge2'].fmeasure:.4f}")
            print(f"ROUGE-L: {scores['rougeL'].fmeasure:.4f}")
            print("-" * 40)

            # Accumulate ROUGE scores
            total_rouge1 += scores['rouge1'].fmeasure
            total_rouge2 += scores['rouge2'].fmeasure
            total_rougeL += scores['rougeL'].fmeasure

    # With no comparisons the totals are all zero; dividing by 1 reports 0.0000
    # rather than crashing on a division by zero.
    divisor = max(num_comparisons, 1)
    avg_rouge1 = total_rouge1 / divisor
    avg_rouge2 = total_rouge2 / divisor
    avg_rougeL = total_rougeL / divisor

    print("Overall Average ROUGE Scores:")
    print(f"Average ROUGE-1: {avg_rouge1:.4f}")
    print(f"Average ROUGE-2: {avg_rouge2:.4f}")
    print(f"Average ROUGE-L: {avg_rougeL:.4f}")


In [None]:
# # Example usage
# machine_texts, machine_speakers = read_csv('/content/drive/MyDrive/output.csv')
# human_texts, human_speakers = read_csv('/content/drive/MyDrive/human_transcription_samplecall1.csv')  # change this path to the human transcription being compared

# # Calculate ROUGE scores
# calculate_rouge(machine_texts, human_texts)

# # Compare speakers
# compare_speakers(machine_speakers, human_speakers)