In [None]:
# Attach Google Drive to the Colab filesystem so files stored there can be read
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
import pandas as pd
import os

# Accumulator for the per-file DataFrames; the loading loop appends to it
metrics_list = []

# Directory on the mounted Drive that holds the metric CSV files
metrics_directory = '/content/drive/MyDrive/M8 _ Local Rag Agent with LlaMa3/Shruti/results'

# Column labels applied, in this order, to every metrics CSV when it is read
column_names = [
    # question / answer text
    'question',
    'ground_truth',
    'rag_answer',
    'contexts',
    # response time and CPU / GPU usage
    'response_times',
    'cpu_start_usages',
    'cpu_end_usages',
    'average_cpu_usages',
    'average_gpu_usages',
    # answer-quality scores
    'context_relevance',
    'answer_relevance',
    'groundedness',
    'answer_correctness',
    'human_judge_score',
]

# Load every CSV file in the metrics directory into its own DataFrame.
# os.listdir() returns names in arbitrary order, so they are sorted first: this makes
# the row order of combined_df reproducible from one run to the next.
for file in sorted(os.listdir(metrics_directory)):
    if not file.endswith('.csv'):  # only CSV files are treated as metrics files
        continue
    file_path = os.path.join(metrics_directory, file)
    try:
        # header=None: no line of the file is used as a header; column_names supplies
        # the labels instead.
        # NOTE(review): if the files do start with a header line, that line is loaded
        # as a data row -- confirm against the files.
        df = pd.read_csv(file_path, header=None, names=column_names)
    except Exception as e:
        # Best effort: report the unreadable file and carry on with the others.
        print(f"Error reading {file_path}: {e}")
    else:
        metrics_list.append(df)

# Stop here when nothing could be loaded; pd.concat() cannot work on an empty list
if not metrics_list:
    raise ValueError("No valid CSV files found or read in the specified directory.")

# Stack all per-file DataFrames into one table with a fresh 0..n-1 index
combined_df = pd.concat(metrics_list, ignore_index=True)

# Label written to the "model" column of both summary tables
MODEL_LABEL = 'RAG Agent with Routing (Llama 3)'


def _coerce_and_average(frame, columns):
    """Convert ``columns`` of ``frame`` to numeric in place and return their means.

    Values that cannot be parsed as numbers become NaN (``errors='coerce'``).
    ``DataFrame.mean`` skips NaN by default, so those entries do not contribute
    to the corresponding average.

    Parameters
    ----------
    frame : pd.DataFrame
        Table to update; the selected columns are overwritten with their
        numeric form.
    columns : list of str
        Names of the columns to convert and average.

    Returns
    -------
    pd.Series
        Mean of each selected column, indexed by column name.
    """
    frame[columns] = frame[columns].apply(pd.to_numeric, errors='coerce')
    return frame[columns].mean(axis=0)


def _as_summary_row(averages, model_label):
    """Return ``averages`` as a one-row DataFrame with a leading ``model`` column.

    Parameters
    ----------
    averages : pd.Series
        Per-column averages, indexed by column name.
    model_label : str
        Value stored in the ``model`` column.

    Returns
    -------
    pd.DataFrame
        One row: ``model`` first, then one column per entry of ``averages``.
    """
    summary = pd.DataFrame(averages).transpose()
    summary.insert(0, 'model', model_label)
    return summary


# Score columns describing the correctness of the RAG answers
correctness_columns = ['context_relevance', 'answer_relevance', 'groundedness', 'answer_correctness', 'human_judge_score']

# Average each correctness score over the whole dataset (also makes the columns numeric)
rag_correctness_avg = _coerce_and_average(combined_df, correctness_columns)
rag_correctness_avg_df = _as_summary_row(rag_correctness_avg, MODEL_LABEL)

# Display the average correctness of RAG answers
print("\nAverage correctness of RAG answers:")
print(rag_correctness_avg_df)

# Performance columns: response time plus CPU and GPU usage. The "cpu_" prefix of the
# variable names is kept as-is because other cells refer to these names.
cpu_columns = ['response_times', 'cpu_start_usages', 'cpu_end_usages', 'average_cpu_usages', 'average_gpu_usages']

# Average each performance metric over the whole dataset (also makes the columns numeric)
cpu_performance_avg = _coerce_and_average(combined_df, cpu_columns)
cpu_performance_avg_df = _as_summary_row(cpu_performance_avg, MODEL_LABEL)

# Display the average performance metrics
print("\nAverage CPU performance metrics:")
print(cpu_performance_avg_df)


Average correctness of RAG answers:
                              model  context_relevance  answer_relevance  \
0  RAG Agent with Routing (Llama 3)               8.87              8.95   

   groundedness  answer_correctness  human_judge_score  
0         8.755            0.841926               8.77  

Average CPU performance metrics:
                              model  response_times  cpu_start_usages  \
0  RAG Agent with Routing (Llama 3)          6.8651             3.083   

   cpu_end_usages  average_cpu_usages  average_gpu_usages  
0          3.0675             3.23735            15.63805  


In [None]:
# Show the one-row table of average correctness scores as the cell's output
rag_correctness_avg_df

Unnamed: 0,model,context_relevance,answer_relevance,groundedness,answer_correctness,human_judge_score
0,RAG Agent with Routing (Llama 3),8.87,8.95,8.755,0.841926,8.77


In [None]:
# Show the one-row table of average response-time / CPU / GPU metrics as the cell's output
cpu_performance_avg_df

Unnamed: 0,model,response_times,cpu_start_usages,cpu_end_usages,average_cpu_usages,average_gpu_usages
0,RAG Agent with Routing (Llama 3),6.8651,3.083,3.0675,3.23735,15.63805
