In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt


In [2]:
# Function to parse the QREL file
def parse_qrels(qrels_file):
    """Parse a whitespace-delimited qrels file into a nested relevance mapping.

    Every non-blank line must contain exactly four whitespace-separated
    fields: ``query_id  <ignored>  doc_id  relevance``. The second field is
    read but discarded.

    Args:
        qrels_file: Path of the qrels file to read.

    Returns:
        dict: ``{query_id: {doc_id: relevance}}`` where ``query_id`` and
        ``doc_id`` are strings and ``relevance`` is an ``int``. If the same
        (query, document) pair appears more than once, the last line wins.

    Raises:
        ValueError: If a non-blank line does not have four fields or its
            relevance field is not an integer.
    """
    qrels = {}
    with open(qrels_file, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            fields = line.split()
            if not fields:
                # Blank / whitespace-only lines (e.g. a trailing newline) carry no judgment.
                continue
            if len(fields) != 4:
                raise ValueError(
                    f"{qrels_file}, line {line_number}: expected 4 fields, got {len(fields)}"
                )
            query_id, _, doc_id, relevance = fields
            qrels.setdefault(query_id, {})[doc_id] = int(relevance)
    return qrels

In [3]:
# Function to calculate precision at k
def precision_at_k(relevant, retrieved, k):
    """Return the fraction of the top-``k`` retrieved documents that are relevant.

    Args:
        relevant: Mapping of doc id -> relevance grade. A document counts as
            relevant only when it is present with a grade greater than 0.
        retrieved: Ranked list of doc ids, best first.
        k: Cut-off rank.

    Returns:
        float: ``hits / cutoff`` where ``cutoff = min(k, len(retrieved))``.
        Because the cut-off is clamped, a run that returned fewer than ``k``
        documents is divided by the number it actually returned, not by ``k``.
        Returns ``0.0`` when there is nothing to inspect (empty ``retrieved``
        or ``k <= 0``) instead of dividing by zero.

    NOTE(review): the usual P@k definition divides by ``k`` even for short
    result lists; confirm the clamped denominator is what is wanted here.
    """
    cutoff = min(k, len(retrieved))
    if cutoff <= 0:
        return 0.0
    hits = sum(1 for doc_id in retrieved[:cutoff] if relevant.get(doc_id, 0) > 0)
    return hits / cutoff

# Function to calculate Mean Average Precision (MAP) at k
def mean_average_precision_at_k(relevant, retrieved, k):
    """Average the precision observed at each relevant hit within the top ``k``.

    The value is computed for a single ranked list. Walking the first ``k``
    entries of ``retrieved``, every document whose grade in ``relevant`` is
    greater than 0 contributes ``hits_so_far / rank``. The contributions are
    averaged over the number of relevant documents found in the top ``k``
    (not over the total number of relevant documents in ``relevant``).

    Args:
        relevant: Mapping of doc id -> relevance grade.
        retrieved: Ranked list of doc ids, best first.
        k: Cut-off rank.

    Returns:
        float: The averaged precision, or ``0.0`` when no relevant document
        appears in the top ``k``.
    """
    hits = 0
    precisions_at_hits = []
    for rank, candidate in enumerate(retrieved[:k], start=1):
        is_hit = relevant.get(candidate, 0) > 0
        if not is_hit:
            continue
        hits += 1
        # Precision measured at the rank of this relevant document.
        precisions_at_hits.append(hits / rank)
    return sum(precisions_at_hits) / len(precisions_at_hits) if precisions_at_hits else 0.0

# Function to calculate P and MAP
def calculate_metrics(df, qrel_dict):
    """Score every topic of one run with P@10, P@100, MAP@100 and MAP@10.

    Args:
        df: DataFrame holding one run; the columns ``topicId``, ``ranking``
            and ``docId`` are read.
        qrel_dict: ``{query_id: {doc_id: relevance}}`` judgments. Topics are
            looked up by ``str(topicId)``; a topic with no judgments is scored
            against an empty mapping.

    Returns:
        list[dict]: One record per topic (in ``groupby('topicId')`` order)
        with the keys ``topicId``, ``P@10``, ``P@100``, ``MAP@100``, ``MAP@10``.
    """

    def _score_topic(topic, rows):
        # Judgments are keyed by string ids, so normalise the topic before the lookup.
        judged = qrel_dict.get(str(topic), {})
        # Order the documents by ascending 'ranking' value before applying the cut-offs.
        ranked_docs = rows.sort_values('ranking')['docId'].tolist()
        return {
            'topicId': topic,
            'P@10': precision_at_k(judged, ranked_docs, 10),
            'P@100': precision_at_k(judged, ranked_docs, 100),
            'MAP@100': mean_average_precision_at_k(judged, ranked_docs, 100),
            'MAP@10': mean_average_precision_at_k(judged, ranked_docs, 10),
        }

    return [_score_topic(topic, rows) for topic, rows in df.groupby('topicId')]

In [4]:
# Define paths
# All locations below are hard-coded absolute Windows paths; edit them to match
# your machine before running the rest of the notebook.
qrel_file_path = 'D:\\VSCODE PROJECT\\IR\\dataset\\qrels.trec8.csv'  # relevance judgments passed to parse_qrels
input_dir = 'D:\\VSCODE PROJECT\\IR\\cleaned'  # directory scanned for run files; every *.csv in it is processed
output_dir = 'D:\\VSCODE PROJECT\\IR\\scores'  # created with os.makedirs before results are written
output_file_name = 'final_results.csv'
output_path = os.path.join(output_dir, output_file_name)  # destination of the combined per-topic results CSV


In [5]:
# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Parse the QREL file into {query_id: {doc_id: relevance}}
qrel_dict = parse_qrels(qrel_file_path)

# Get the list of input files (every *.csv directly inside input_dir).
# os.listdir returns entries in arbitrary order, so sort them to keep the row
# order of the combined results reproducible across runs and machines.
input_files = sorted(
    os.path.join(input_dir, file) for file in os.listdir(input_dir) if file.endswith('.csv')
)

# Collect one per-topic results DataFrame per input file
all_results = []

# Process each input file
for file in input_files:
    try:
        # Read the input dataset. Column names are supplied explicitly, so the
        # first line of each file is read as data rather than as a header.
        input_df = pd.read_csv(file, delimiter=',', names=['topicId', 'identifier', 'docId', 'ranking', 'similarityScore', 'systemName'])

        # Calculate metrics
        results = calculate_metrics(input_df, qrel_dict)

        # Convert to DataFrame and label it with the file name up to its first dot
        results_df = pd.DataFrame(results)
        results_df['system'] = os.path.basename(file).split('.')[0]

        # Append to all results
        all_results.append(results_df)
    except Exception as e:
        # Best effort: report the failing file and keep processing the others.
        print(f"Error processing file {file}: {e}")

In [6]:
# Combine the per-system results, persist them, and build one
# topic-by-system table per metric (P@10, P@100, MAP@100, MAP@10)
if not all_results:
    print("No valid data found to concatenate.")
else:
    final_results = pd.concat(all_results, ignore_index=True)
    # Save the final results to a file
    final_results.to_csv(output_path, index=False)

    # Rows are topics, columns are systems, cells hold the metric value
    (
        table_format_p10,
        table_format_p100,
        table_format_map100,
        table_format_map10,
    ) = (
        final_results.pivot(index='topicId', columns='system', values=metric)
        for metric in ('P@10', 'P@100', 'MAP@100', 'MAP@10')
    )


In [7]:

print("\nTable for P@10:")
display(table_format_p10)

# Save the pivoted table (topicId index included) to a file.
# Raw string: the backslashes in this Windows path are not escape sequences.
save_path = r"D:\VSCODE PROJECT\IR\scores\p@10.csv"
table_format_p10.to_csv(save_path, index=True)

# Create exactly one figure of the intended size; a separate plt.figure() call
# would leave an empty, never-closed figure behind.
fig, ax = plt.subplots(figsize=(10, 4))

# Hide axes for cleaner table appearance
ax.axis('off')

# Create table and add row labels
table = ax.table(
    cellText=table_format_p10.values,
    colLabels=table_format_p10.columns,
    rowLabels=table_format_p10.index,
    loc='center'
)

# Save the table as an image (written to the current working directory)
fig.savefig("p@10_table.png", bbox_inches='tight', dpi=300)
plt.close(fig)  # Close the figure to avoid memory leaks


Table for P@10:


system,Dm8Nbn,Flab8at,Flab8ax,GE8MTD2,MITSLStd,Mer8Adtd2,UB99T,apl8c221,att99ate,isa25t,mds08a2,ok8amxc,pir9Aa1,plt8ah2,ric8dpn
topicId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
401,0.0,0.2,0.3,1.0,0.3,0.2,0.0,0.7,0.4,0.0,0.0,0.0,0.0,0.0,0.0
402,0.6,0.6,0.9,0.6,0.7,0.6,0.0,0.8,0.6,0.0,0.5,0.6,0.6,0.5,0.5
403,0.6,0.9,1.0,1.0,0.9,0.7,0.5,1.0,0.9,0.3,0.9,1.0,0.9,0.9,0.9
404,0.1,0.2,0.3,0.4,0.4,0.4,0.1,0.4,0.2,0.0,0.0,0.6,0.6,0.3,0.2
405,0.2,0.5,0.4,0.6,0.2,0.3,0.1,0.6,0.3,0.0,0.4,0.3,0.4,0.3,0.4
406,0.2,0.5,0.5,0.4,0.4,0.4,0.0,0.6,0.5,0.0,0.3,0.6,0.4,0.4,0.5
407,0.3,0.9,1.0,0.6,0.7,0.8,0.1,0.8,0.9,0.8,0.5,0.9,0.5,0.4,0.3
408,0.1,0.3,0.4,0.7,0.4,0.4,0.0,0.3,0.2,0.6,0.4,0.6,0.5,0.5,0.4
409,0.2,0.2,0.2,0.4,0.4,0.3,0.1,0.3,0.1,0.0,0.2,0.3,0.3,0.2,0.2
410,0.5,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.9


<Figure size 1000x400 with 0 Axes>

In [8]:
print("\nTable for P@100:")
display(table_format_p100)

# Save the pivoted table (topicId index included) to a file.
# Raw string: the backslashes in this Windows path are not escape sequences.
save_path = r"D:\VSCODE PROJECT\IR\scores\p@100.csv"
table_format_p100.to_csv(save_path, index=True)

# Create exactly one figure of the intended size; a separate plt.figure() call
# would leave an empty, never-closed figure behind.
fig, ax = plt.subplots(figsize=(10, 4))

# Hide axes for cleaner table appearance
ax.axis('off')

# Create table and add row labels
table = ax.table(
    cellText=table_format_p100.values,
    colLabels=table_format_p100.columns,
    rowLabels=table_format_p100.index,
    loc='center'
)

# Save the table as an image (written to the current working directory)
fig.savefig("p@100_table.png", bbox_inches='tight', dpi=300)
plt.close(fig)  # Close the figure to avoid memory leaks



Table for P@100:


system,Dm8Nbn,Flab8at,Flab8ax,GE8MTD2,MITSLStd,Mer8Adtd2,UB99T,apl8c221,att99ate,isa25t,mds08a2,ok8amxc,pir9Aa1,plt8ah2,ric8dpn
topicId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
401,0.04,0.14,0.1,0.82,0.2,0.11,0.03,0.49,0.32,0.03,0.03,0.05,0.05,0.01,0.0
402,0.19,0.24,0.36,0.31,0.23,0.2,0.06,0.36,0.23,0.06,0.22,0.31,0.31,0.19,0.25
403,0.18,0.21,0.21,0.2,0.21,0.19,0.11,0.21,0.21,0.261538,0.2,0.2,0.2,0.18,0.18
404,0.04,0.23,0.25,0.24,0.27,0.28,0.13,0.29,0.26,0.0,0.3,0.29,0.3,0.3,0.28
405,0.05,0.13,0.19,0.22,0.12,0.14,0.06,0.25,0.17,0.0,0.12,0.15,0.12,0.09,0.14
406,0.02,0.13,0.11,0.1,0.12,0.09,0.01,0.13,0.12,0.0,0.13,0.12,0.1,0.08,0.13
407,0.1,0.35,0.39,0.22,0.29,0.23,0.07,0.3,0.44,0.12,0.24,0.33,0.25,0.18,0.29
408,0.16,0.39,0.43,0.31,0.42,0.36,0.07,0.44,0.33,0.25,0.36,0.39,0.42,0.27,0.38
409,0.04,0.11,0.1,0.12,0.09,0.1,0.04,0.11,0.07,0.0,0.1,0.1,0.09,0.11,0.1
410,0.17,0.6,0.6,0.51,0.62,0.58,0.01,0.57,0.6,0.0,0.59,0.59,0.45,0.03,0.61


<Figure size 1000x400 with 0 Axes>

In [9]:
print("\nTable for MAP@100:")
# Round once and reuse the same frame for display, CSV export and the image.
map100_rounded = table_format_map100.round(4)
display(map100_rounded)

# Save the rounded table (topicId index included) to a file.
# Raw string: the backslashes in this Windows path are not escape sequences.
save_path = r"D:\VSCODE PROJECT\IR\scores\map@100.csv"
map100_rounded.to_csv(save_path, index=True)

# Create exactly one figure of the intended size; a separate plt.figure() call
# would leave an empty, never-closed figure behind.
fig, ax = plt.subplots(figsize=(10, 4))

# Hide axes for cleaner table appearance
ax.axis('off')

# Create table and add row labels
table = ax.table(
    cellText=map100_rounded.values,
    colLabels=map100_rounded.columns,
    rowLabels=map100_rounded.index,
    loc='center'
)

# Save the table as an image (written to the current working directory)
fig.savefig("map@100_table.png", bbox_inches='tight', dpi=300)
plt.close(fig)  # Close the figure to avoid memory leaks


Table for MAP@100:


system,Dm8Nbn,Flab8at,Flab8ax,GE8MTD2,MITSLStd,Mer8Adtd2,UB99T,apl8c221,att99ate,isa25t,mds08a2,ok8amxc,pir9Aa1,plt8ah2,ric8dpn
topicId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
401,0.0384,0.1981,0.3038,0.9232,0.2955,0.2266,0.0312,0.6782,0.4075,0.0365,0.0398,0.0554,0.077,0.0103,0.0
402,0.3378,0.4508,0.6619,0.5059,0.5758,0.4679,0.0712,0.6503,0.5397,0.1252,0.4656,0.5376,0.4969,0.4268,0.4706
403,0.4987,0.8453,0.9082,0.9057,0.8333,0.6014,0.3463,0.8817,0.8194,0.3876,0.791,0.8805,0.7669,0.6994,0.7913
404,0.0911,0.2086,0.3194,0.3807,0.4395,0.4053,0.1695,0.3754,0.2435,0.0,0.2367,0.4433,0.4184,0.3134,0.2516
405,0.3198,0.4115,0.4296,0.5817,0.3055,0.2658,0.1226,0.4535,0.2961,0.0,0.3055,0.3971,0.383,0.3966,0.3164
406,0.8333,0.455,0.4385,0.506,0.5183,0.4479,0.0192,0.4733,0.5316,0.0,0.3068,0.5071,0.4179,0.3947,0.4075
407,0.1759,0.6993,0.6646,0.5886,0.6395,0.5477,0.2092,0.5982,0.746,0.7388,0.499,0.6809,0.4445,0.3871,0.3508
408,0.2336,0.4193,0.4664,0.5342,0.491,0.4201,0.0738,0.4363,0.34,0.4226,0.3813,0.4409,0.5075,0.388,0.3892
409,0.477,0.2384,0.2598,0.4509,0.3497,0.3772,0.1121,0.3767,0.2354,0.0,0.2496,0.3253,0.3766,0.2333,0.2135
410,0.3691,0.9287,0.9319,0.8785,0.8957,0.9178,0.0303,0.8724,0.9138,0.0,0.8886,0.9272,0.8056,0.057,0.8449


<Figure size 1000x400 with 0 Axes>

In [10]:
print("\nTable for MAP@10:")
# Round once and reuse the same frame for display, CSV export and the image.
map10_rounded = table_format_map10.round(4)
display(map10_rounded)

# Save the rounded table (topicId index included) to a file.
# Raw string: the backslashes in this Windows path are not escape sequences.
save_path = r"D:\VSCODE PROJECT\IR\scores\map@10.csv"
map10_rounded.to_csv(save_path, index=True)

# Create exactly one figure of the intended size; a separate plt.figure() call
# would leave an empty, never-closed figure behind.
fig, ax = plt.subplots(figsize=(10, 4))

# Hide axes for cleaner table appearance
ax.axis('off')

# Create table and add row labels
table = ax.table(
    cellText=map10_rounded.values,
    colLabels=map10_rounded.columns,
    rowLabels=map10_rounded.index,
    loc='center'
)

# Save the table as an image (written to the current working directory)
fig.savefig("map@10_table.png", bbox_inches='tight', dpi=300)
plt.close(fig)  # Close the figure to avoid memory leaks


Table for MAP@10:


system,Dm8Nbn,Flab8at,Flab8ax,GE8MTD2,MITSLStd,Mer8Adtd2,UB99T,apl8c221,att99ate,isa25t,mds08a2,ok8amxc,pir9Aa1,plt8ah2,ric8dpn
topicId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
401,0.0,0.6,0.7222,1.0,0.5333,0.6667,0.0,0.9095,0.6071,0.0,0.0,0.0,0.0,0.0,0.0
402,0.514,0.8722,0.9889,0.9762,0.9214,0.7885,0.0,0.9594,0.8708,0.0,0.8583,0.8556,0.7345,0.7278,0.8529
403,0.6787,0.9889,1.0,1.0,0.8783,0.6798,0.52,1.0,0.9627,0.2508,0.7857,1.0,0.8412,0.7857,0.9283
404,0.2,0.1825,0.6389,0.625,0.8542,0.4683,0.1111,0.8542,0.2262,0.0,0.0,0.5954,0.8708,0.2508,0.1556
405,0.5833,0.5944,0.7986,0.7885,0.35,0.373,0.1429,0.7958,0.3873,0.0,0.3604,0.8056,0.5528,0.7917,0.4833
406,0.8333,0.7111,0.5587,0.747,0.9167,0.6349,0.0,0.5926,0.7962,0.0,0.4778,0.7486,0.7333,0.5667,0.6533
407,0.3407,0.9627,1.0,0.8774,0.787,0.8389,1.0,0.9472,1.0,0.8106,0.6676,0.9765,0.62,0.7708,0.625
408,0.2,0.9167,0.7875,0.8976,0.95,0.7153,0.0,0.5167,0.5833,0.5926,0.7333,0.7438,0.9267,0.7117,0.8542
409,0.8333,0.625,0.6429,1.0,0.5048,0.7222,0.1111,0.9167,1.0,0.0,0.6111,0.7778,0.8333,0.6,0.4167
410,0.5422,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.9627


<Figure size 1000x400 with 0 Axes>

In [11]:
import os
import pandas as pd

# Path to the final results CSV file
output_path = r'D:\VSCODE PROJECT\IR\scores\final_results.csv'

# Load the final results DataFrame
final_results = pd.read_csv(output_path)

# Compute average scores for each system.
# Only the metric columns are averaged: a mean over topicId is meaningless, and
# selecting the columns explicitly keeps the aggregation from failing if a
# non-numeric column is ever added to the results file.
metric_columns = ['P@10', 'P@100', 'MAP@100', 'MAP@10']
average_scores = final_results.groupby('system')[metric_columns].mean().reset_index()

# Sort the average scores in descending order (best first): by P@10, with ties
# broken by P@100, then MAP@100, then MAP@10
average_scores = average_scores.sort_values(by=metric_columns, ascending=False).reset_index(drop=True)

# Print the average scores for each system in a table
print("Average P@10, P@100, MAP@100, MAP@10 for each system:")
display(average_scores)

# Ensure the output directory exists
output_directory = r'D:\VSCODE PROJECT\IR\scores'
os.makedirs(output_directory, exist_ok=True)

# Save the average scores to a CSV file
average_scores.to_csv(os.path.join(output_directory, 'average_scores.csv'), index=False)

# Output in a readable format
for _, row in average_scores.iterrows():
    print(f"System {row['system']}:")
    print(f"  Average Precision@10: {row['P@10']:.4f}")
    print(f"  Average Precision@100: {row['P@100']:.4f}")
    print(f"  Average MAP@100: {row['MAP@100']:.4f}")
    print(f"  Average MAP@10: {row['MAP@10']:.4f}")


Average P@10, P@100, MAP@100, MAP@10 for each system:


Unnamed: 0,system,P@10,P@100,MAP@100,MAP@10
0,GE8MTD2,0.602,0.2718,0.562747,0.777742
1,ok8amxc,0.55,0.268,0.503462,0.680115
2,Flab8ax,0.524,0.269,0.490554,0.682697
3,MITSLStd,0.508,0.2558,0.490214,0.677734
4,apl8c221,0.504,0.2558,0.467786,0.663076
5,Flab8at,0.486,0.2554,0.467668,0.623027
6,att99ate,0.476,0.2498,0.439029,0.578223
7,ric8dpn,0.45,0.2252,0.430575,0.619269
8,pir9Aa1,0.45,0.2128,0.427635,0.623682
9,Mer8Adtd2,0.444,0.2096,0.423007,0.614812


System GE8MTD2:
  Average Precision@10: 0.6020
  Average Precision@100: 0.2718
  Average MAP@100: 0.5627
  Average MAP@10: 0.7777
System ok8amxc:
  Average Precision@10: 0.5500
  Average Precision@100: 0.2680
  Average MAP@100: 0.5035
  Average MAP@10: 0.6801
System Flab8ax:
  Average Precision@10: 0.5240
  Average Precision@100: 0.2690
  Average MAP@100: 0.4906
  Average MAP@10: 0.6827
System MITSLStd:
  Average Precision@10: 0.5080
  Average Precision@100: 0.2558
  Average MAP@100: 0.4902
  Average MAP@10: 0.6777
System apl8c221:
  Average Precision@10: 0.5040
  Average Precision@100: 0.2558
  Average MAP@100: 0.4678
  Average MAP@10: 0.6631
System Flab8at:
  Average Precision@10: 0.4860
  Average Precision@100: 0.2554
  Average MAP@100: 0.4677
  Average MAP@10: 0.6230
System att99ate:
  Average Precision@10: 0.4760
  Average Precision@100: 0.2498
  Average MAP@100: 0.4390
  Average MAP@10: 0.5782
System ric8dpn:
  Average Precision@10: 0.4500
  Average Precision@100: 0.2252
  Averag

In [14]:

# Work on the per-system averages already held in memory
df = average_scores

# Build one two-column ranking (system + metric) per metric, best score first
sorted_by_P10, sorted_by_P100, sorted_by_MAP100, sorted_by_MAP10 = (
    df[['system', metric]].sort_values(by=metric, ascending=False)
    for metric in ('P@10', 'P@100', 'MAP@100', 'MAP@10')
)

# Display each ranking under its heading
for heading, ranking in (
    ("Average of P@10:", sorted_by_P10),
    ("\nAverage of P@100:", sorted_by_P100),
    ("\nAverage of MAP@100:", sorted_by_MAP100),
    ("\nAverage of MAP@10:", sorted_by_MAP10),
):
    print(heading)
    display(ranking)



Average of P@10:


Unnamed: 0,system,P@10
0,GE8MTD2,0.602
1,ok8amxc,0.55
2,Flab8ax,0.524
3,MITSLStd,0.508
4,apl8c221,0.504
5,Flab8at,0.486
6,att99ate,0.476
7,ric8dpn,0.45
8,pir9Aa1,0.45
9,Mer8Adtd2,0.444



Average of P@100:


Unnamed: 0,system,P@100
0,GE8MTD2,0.2718
2,Flab8ax,0.269
1,ok8amxc,0.268
3,MITSLStd,0.2558
4,apl8c221,0.2558
5,Flab8at,0.2554
6,att99ate,0.2498
7,ric8dpn,0.2252
8,pir9Aa1,0.2128
9,Mer8Adtd2,0.2096



Average of MAP@100:


Unnamed: 0,system,MAP@100
0,GE8MTD2,0.562747
1,ok8amxc,0.503462
2,Flab8ax,0.490554
3,MITSLStd,0.490214
4,apl8c221,0.467786
5,Flab8at,0.467668
6,att99ate,0.439029
7,ric8dpn,0.430575
8,pir9Aa1,0.427635
9,Mer8Adtd2,0.423007



Average of MAP@10:


Unnamed: 0,system,MAP@10
0,GE8MTD2,0.777742
2,Flab8ax,0.682697
1,ok8amxc,0.680115
3,MITSLStd,0.677734
4,apl8c221,0.663076
8,pir9Aa1,0.623682
5,Flab8at,0.623027
7,ric8dpn,0.619269
9,Mer8Adtd2,0.614812
6,att99ate,0.578223
