In [31]:
def apk(actual, predicted, k=10):
    """
    Computes the average precision at k.

    Walks the first ``k`` predictions in rank order. Every time a prediction
    is relevant (present in ``actual``) and has not already appeared earlier
    in the ranking, the precision at that rank (hits so far / rank) is added
    to a running sum. The sum is finally divided by ``min(len(actual), k)``,
    i.e. the largest number of hits that fits inside the cutoff.

    Parameters:
      actual (list): A list of the ground truth (relevant) items. Order does
        not matter. The normaliser uses ``len(actual)``, so repeated entries
        lower the score; pass unique items.
      predicted (list): A list of predicted items (order matters). Repeated
        predictions are only credited the first time they appear.
      k (int): The rank cutoff.

    Returns:
      score (float): The average precision at k. 0.0 when ``actual`` is empty
        or when ``k`` is not positive (no prediction is inspected).
    """
    # A non-positive cutoff inspects no predictions. Returning early avoids
    # the division by min(len(actual), 0) and the from-the-end slicing that a
    # negative k would otherwise trigger.
    if k <= 0:
        return 0.0

    # Without relevant items there is nothing to retrieve.
    if not actual:
        return 0.0

    top_k = predicted[:k]

    score = 0.0
    num_hits = 0

    for rank, item in enumerate(top_k, start=1):
        # Credit an item only if it is relevant and was not ranked earlier.
        if item in actual and item not in top_k[:rank - 1]:
            num_hits += 1
            score += num_hits / rank

    # Normalize by the number of relevant items or k, whichever is smaller
    return score / min(len(actual), k)


def mapk(actual_list, predicted_list, k=10):
    """
    Computes the mean average precision at k.

    Calls ``apk`` on each (ground truth, prediction) pair, matched by
    position, and averages the per-query scores.

    Parameters:
      actual_list (list of lists): A list of lists containing the ground truth items for each query.
      predicted_list (list of lists): A list of lists containing the predicted ranking for each query.
      k (int): The rank cutoff.

    Returns:
      score (float): The mean average precision at k over all queries.
        0.0 when there are no queries.

    Raises:
      ValueError: If ``actual_list`` and ``predicted_list`` differ in length.
    """
    num_queries = len(actual_list)

    # zip() would silently drop the unmatched tail while the divisor still
    # counted every ground-truth entry, so reject misaligned inputs up front.
    if num_queries != len(predicted_list):
        raise ValueError(
            "actual_list and predicted_list must have the same length "
            "(got {} and {})".format(num_queries, len(predicted_list))
        )

    # No queries: avoid dividing by zero.
    if num_queries == 0:
        return 0.0

    total = sum(apk(a, p, k) for a, p in zip(actual_list, predicted_list))
    return total / num_queries


# Worked example: score two queries with a rank cutoff of 5.
k = 5

# Relevant items per query (one inner list per query).
actual_list = [['A', 'B', 'C'], ['A', 'B']]

# Ranked predictions for the same queries, best guess first.
predicted_list = [
    ['B', 'D', 'A', 'C', 'E'],
    ['D', 'C', 'B', 'A'],
]

mapk_score = mapk(actual_list, predicted_list, k)
print("MAP@{}: {:.4f}".format(k, mapk_score))


MAP@5: 0.6111


In [27]:
# LIME MAPK

import pandas as pd
import re

# One keyword table per prompt setting.
zero_shot_df = pd.read_csv("./weighted_loss/keywords/zero_shot_rouge.csv")
zero_shot_rag_df = pd.read_csv("./weighted_loss/keywords/zero_shot_rag_rouge.csv")
few_shot_df = pd.read_csv("./weighted_loss/keywords/few_shot_rouge.csv")
few_shot_rag_df = pd.read_csv("./weighted_loss/keywords/few_shot_rag_rouge.csv")

# Keyword delimiter: either a comma with no digit directly before or after it
# (so "30,000" stays one token), or a run of spaces / . ! ? ; : characters.
# Gotcha: "." always splits, so a decimal such as "3.5" becomes "3" and "5".
keyword_delimiter = re.compile(r'(?<!\d),(?!\d)|[ .!?;:]+')

# Every prompt setting goes through the same tokenise-and-score pipeline, so
# run it in a loop instead of keeping four hand-copied variants in sync.
for setting_label, setting_df in (
    ("zero-shot:", zero_shot_df),
    ("zero-shot-rag:", zero_shot_rag_df),
    ("few-shot:", few_shot_df),
    ("few-shot-rag:", few_shot_rag_df),
):
    # Per column: missing cells become "" (no keywords), each cell is split on
    # the delimiter, empty fragments are dropped and tokens are lower-cased.
    tokenized = {
        column: [
            [token.lower() for token in keyword_delimiter.split(cell) if token]
            for cell in setting_df[column].fillna("").astype(str).tolist()
        ]
        for column in ("keywords", "keywords_from_icl")
    }
    keywords_from_roberta = tokenized["keywords"]
    keywords_from_icl = tokenized["keywords_from_icl"]

    # `keywords_from_icl` is treated as ground truth and `keywords` as the
    # ranked prediction; only the first 5 predicted tokens are scored.
    print(setting_label, mapk(actual_list=keywords_from_icl, predicted_list=keywords_from_roberta, k=5))

zero-shot: 0.4811494587641342
zero-shot-rag: 0.4407765921409183
few-shot: 0.5415599773767261
few-shot-rag: 0.338623209446379


In [29]:
# SHAP MAPK

import pandas as pd
import re

# One keyword table per prompt setting.
zero_shot_df = pd.read_csv("./shap/keywords/zero_shot_rouge.csv")
zero_shot_rag_df = pd.read_csv("./shap/keywords/zero_shot_rag_rouge.csv")
few_shot_df = pd.read_csv("./shap/keywords/few_shot_rouge.csv")
few_shot_rag_df = pd.read_csv("./shap/keywords/few_shot_rag_rouge.csv")

# Keyword delimiter: either a comma with no digit directly before or after it
# (so "30,000" stays one token), or a run of spaces / . ! ? ; : characters.
# Gotcha: "." always splits, so a decimal such as "3.5" becomes "3" and "5".
keyword_delimiter = re.compile(r'(?<!\d),(?!\d)|[ .!?;:]+')

# Every prompt setting goes through the same tokenise-and-score pipeline, so
# run it in a loop instead of keeping four hand-copied variants in sync.
for setting_label, setting_df in (
    ("zero-shot:", zero_shot_df),
    ("zero-shot-rag:", zero_shot_rag_df),
    ("few-shot:", few_shot_df),
    ("few-shot-rag:", few_shot_rag_df),
):
    # Per column: missing cells become "" (no keywords), each cell is split on
    # the delimiter, empty fragments are dropped and tokens are lower-cased.
    tokenized = {
        column: [
            [token.lower() for token in keyword_delimiter.split(cell) if token]
            for cell in setting_df[column].fillna("").astype(str).tolist()
        ]
        for column in ("keywords", "keywords_from_icl")
    }
    keywords_from_roberta = tokenized["keywords"]
    keywords_from_icl = tokenized["keywords_from_icl"]

    # `keywords_from_icl` is treated as ground truth and `keywords` as the
    # ranked prediction; only the first 5 predicted tokens are scored.
    print(setting_label, mapk(actual_list=keywords_from_icl, predicted_list=keywords_from_roberta, k=5))

zero-shot: 0.44727658851511876
zero-shot-rag: 0.39861655536198115
few-shot: 0.49838667797830005
few-shot-rag: 0.2826859514130852


In [30]:
# LLAMA MAPK

import pandas as pd
import re

# One keyword table per prompt setting.
zero_shot_df = pd.read_csv("./weighted_loss/llama_keywords/zero_shot_rouge.csv")
zero_shot_rag_df = pd.read_csv("./weighted_loss/llama_keywords/zero_shot_rag_rouge.csv")
few_shot_df = pd.read_csv("./weighted_loss/llama_keywords/few_shot_rouge.csv")
few_shot_rag_df = pd.read_csv("./weighted_loss/llama_keywords/few_shot_rag_rouge.csv")

# Keyword delimiter: either a comma with no digit directly before or after it
# (so "30,000" stays one token), or a run of spaces / . ! ? ; : characters.
# Gotcha: "." always splits, so a decimal such as "3.5" becomes "3" and "5".
keyword_delimiter = re.compile(r'(?<!\d),(?!\d)|[ .!?;:]+')

# Every prompt setting goes through the same tokenise-and-score pipeline, so
# run it in a loop instead of keeping four hand-copied variants in sync.
for setting_label, setting_df in (
    ("zero-shot:", zero_shot_df),
    ("zero-shot-rag:", zero_shot_rag_df),
    ("few-shot:", few_shot_df),
    ("few-shot-rag:", few_shot_rag_df),
):
    # Per column: missing cells become "" (no keywords), each cell is split on
    # the delimiter, empty fragments are dropped and tokens are lower-cased.
    tokenized = {
        column: [
            [token.lower() for token in keyword_delimiter.split(cell) if token]
            for cell in setting_df[column].fillna("").astype(str).tolist()
        ]
        for column in ("keywords", "keywords_from_icl")
    }
    # NOTE(review): the name `keywords_from_roberta` is kept so the kernel
    # namespace matches what this cell has always left behind; which model
    # produced the `keywords` column of the llama_keywords files is not
    # visible here — confirm before relying on the name.
    keywords_from_roberta = tokenized["keywords"]
    keywords_from_icl = tokenized["keywords_from_icl"]

    # `keywords_from_icl` is treated as ground truth and `keywords` as the
    # ranked prediction; only the first 5 predicted tokens are scored.
    print(setting_label, mapk(actual_list=keywords_from_icl, predicted_list=keywords_from_roberta, k=5))

zero-shot: 0.09562846310877784
zero-shot-rag: 0.30552990708478484
few-shot: 0.3594736141370111
few-shot-rag: 0.15242668408826932
