### Import Necessary libraries

In [2]:
import pandas as pd

## Prepare the Vectors

# Text

In [3]:

def aggregated_vectors(file_path, aggregation_method='mean'):
    """
    Aggregates vectors by image name using the specified aggregation method.

    The CSV must contain an 'ImageName' column (the grouping key) and a
    'Caption' column, which is dropped before grouping. Every remaining
    column is aggregated per image name.

    Parameters:
    - file_path: str (or any source accepted by pandas.read_csv), the CSV file containing the vectors and image names.
    - aggregation_method: str, one of 'mean', 'sum', 'max', 'min'.

    Returns:
    - DataFrame indexed by 'ImageName' holding the aggregated vectors.

    Raises:
    - ValueError: if aggregation_method is not one of the supported names.
    """
    # Validate first so that a bad argument is reported before any file I/O happens.
    supported_methods = ('mean', 'sum', 'max', 'min')
    if aggregation_method not in supported_methods:
        raise ValueError("Unsupported aggregation method. Choose 'mean', 'sum', 'max', or 'min'.")

    # Load the file
    data = pd.read_csv(file_path)

    # Exclude the 'Caption' column before grouping
    data_without_caption = data.drop(columns=['Caption'])

    # Group the data by 'ImageName' and aggregate using the validated method
    return data_without_caption.groupby('ImageName').agg(aggregation_method)



In [None]:
# Aggregate the text vectors per image name.
file_path = 'Text.csv'  # Update this to your file path
aggregation_method = 'mean'  # Must be one of 'mean', 'sum', 'max', 'min' (anything else raises ValueError)

aggregated_vectors_text = aggregated_vectors(file_path, aggregation_method)
# Preview only the first rows of the aggregated result
print(aggregated_vectors_text.head())

## Image

In [5]:
def aggregate_vectors_no_tensor(file_path, aggregation_method='mean'):
    """
    Aggregates vectors by image name using the specified aggregation method, excluding the tensor-like column.

    The CSV is read without a header row. The last column is taken as the
    image name, the second-to-last column is discarded, and every column
    before that is treated as a vector component and renamed 'Vector_<i>'.

    Parameters:
    - file_path: str (or any source accepted by pandas.read_csv), the header-less CSV file containing the vectors and image names.
    - aggregation_method: str, one of 'mean', 'sum', 'max', 'min'.

    Returns:
    - DataFrame indexed by 'ImageName' holding the aggregated vectors.

    Raises:
    - ValueError: if aggregation_method is not one of the supported names.
    """
    # Validate first so that a bad argument is reported before any file I/O happens.
    supported_methods = ('mean', 'sum', 'max', 'min')
    if aggregation_method not in supported_methods:
        raise ValueError("Unsupported aggregation method. Choose 'mean', 'sum', 'max', or 'min'.")

    # Load the file without headers (columns are labelled 0..n-1)
    data = pd.read_csv(file_path, header=None)

    # Keep every column except the second-to-last one; the last column is the image name
    total_columns = data.shape[1]
    columns_to_use = list(range(total_columns - 2)) + [total_columns - 1]
    data_filtered = data[columns_to_use]

    # Rename columns for clarity
    vector_count = data_filtered.shape[1] - 1
    data_filtered.columns = [f'Vector_{i}' for i in range(vector_count)] + ['ImageName']

    # Group the data by 'ImageName' and aggregate using the validated method
    return data_filtered.groupby('ImageName').agg(aggregation_method)

In [None]:
# Aggregate the image vectors per image name.
# Note: this rebinds the notebook-level `file_path` and `aggregation_method` set by the text cell.
file_path = 'Image.csv'  # Update this to your file path
aggregation_method = 'mean'  # Must be one of 'mean', 'sum', 'max', 'min' (anything else raises ValueError)

aggregated_vectors_image = aggregate_vectors_no_tensor(file_path, aggregation_method)
# Preview only the first rows of the aggregated result
print(aggregated_vectors_image.head())

In [None]:
# Display the full aggregated image-vector frame as the cell output
aggregated_vectors_image

## R@K

In [6]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def transform_vectors(vectors, weight_matrix):
    """
    Project vectors through a weight matrix.

    Parameters:
    - vectors: array-like of vectors to transform (left operand of the product).
    - weight_matrix: array-like weight matrix (right operand of the product).

    Returns:
    - transformed_vectors: the result of np.dot(vectors, weight_matrix).
    """
    transformed_vectors = np.dot(vectors, weight_matrix)
    return transformed_vectors

def calculate_recall_at_k(image_vectors, text_vectors, weight_matrix, image_names, k=1):
    """
    Calculates Recall@K for a set of image vectors and corresponding text vectors using a weight matrix.

    Text vectors are first transformed with the weight matrix. For each image,
    all transformed texts are ranked by cosine similarity, and the image
    counts as a hit when its "correct" text position is among the top K.
    The correct position for image i is the first position at which
    image_names[i] occurs in image_names, so text_vectors are expected to be
    in the same order as image_names.

    Parameters:
    - image_vectors: 2-D array-like of aggregated image vectors (one row per image).
    - text_vectors: 2-D array-like of text vectors, positionally aligned with image_names.
    - weight_matrix: NumPy array representing the weight matrix for transforming the text vectors.
    - image_names: sequence (list, NumPy array, pandas Index, ...) of image names.
    - k: The number of top-ranked items to consider for calculating recall.

    Returns:
    - recall_at_k: The Recall@K score as a float; 0.0 when there are no image vectors.
    """
    num_images = len(image_vectors)
    if num_images == 0:
        # Nothing to evaluate; avoid dividing by zero below.
        return 0.0

    # Transform text vectors using the weight matrix
    transformed_text_vectors = transform_vectors(text_vectors, weight_matrix)

    # Map each name to its first position once, instead of scanning the whole
    # sequence for every image. Converting to a list also accepts NumPy arrays
    # and pandas Index objects.
    names = list(image_names)
    first_position = {}
    for position, name in enumerate(names):
        first_position.setdefault(name, position)

    # One similarity matrix for all images (rows) against all texts (columns)
    similarity_matrix = cosine_similarity(image_vectors, transformed_text_vectors)

    # Text positions in descending order of similarity, truncated to the top K
    top_k_positions = np.argsort(similarity_matrix, axis=1)[:, ::-1][:, :k]

    correct_predictions = 0
    for index in range(num_images):
        if first_position[names[index]] in top_k_positions[index]:
            correct_predictions += 1

    return correct_predictions / num_images



In [8]:
# Load the weight matrix from the binary .npy file.
# NOTE(review): absolute, machine-specific path — adjust for your environment.
best_solution_matrix = np.load(r"E:\MScoco\Weights\best_solution_matrix.npy")

In [None]:
# Display the loaded weight matrix as the cell output
best_solution_matrix

In [None]:
# Extract image names from the index of the aggregated image frame (the groupby key 'ImageName')
image_names = list(aggregated_vectors_image.index)
# Display the whole list as the cell output
image_names

In [None]:
# Image names present in the aggregated text frame (its groupby key 'ImageName')
image_names_text = list(aggregated_vectors_text.index)
# Display the whole list as the cell output
image_names_text

In [None]:
# Number of distinct image names in the aggregated image frame
print(len(image_names))

In [10]:
# Convert both aggregated frames to NumPy arrays, each sorted by its own index (image name).
# NOTE(review): sorting aligns the rows positionally only if both frames contain exactly the
# same set of image names — confirm, otherwise row i of one array is a different image than row i of the other.
aggregated_vectors_image_np = aggregated_vectors_image.sort_index().to_numpy()
aggregated_vectors_text_np = aggregated_vectors_text.sort_index().to_numpy()

In [None]:
# Evaluate Recall@k for the aggregated image/text vectors.
# NOTE(review): `image_names` comes from the un-sorted frame index while the arrays were sorted
# with sort_index(); presumably the groupby output is already sorted so both orders agree — confirm.
k = 10
recall_at_k_score = calculate_recall_at_k(aggregated_vectors_image_np, aggregated_vectors_text_np, best_solution_matrix, image_names, k)
print(f"Recall@{k}: {recall_at_k_score}")

## Image retrieval

In [15]:
import numpy as np
import pandas as pd

# Adjust the paths as necessary to load your CSV file and .npy file.
# Image.csv is read with header=None, the same way aggregate_vectors_no_tensor reads it:
# with the default header handling the first data row would be consumed as column names
# and that image would silently drop out of the evaluation.
image_df = pd.read_csv('Image.csv', header=None)
# NOTE(review): absolute, machine-specific path — adjust for your environment.
best_solution_matrix = np.load(r"E:\MScoco\Weights\best_solution_matrix_new_700_Parse.npy")

# All columns except the last two are feature values; the last column is the image name
# (the second-to-last column is skipped).
features = image_df.iloc[:, :-2].values
image_names = image_df.iloc[:, -1].values


In [16]:
# Transform the image feature vectors by right-multiplying with the weight matrix
transformed_features = np.dot(features, best_solution_matrix)


In [17]:
from sklearn.metrics.pairwise import cosine_similarity

# Compute cosine similarities for all pairs of images (square matrix, row i vs. every image)
similarities = cosine_similarity(transformed_features)

# Set the diagonal to -np.inf so that, after the descending sort below, every image ranks itself last
np.fill_diagonal(similarities, -np.inf)

# For each query row, candidate image indices ordered from most to least similar
ranks = np.argsort(similarities, axis=1)[:, ::-1]


In [None]:
# Placeholder for relevance criteria - you need to define this part
def is_relevant(query_image_name, candidate_image_name):
    """Relevance criterion placeholder: a candidate is relevant when its name equals the query's name."""
    # Replace this comparison with your own relevance criteria if needed
    names_match = query_image_name == candidate_image_name
    return names_match

# Calculate R@K: a query counts as correct when any of its K most similar images is relevant
K = 10  # Recall is evaluated at K = 10
correct_counts = np.zeros(len(image_names))

# Each row of `ranks` holds the candidate indices for query i, best first
# (the loop variable `query_idx` is that row of candidates, not a single index).
for i, query_idx in enumerate(ranks):
    query_image_name = image_names[i]
    for rank in query_idx[:K]:
        if is_relevant(query_image_name, image_names[rank]):
            # One relevant candidate is enough; stop scanning this query
            correct_counts[i] = 1
            break

# Fraction of queries with at least one relevant image in the top K
recall_at_k = np.mean(correct_counts)
print(f'Recall@{K}: {recall_at_k}')


## Image-to-Text Retrieval

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Adjust the file path as necessary for your setup
# NOTE(review): absolute, machine-specific path — adjust for your environment.
best_solution_matrix = np.load(r'E:\\MScoco\\Weights\\best_solution_matrix_new_700_Parse.npy')
# Load the image and text datasets.
# Image.csv is read with header=None, the same way aggregate_vectors_no_tensor reads it:
# with the default header handling the first image row would be consumed as column names.
# Text.csv keeps its header row because later cells address its 'ImageName' and 'Caption' columns by name.
image_df = pd.read_csv('Image.csv', header=None)
text_df = pd.read_csv('Text.csv')

In [3]:
# Every column except the last two is treated as a feature value, for both datasets
image_features = image_df.iloc[:, :-2].values
text_features = text_df.iloc[:, :-2].values

In [None]:
# (number of image rows, number of feature columns)
image_features.shape

In [None]:
# (number of text rows, number of feature columns)
text_features.shape

In [None]:
# Display the loaded weight matrix as the cell output
best_solution_matrix

In [6]:
 #Transform both image and text features by the weights matrix
# (the same matrix is applied to both modalities)
transformed_image_features = np.dot(image_features, best_solution_matrix)
transformed_text_features = np.dot(text_features, best_solution_matrix)

In [7]:
# Calculate cosine similarities between transformed image and text features:
# rows correspond to images, columns to texts
cosine_sim = cosine_similarity(transformed_image_features, transformed_text_features)

In [None]:
K = 10  # Adjust as needed
# For every image (row), the positions of its K most similar texts, best first
ranked_indices = np.argsort(-cosine_sim, axis=1)[:, :K]
hits = 0

for i in range(len(image_df)):
    # The image name is read from the last column of image_df
    image_name = image_df.iloc[i, -1]
    # Index labels of all texts whose 'ImageName' equals this image's name.
    # NOTE(review): these labels are compared with positional indices below, which presumably
    # coincide because text_df keeps the default index from read_csv — confirm.
    relevant_text_indices = text_df.index[text_df['ImageName'] == image_name].tolist()
    top_k_indices = ranked_indices[i, :K]

    # Print image names and their top K retrieved texts
    print(f"Image Name: {image_name}")
    for rank, text_index in enumerate(top_k_indices, start=1):
        print(f"  Rank {rank}: {text_df.iloc[text_index]['Caption']}")
    print("\n")

    # Simple recall calculation: a hit if at least one relevant text is in the top K
    if any(idx in top_k_indices for idx in relevant_text_indices):
        hits += 1

# Fraction of image rows with at least one relevant text in the top K
recall = hits / len(image_df)
print(f"Recall@{K}: {recall}")

In [None]:
# Debugging variant of the previous cell: same hit logic with more verbose printing.
# It reuses `K`, `ranked_indices`, `image_df`, `text_df` and the transformed features from earlier cells.

print(f"Total number of images: {len(image_df)}")
print(f"Total number of texts: {len(text_df)}")
print(f"Shape of transformed image features: {transformed_image_features.shape}")
print(f"Shape of transformed text features: {transformed_text_features.shape}")

# Nothing is recomputed here: the cosine similarities and top-K indices come from the earlier cells

hits = 0

for i in range(len(image_df)):
    # The image name is read from the last column of image_df
    image_name = image_df.iloc[i, -1]
    # Index labels of all texts whose 'ImageName' equals this image's name
    relevant_text_indices = text_df.index[text_df['ImageName'] == image_name].tolist()
    top_k_indices = ranked_indices[i, :K]

    # Debugging print: Image name and its relevant texts' indices
    print(f"\nImage Name: {image_name}, Relevant Text Indices: {relevant_text_indices}, Top K Indices: {top_k_indices}")

    # Print top K retrieved texts for debugging
    for rank, text_index in enumerate(top_k_indices, start=1):
        caption = text_df.iloc[text_index]['Caption']
        print(f"  Rank {rank}: {caption} (Index: {text_index})")

    # Check for hits: at least one relevant text among the top K
    if any(idx in top_k_indices for idx in relevant_text_indices):
        hits += 1
        print("Relevant text found in top K.")
    else:
        print("Relevant text NOT found in top K.")

# Detailed recall calculation printout
recall = hits / len(image_df)
print(f"\nHits: {hits}")
print(f"Total Images: {len(image_df)}")
print(f"Recall@{K}: {recall}")


In [None]:
# NOTE(review): `common_names` is not defined in any cell visible in this notebook, so this cell
# raises NameError on a fresh Restart & Run All — define it (or remove this cell) before relying on it.
# `hits` and `K` are whatever the most recently executed cell left behind.
recall1 = hits / len(common_names)
print(f"Recall@{K}: {recall1}")

## New based on the label

In [7]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [8]:
# Adjust the file path as necessary for your setup
# NOTE(review): absolute, machine-specific path — adjust for your environment.
best_solution_matrix = np.load(r'E:\\MScoco\\Weights\\best_solution_matrix_new_700_Parse.npy')
# Load the label-annotated image and text datasets (both read with a header row)
image_df = pd.read_csv('Updated_Image.csv')
text_df = pd.read_csv('Updated_Text.csv')

In [9]:
# Feature columns: everything except the last two columns for images,
# and everything except the last three columns for texts
image_features = image_df.iloc[:, :-2].values
text_features = text_df.iloc[:, :-3].values

In [10]:
 #Transform both image and text features by the weights matrix
# (the same matrix is applied to both modalities)
transformed_image_features = np.dot(image_features, best_solution_matrix)
transformed_text_features = np.dot(text_features, best_solution_matrix)

In [11]:
# Calculate cosine similarity: rows correspond to images, columns to texts
cosine_sim = cosine_similarity(transformed_image_features, transformed_text_features)

In [None]:
K = 10  # Adjust as needed
# For every image (row), the positions of its K most similar texts, best first
ranked_indices = np.argsort(-cosine_sim, axis=1)[:, :K]
hits = 0
total_relevant_items = 0

for i in range(len(image_df)):
    # The label is read from the column at position 10 in both frames
    image_label = image_df.iloc[i, 10]  # Adjust index as per your dataset
    # Index labels of all texts that carry the same label as this image
    relevant_text_indices = text_df.index[text_df.iloc[:, 10] == image_label].tolist()
    top_k_indices = ranked_indices[i, :K]

    # Calculate the number of relevant items in top K
    relevant_items_count = sum(idx in top_k_indices for idx in relevant_text_indices)
    total_relevant_items += relevant_items_count

    # For each image, print the number of relevant items found in top K
    print(f"Image {i+1}: {relevant_items_count} relevant items found in top {K}")

    # Simple recall calculation: a hit if at least one same-label text is in the top K
    if relevant_items_count > 0:
        hits += 1

# Fraction of image rows with at least one same-label text in the top K
recall = hits / len(image_df)
print(f"\nTotal relevant items found: {total_relevant_items}")
print(f"Recall@{K}: {recall}")

In [None]:
# Function to calculate recall
def calculate_recall_for_K(K):
    """
    Return the fraction of images that have at least one same-label text among their K most similar texts.

    Reads the notebook-level `cosine_sim` (images x texts), `image_df` and `text_df`;
    labels are taken from the column at position 10 of each frame.
    """
    # Text positions ordered by decreasing similarity, truncated to the K best per image
    top_k_per_image = np.argsort(-cosine_sim, axis=1)[:, :K]
    matched_images = 0

    for row in range(len(image_df)):
        label = image_df.iloc[row, 10]  # Adjust index as per your dataset
        same_label_texts = text_df.index[text_df.iloc[:, 10] == label].tolist()
        retrieved = top_k_per_image[row, :K]

        # Count the image once if any same-label text was retrieved
        found = any(text_id in retrieved for text_id in same_label_texts)
        if found:
            matched_images += 1

    return matched_images / len(image_df)

# List of K values to calculate recall for
K_values = [1, 2, 5, 10]
# NOTE(review): `hits` here is the notebook-level value left by an earlier cell,
# not a value produced by calculate_recall_for_K (its `hits` is local to the function).
print('hits',hits)
print("Total Images:",len(image_df))
# Calculate and print recall for each K (this loop rebinds the notebook-level `K`)
for K in K_values:
    recall = calculate_recall_for_K(K)
    print(f"Recall@{K}: {recall}")

In [None]:
# Function to calculate recall
def calculate_recall_for_K(K):
    """
    Return the fraction of images that have at least one same-label text among their K most similar texts.

    Reads the notebook-level `cosine_sim` (images x texts), `image_df` and `text_df`;
    labels are taken from the column at position 10 of each frame.
    """
    # Text positions ordered by decreasing similarity, truncated to the K best per image
    top_k_per_image = np.argsort(-cosine_sim, axis=1)[:, :K]
    matched_images = 0

    for row in range(len(image_df)):
        label = image_df.iloc[row, 10]  # Adjust index as per your dataset
        same_label_texts = text_df.index[text_df.iloc[:, 10] == label].tolist()
        retrieved = top_k_per_image[row, :K]

        # Count the image once if any same-label text was retrieved
        found = any(text_id in retrieved for text_id in same_label_texts)
        if found:
            matched_images += 1

    return matched_images / len(image_df)

# List of K values to calculate recall for
K_values = [1, 2, 5, 10]

# Calculate and print recall for each K (this loop rebinds the notebook-level `K`)
for K in K_values:
    recall = calculate_recall_for_K(K)
    # Format recall as a percentage with one decimal place
    print(f"Recall@{K}: {recall:.1%}")

## With the other data

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Adjust the file path as necessary for your setup
# NOTE(review): absolute, machine-specific paths — adjust for your environment.
best_solution_matrix = np.load(r'E:\\MScoco\\Weights\\best_solution_matrix_new_700_Parse.npy')
# Load the image and text datasets (both read with a header row)
image_df = pd.read_csv(r'E:\MScoco\R@K\full\NewTest\new_100_img_pca_10.csv')
text_df = pd.read_csv(r'E:\MScoco\R@K\full\NewTest\new_100_Text_pca_10.csv')

In [3]:
# Feature columns: everything except the last two columns for images,
# and everything except the last three columns for texts
image_features = image_df.iloc[:, :-2].values
text_features = text_df.iloc[:, :-3].values

In [4]:
 #Transform both image and text features by the weights matrix
# (the same matrix is applied to both modalities)
transformed_image_features = np.dot(image_features, best_solution_matrix)
transformed_text_features = np.dot(text_features, best_solution_matrix)

In [5]:
# Calculate cosine similarity: rows correspond to images, columns to texts
cosine_sim = cosine_similarity(transformed_image_features, transformed_text_features)

In [None]:
K = 10  # Adjust as needed
# For every image (row), the positions of its K most similar texts, best first
ranked_indices = np.argsort(-cosine_sim, axis=1)[:, :K]
hits = 0
total_relevant_items = 0

for i in range(len(image_df)):
    # The label is read from the column at position 10 in both frames
    image_label = image_df.iloc[i, 10]  # Adjust index as per your dataset
    # Index labels of all texts that carry the same label as this image
    relevant_text_indices = text_df.index[text_df.iloc[:, 10] == image_label].tolist()
    top_k_indices = ranked_indices[i, :K]

    # Calculate the number of relevant items in top K
    relevant_items_count = sum(idx in top_k_indices for idx in relevant_text_indices)
    total_relevant_items += relevant_items_count

    # For each image, print the number of relevant items found in top K
    print(f"Image {i+1}: {relevant_items_count} relevant items found in top {K}")

    # Simple recall calculation: a hit if at least one same-label text is in the top K
    if relevant_items_count > 0:
        hits += 1

# Fraction of image rows with at least one same-label text in the top K
recall = hits / len(image_df)
print(f"\nTotal relevant items found: {total_relevant_items}")
print(f"Recall@{K}: {recall}")

In [None]:
# Function to calculate recall
def calculate_recall_for_K(K):
    """
    Return the fraction of images that have at least one same-label text among their K most similar texts.

    Reads the notebook-level `cosine_sim` (images x texts), `image_df` and `text_df`;
    labels are taken from the column at position 10 of each frame.
    """
    # Text positions ordered by decreasing similarity, truncated to the K best per image
    top_k_per_image = np.argsort(-cosine_sim, axis=1)[:, :K]
    matched_images = 0

    for row in range(len(image_df)):
        label = image_df.iloc[row, 10]  # Adjust index as per your dataset
        same_label_texts = text_df.index[text_df.iloc[:, 10] == label].tolist()
        retrieved = top_k_per_image[row, :K]

        # Count the image once if any same-label text was retrieved
        found = any(text_id in retrieved for text_id in same_label_texts)
        if found:
            matched_images += 1

    return matched_images / len(image_df)

# List of K values to calculate recall for
K_values = [1, 2, 5, 10]
# NOTE(review): `hits` here is the notebook-level value left by an earlier cell,
# not a value produced by calculate_recall_for_K (its `hits` is local to the function).
print('hits',hits)
print("Total Images:",len(image_df))
# Calculate and print recall for each K (this loop rebinds the notebook-level `K`)
for K in K_values:
    recall = calculate_recall_for_K(K)
    print(f"Recall@{K}: {recall}")

In [None]:
# Function to calculate recall
def calculate_recall_for_K(K):
    """
    Return the fraction of images that have at least one same-label text among their K most similar texts.

    Reads the notebook-level `cosine_sim` (images x texts), `image_df` and `text_df`;
    labels are taken from the column at position 10 of each frame.
    """
    # Text positions ordered by decreasing similarity, truncated to the K best per image
    top_k_per_image = np.argsort(-cosine_sim, axis=1)[:, :K]
    matched_images = 0

    for row in range(len(image_df)):
        label = image_df.iloc[row, 10]  # Adjust index as per your dataset
        same_label_texts = text_df.index[text_df.iloc[:, 10] == label].tolist()
        retrieved = top_k_per_image[row, :K]

        # Count the image once if any same-label text was retrieved
        found = any(text_id in retrieved for text_id in same_label_texts)
        if found:
            matched_images += 1

    return matched_images / len(image_df)

# List of K values to calculate recall for
K_values = [1, 2, 5, 10]

# Calculate and print recall for each K (this loop rebinds the notebook-level `K`)
for K in K_values:
    recall = calculate_recall_for_K(K)
    # Format recall as a percentage with one decimal place
    print(f"Recall@{K}: {recall:.1%}")

## Aggregation

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
# Adjust the file path as necessary for your setup
# NOTE(review): absolute, machine-specific paths — adjust for your environment.
best_solution_matrix = np.load(r'E:\\MScoco\\Weights\\best_solution_matrix_new_700_Parse.npy')
# Load the image and text datasets (both read with a header row)
image_df = pd.read_csv(r'E:\MScoco\R@K\full\NewTest\new_100_img_pca_10.csv')
text_df = pd.read_csv(r'E:\MScoco\R@K\full\NewTest\new_100_Text_pca_10.csv')
# Feature columns: everything except the last two columns for images,
# and everything except the last three columns for texts
image_features = image_df.iloc[:, :-2].values
text_features = text_df.iloc[:, :-3].values

 #Transform both image and text features by the weights matrix
transformed_image_features = np.dot(image_features, best_solution_matrix)
transformed_text_features = np.dot(text_features, best_solution_matrix)
 

In [2]:
# Last column of each frame: image names for the image dataset, captions for the text dataset
image_names = image_df.iloc[:, -1]  # Image names for the image dataset
captions = text_df.iloc[:, -1]  # Captions for the text dataset


In [3]:
# Convert transformed features into a DataFrame and attach the image name of each row
df_transformed_image_features = pd.DataFrame(transformed_image_features)
df_transformed_image_features['ImageName'] = image_names

In [None]:
# Display the transformed image features with their image names
df_transformed_image_features

In [5]:
# Average the transformed features over all rows sharing the same 'ImageName';
# reset_index() turns 'ImageName' back into the first column of the result.
aggregated_image_features = df_transformed_image_features.groupby('ImageName').mean().reset_index()

In [None]:
# Display the per-image averaged features
aggregated_image_features

In [None]:
# Display the caption column taken from text_df
captions

In [8]:
# Convert the transformed text features into a DataFrame and attach the caption of each row
df_transformed_text_features = pd.DataFrame(transformed_text_features)
df_transformed_text_features['captions'] = captions

In [None]:
# Display the transformed text features with their captions
df_transformed_text_features

In [34]:
# Average the transformed text features over rows sharing the same caption;
# reset_index() turns 'captions' back into the first column of the result.
aggregated_image_text = df_transformed_text_features.groupby('captions').mean().reset_index()

In [None]:
# Display the per-caption averaged text features
aggregated_image_text

In [10]:
caption_image_names = text_df.iloc[:, -2]  # Image names associated with captions in the second-last column of text_df

# Step 1: Create a mapping from captions to their associated image names.
# Because this is a dict, a caption text that occurs on several rows keeps only the
# image name of its last occurrence.
caption_to_image_mapping = dict(zip(captions, caption_image_names))

In [None]:
# Display the caption -> image name mapping
caption_to_image_mapping

In [36]:
# Look up the image name of every aggregated caption and store it in a new 'ImageName' column.
# Note: this adds the column to `aggregated_image_text` in place.
aggregated_image_text['ImageName'] = aggregated_image_text['captions'].map(caption_to_image_mapping)

In [40]:
# Drop the 'captions' column, leaving the feature columns followed by 'ImageName'
new_df_text = aggregated_image_text.drop('captions', axis=1)

In [55]:
# Keep as many text rows as there are rows in aggregated_image_features.
# The count is derived from the frame itself instead of a hand-maintained constant,
# so it stays correct when the dataset changes.
n_aggregated_images = len(aggregated_image_features)
new_df_text_filtered = new_df_text.iloc[:n_aggregated_images, :]

# Extract the feature vectors from both frames: new_df_text ends with the 'ImageName'
# column and aggregated_image_features starts with it, so those columns are skipped
# regardless of how many feature columns there are.
vectors_text = new_df_text_filtered.iloc[:, :-1]
vectors_image = aggregated_image_features.iloc[:, 1:]

# Calculate cosine similarity matrix: rows correspond to texts, columns to images
cosine_similarity_matrix = cosine_similarity(vectors_text, vectors_image)


In [57]:
# Image names from the first column of aggregated_image_features. All rows are used
# (instead of a hand-maintained row count) so the names line up one-to-one with the
# rows of `vectors_image`, i.e. with the columns of `cosine_similarity_matrix`.
image_names_image = aggregated_image_features.iloc[:, 0].values

# Image names from the last column of new_df_text_filtered (one per text row)
image_names_text = new_df_text_filtered.iloc[:, -1].values


In [None]:
# Sanity check: compare the row/column counts of the text and image frames
print("Shape of new_df_text:", new_df_text.shape)
print("Shape of aggregated_image_features:", aggregated_image_features.shape)


In [None]:
def recall_at_k(image_names_text, image_names_image, k):
    """
    Text-to-image Recall@k using the notebook-level `cosine_similarity_matrix`.

    Row i of `cosine_similarity_matrix` holds the similarities between text i and
    every image, so the top-k column indices identify images. A text counts as
    correct when its own image name is among the names of those k images.

    Parameters:
    - image_names_text: sequence with the image name of each text (one per matrix row).
    - image_names_image: sequence with the image name of each image (one per matrix column).
    - k: number of top-ranked images to consider.

    Returns:
    - recall: fraction of texts whose image is retrieved in the top k; 0.0 when there are no texts.
    """
    num_queries = len(image_names_text)
    if num_queries == 0:
        # Nothing to evaluate; avoid dividing by zero below.
        return 0.0

    num_correct = 0
    for i, image_name_text in enumerate(image_names_text):
        # Column (image) indices of the k most similar images for this text
        top_k_indices = np.argsort(-1 * cosine_similarity_matrix[i])[:k]
        # The indices refer to images, so they must be looked up in image_names_image
        retrieved_image_names = [image_names_image[index] for index in top_k_indices]
        if image_name_text in retrieved_image_names:
            num_correct += 1

    return num_correct / num_queries

# Calculate Recall@1, Recall@5, Recall@10 (the similarity matrix is read from the notebook namespace)
recall_1 = recall_at_k(image_names_text, image_names_image, 1)
recall_5 = recall_at_k(image_names_text, image_names_image, 5)
recall_10 = recall_at_k(image_names_text, image_names_image, 10)

# Report each recall as a percentage with two decimals
print(f"Recall@1: {recall_1*100:.2f}%")
print(f"Recall@5: {recall_5*100:.2f}%")
print(f"Recall@10: {recall_10*100:.2f}%")

In [None]:
def recall_at_k(image_names_text, image_names_image, k, cosine_similarity_matrix):
    """
    Text-to-image Recall@k, printing the retrieved indices of every text for debugging.

    Row i of `cosine_similarity_matrix` holds the similarities between text i and
    every image, so the top-k column indices identify images. A text counts as
    correct when its own image name is among the names of those k images.

    Parameters:
    - image_names_text: sequence with the image name of each text (one per matrix row).
    - image_names_image: sequence with the image name of each image (one per matrix column).
    - k: number of top-ranked images to consider.
    - cosine_similarity_matrix: 2-D array of similarities, texts as rows and images as columns.

    Returns:
    - recall: fraction of texts whose image is retrieved in the top k; 0.0 when there are no texts.
    """
    num_queries = len(image_names_text)
    if num_queries == 0:
        # Nothing to evaluate; avoid dividing by zero below.
        return 0.0

    num_correct = 0
    for i, image_name_text in enumerate(image_names_text):
        # Column (image) indices of the k most similar images for this text
        top_k_indices = np.argsort(-1 * cosine_similarity_matrix[i])[:k]
        print(f"Top {k} indices for image name {image_name_text}: {top_k_indices}")
        # The indices refer to images, so they must be looked up in image_names_image
        retrieved_image_names = [image_names_image[index] for index in top_k_indices]
        if image_name_text in retrieved_image_names:
            num_correct += 1

    return num_correct / num_queries

# Calculate Recall@1, Recall@5, Recall@10, passing the similarity matrix explicitly
recall_1 = recall_at_k(image_names_text, image_names_image, 1, cosine_similarity_matrix)
recall_5 = recall_at_k(image_names_text, image_names_image, 5, cosine_similarity_matrix)
recall_10 = recall_at_k(image_names_text, image_names_image, 10, cosine_similarity_matrix)

# Report each recall as a percentage with two decimals
print(f"Recall@1: {recall_1*100:.2f}%")
print(f"Recall@5: {recall_5*100:.2f}%")
print(f"Recall@10: {recall_10*100:.2f}%")


## 20x20 R@K

In [6]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

def calculate_recall_and_print(image_path, text_path, best_solution_matrix_path,index_label, k_values=(1, 5, 10)):
    """
    Load image/text features, transform them with a weight matrix and print label-based Recall@K.

    An image counts as a hit for a given K when at least one of its K most
    similar texts (by cosine similarity of the transformed features) carries
    the same label as the image.

    Parameters:
    - image_path: path to the image CSV (read with a header row); all columns except the last two are features.
    - text_path: path to the text CSV (read with a header row); all columns except the last three are features.
    - best_solution_matrix_path: path to the .npy file holding the weight matrix.
    - index_label: positional index of the label column, used for both CSV files.
    - k_values: iterable of K values to report; defaults to (1, 5, 10).

    Returns:
    - None. The dataset sizes and one "Recall@K" line per K are printed.
    """
    # Load the best solution matrix
    best_solution_matrix = np.load(best_solution_matrix_path)

    # Load the image and text datasets
    image_df = pd.read_csv(image_path)
    text_df = pd.read_csv(text_path)

    # Feature columns: all but the last two (images) / all but the last three (texts)
    image_features = image_df.iloc[:, :-2].values
    text_features = text_df.iloc[:, :-3].values

    # Transform both image and text features by the weights matrix
    transformed_image_features = np.dot(image_features, best_solution_matrix)
    transformed_text_features = np.dot(text_features, best_solution_matrix)

    # Calculate cosine similarity: rows correspond to images, columns to texts
    cosine_sim = cosine_similarity(transformed_image_features, transformed_text_features)

    # Print the size of the datasets
    print("Size of image dataset:", image_features.shape)
    print("Size of text dataset:", text_features.shape)
    print('Size of best solution matrix:', best_solution_matrix.shape)

    # Rank the texts once; every K only needs a prefix of this ranking
    ranked_text_positions = np.argsort(-cosine_sim, axis=1)

    # Object dtype keeps the comparison element-wise even for mixed label types
    image_labels = image_df.iloc[:, index_label].to_numpy(dtype=object)
    text_labels = text_df.iloc[:, index_label].to_numpy(dtype=object)

    # Calculate and print recall for each K
    for K in k_values:
        # Labels of the K best texts of every image, shape (n_images, <=K)
        retrieved_labels = text_labels[ranked_text_positions[:, :K]]
        # An image is a hit when any retrieved text shares its label
        hits = int((retrieved_labels == image_labels[:, np.newaxis]).any(axis=1).sum())
        recall = hits / len(image_df)
        # Format recall as a percentage with one decimal place
        print(f"Recall@{K}: {recall:.1%}")



In [None]:
# Example usage with the 20-component PCA files; the label column is at position 20.
# NOTE(review): absolute, machine-specific paths — adjust for your environment.
image_path=r"E:\MScoco\R@K\full\NewTest\new_100_img_pca_20.csv"
text_path=r"E:\MScoco\R@K\full\NewTest\new_100_Text_pca_20.csv"
best_solution_matrix_path=r"E:\MScoco\Weights\best_solution_matrix_NEW_S_file7_20tanewVersion800.npy"
calculate_recall_and_print(image_path,text_path,best_solution_matrix_path,20)
