In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm  # For progress bar
import os

# ---------------------- Configuration ----------------------

# Paths to your CSV files
DATABASE_PATH = '5_alteast4_movielens_data.csv'
AUX_DATA_PATH = '5_percent_mldataset_intersected_adversary_data_for.csv'
WEIGHTS_PATH = '5_movie_weights.csv'

# Output directories for results
OUTPUT_DIR_UNIFORM = 'deanonymization_results_uniform'
OUTPUT_DIR_NO_TOP_100 = 'deanonymization_results_no_top_100'
OUTPUT_DIR_NO_TOP_500 = 'deanonymization_results_no_top_500'
# Ensure output directories exist
os.makedirs(OUTPUT_DIR_UNIFORM, exist_ok=True)
os.makedirs(OUTPUT_DIR_NO_TOP_100, exist_ok=True)
os.makedirs(OUTPUT_DIR_NO_TOP_500, exist_ok=True)



# Batch size for processing ml users
BATCH_SIZE = 1000

# ---------------------- Load Datasets ----------------------

print("Loading datasets...")
database = pd.read_csv(DATABASE_PATH)
aux_data = pd.read_csv(AUX_DATA_PATH)
weights_df = pd.read_csv(WEIGHTS_PATH)



# Get unique 'imdbId' values from both dataframes
unique_aux_ids = set(aux_data['imdbId'].unique())
unique_ml_ids = set(database['imdbId'].unique())

# Check if the difference is zero
if unique_aux_ids - unique_ml_ids == set():
    print("All GOOD,no extra movied id found")
else:
    print("adv is not a subset of data")
    
# Drop rows with empty 'rating_value' in aux_data and reset index
aux_data = aux_data.dropna(subset=['rating_value']).reset_index(drop=True)

# Half the ratings as MovieLens scale is 0-5
aux_data['rating_value'] = aux_data['rating_value'] / 2

# ---------------------- Preprocessing ----------------------

# Convert 'imdbId' to strings to ensure consistency
database['imdbId'] = database['imdbId'].astype(str)
aux_data['imdbId'] = aux_data['imdbId'].astype(str)

# Rename 'userId' columns to avoid confusion
database = database.rename(columns={'userId': 'ml_userId'})
aux_data = aux_data.rename(columns={'userId': 'aux_userId'})

# Convert weights to a dictionary for fast lookup
movie_weights = dict(zip(weights_df['imdbId'].astype(str), weights_df['weight']))


# ---------------------- Define Filtering Function ----------------------

def filter_top_n_movies(database, aux_data, n=500):
    """
    Filters out the top N most rated movies from both MovieLens and auxiliary datasets.

    Parameters:
    - database (DataFrame): MovieLens dataset.
    - aux_data (DataFrame): Auxiliary adversary dataset.
    - n (int): Number of top movies to exclude.

    Returns:
    - Tuple of filtered (database, aux_data, top_movies)
    """
    print(f"Excluding top {n} most popular movies...")

    # Identify top N movies based on MovieLens dataset
    top_movies = database['imdbId'].value_counts().head(n).index.tolist()
    # print(f"Top {n} movies to exclude: {top_movies}")

    # Filter out top N movies from both datasets
    filtered_database = database[~database['imdbId'].isin(top_movies)].reset_index(drop=True)
    filtered_aux_data = aux_data[~aux_data['imdbId'].isin(top_movies)].reset_index(drop=True)

    print(f"Database size after filtering: {filtered_database.shape}")
    print(f"Auxiliary data size after filtering: {filtered_aux_data.shape}")

    return filtered_database, filtered_aux_data, top_movies

# ---------------------- Apply Filtering ----------------------

# Choose which output directory to use based on exclusion
# Here, we're focusing on excluding top 500 movies
EXCLUDE_TOP_N = 500
database, aux_data, top_movies = filter_top_n_movies(database, aux_data, n=EXCLUDE_TOP_N)



# ---------------------- Create imdbId to aux_userIds Mapping ----------------------

print("Creating imdbId to aux_userIds mapping...")
# Group aux_data by 'imdbId' and aggregate 'aux_userId' into sets for fast lookup
imdb_to_aux_users = aux_data.groupby('imdbId')['aux_userId'].apply(set).to_dict()
print(f"Total unique movies in aux data: {len(imdb_to_aux_users)}")

# ---------------------- Get Unique ml_userIds ----------------------

ml_user_ids = database['ml_userId'].unique()
total_ml_users = len(ml_user_ids)
print(f"Total unique MovieLens users: {total_ml_users}")



# ---------------------- Processing Function ----------------------

def process_ml_users_in_batches(database, ml_user_ids, output_dir, exclude_top_n=0):
    """
    Process MovieLens users in batches, finding auxiliary users who have rated at least
    one common movie, and save the results to CSV files.

    Parameters:
    - database (DataFrame): Filtered MovieLens dataset.
    - ml_user_ids (array-like): Unique MovieLens user IDs.
    - output_dir (str): Directory to save batch results.
    - exclude_top_n (int): Number of top movies to exclude (already handled).
    """
    # Since top N movies are already excluded before calling this function, no need to exclude again.
    # This parameter can be retained for flexibility or removed if unnecessary.

    # Iterate over batches of MovieLens users
    for i in range(0, len(ml_user_ids), BATCH_SIZE):
        batch_ml_user_ids = ml_user_ids[i:i + BATCH_SIZE]
        batch_start = i
        batch_end = i + len(batch_ml_user_ids) - 1
        print(f"\nProcessing MovieLens users {batch_start} to {batch_end} (Batch size: {len(batch_ml_user_ids)})")

        # Filter database for the current batch of ml users
        ml_batch = database[database['ml_userId'].isin(batch_ml_user_ids)]

        # Group by 'ml_userId' and aggregate 'imdbId' into sets
        ml_user_imdbs = ml_batch.groupby('ml_userId')['imdbId'].apply(set).to_dict()

        # Initialize results list
        results = []

        # Iterate over each ml user in the batch
        for ml_user_id, ml_imdb_set in tqdm(ml_user_imdbs.items(), desc="Processing ml users in batch"):
            # Initialize a set to collect aux_userIds who have rated at least one common movie
            aux_users_set = set()

            # Iterate over each imdbId rated by the ml user
            for imdb_id in ml_imdb_set:
                # Get aux_userIds who have rated this imdbId
                aux_user_ids = imdb_to_aux_users.get(imdb_id, set())
                aux_users_set.update(aux_user_ids)

            # Convert aux_users_set to a sorted list for consistency
            aux_users_list = sorted(aux_users_set)

            # Store the result
            results.append({
                'ml_user_id': ml_user_id,
                'aux_users': ','.join(map(str, aux_users_list))  # Convert list to comma-separated string
            })

        # Create a DataFrame from the results
        batch_results_df = pd.DataFrame(results)

        # Define the output filename
        output_filename = f'batch_{batch_start}_to_{batch_end}_results.csv'
        output_filepath = os.path.join(output_dir, output_filename)

        # Save the batch results to CSV
        batch_results_df.to_csv(output_filepath, index=False)
        print(f"Saved results for MovieLens users {batch_start} to {batch_end} to '{output_filepath}'")

        # Clear results to free memory
        del results, batch_results_df, ml_batch, ml_user_imdbs



# ---------------------- Main Execution ----------------------

# print("Starting batch processing of MovieLens users (Uniform)...")
# process_ml_users_in_batches(database, ml_user_ids, OUTPUT_DIR_UNIFORM)

# print("Starting batch processing of MovieLens users (Without Top 100 Movies)...")
# process_ml_users_in_batches(database, ml_user_ids, OUTPUT_DIR_NO_TOP_100, exclude_top_n=100)

print("Starting batch processing of MovieLens users (Without Top 500 Movies)...")
process_ml_users_in_batches(database, ml_user_ids, OUTPUT_DIR_NO_TOP_500, exclude_top_n=500)

print("\nBatch processing completed successfully.")


Loading datasets...
All GOOD,no extra movied id found
Excluding top 500 most popular movies...
Database size after filtering: (217713, 4)
Auxiliary data size after filtering: (1484750, 5)
Creating imdbId to aux_userIds mapping...
Total unique movies in aux data: 5910
Total unique MovieLens users: 7519
Starting batch processing of MovieLens users (Without Top 500 Movies)...

Processing MovieLens users 0 to 999 (Batch size: 1000)


Processing ml users in batch: 100%|██████████| 1000/1000 [00:05<00:00, 174.83it/s]


Saved results for MovieLens users 0 to 999 to 'deanonymization_results_no_top_500\batch_0_to_999_results.csv'

Processing MovieLens users 1000 to 1999 (Batch size: 1000)


Processing ml users in batch: 100%|██████████| 1000/1000 [00:05<00:00, 192.92it/s]


Saved results for MovieLens users 1000 to 1999 to 'deanonymization_results_no_top_500\batch_1000_to_1999_results.csv'

Processing MovieLens users 2000 to 2999 (Batch size: 1000)


Processing ml users in batch: 100%|██████████| 1000/1000 [00:05<00:00, 174.54it/s]


Saved results for MovieLens users 2000 to 2999 to 'deanonymization_results_no_top_500\batch_2000_to_2999_results.csv'

Processing MovieLens users 3000 to 3999 (Batch size: 1000)


Processing ml users in batch: 100%|██████████| 1000/1000 [00:05<00:00, 180.22it/s]


Saved results for MovieLens users 3000 to 3999 to 'deanonymization_results_no_top_500\batch_3000_to_3999_results.csv'

Processing MovieLens users 4000 to 4999 (Batch size: 1000)


Processing ml users in batch: 100%|██████████| 1000/1000 [00:05<00:00, 197.37it/s]


Saved results for MovieLens users 4000 to 4999 to 'deanonymization_results_no_top_500\batch_4000_to_4999_results.csv'

Processing MovieLens users 5000 to 5999 (Batch size: 1000)


Processing ml users in batch: 100%|██████████| 1000/1000 [00:05<00:00, 184.95it/s]


Saved results for MovieLens users 5000 to 5999 to 'deanonymization_results_no_top_500\batch_5000_to_5999_results.csv'

Processing MovieLens users 6000 to 6999 (Batch size: 1000)


Processing ml users in batch: 100%|██████████| 1000/1000 [00:05<00:00, 187.20it/s]


Saved results for MovieLens users 6000 to 6999 to 'deanonymization_results_no_top_500\batch_6000_to_6999_results.csv'

Processing MovieLens users 7000 to 7518 (Batch size: 519)


Processing ml users in batch: 100%|██████████| 519/519 [00:02<00:00, 198.60it/s]


Saved results for MovieLens users 7000 to 7518 to 'deanonymization_results_no_top_500\batch_7000_to_7518_results.csv'

Batch processing completed successfully.


In [2]:
# Step 1: Create a Unified Set of imdbId Values
all_imdbIds = pd.unique(pd.concat([database['imdbId'], aux_data['imdbId']]))
all_imdbIds = sorted(all_imdbIds)  # Ensure consistent ordering
num_movies = len(all_imdbIds)
print(f"Total unique movies across both datasets: {num_movies}")


Total unique movies across both datasets: 5916


In [3]:
# Step 2: Map imdbId to Column Indice
imdbId_to_col_idx = {imdbId: idx for idx, imdbId in enumerate(all_imdbIds)}


In [4]:
# Step 3a: Map Users to Row Indices
ml_userIds = sorted(database['ml_userId'].unique())
ml_userId_to_row_idx = {userId: idx for idx, userId in enumerate(ml_userIds)}
num_ml_users = len(ml_userIds)
print(f"Total unique MovieLens users: {num_ml_users}")

Total unique MovieLens users: 7519


In [5]:
# Step 3b: Map Users to Row Indices

aux_userIds = sorted(aux_data['aux_userId'].unique())
aux_userId_to_row_idx = {userId: idx for idx, userId in enumerate(aux_userIds)}
num_aux_users = len(aux_userIds)
print(f"Total unique auxiliary users: {num_aux_users}")


Total unique auxiliary users: 706585


In [6]:
from scipy.sparse import coo_matrix

# Prepare data for the ratings matrix
database_rows = []
database_cols = []
database_ratings_data = []
database_timestamps_data = []

for idx, row in tqdm(database.iterrows(), total=database.shape[0], desc='Processing database ratings'):
    userId = row['ml_userId']
    imdbId = row['imdbId']
    rating = row['rating']
    timestamp = row['timestamp']
    
    row_idx = ml_userId_to_row_idx[userId]
    col_idx = imdbId_to_col_idx[imdbId]
    
    database_rows.append(row_idx)
    database_cols.append(col_idx)
    database_ratings_data.append(rating)
    database_timestamps_data.append(timestamp)

database_ratings_matrix = coo_matrix((database_ratings_data, (database_rows, database_cols)),
                                     shape=(num_ml_users, num_movies)).tocsr()

database_timestamps_matrix = coo_matrix((database_timestamps_data, (database_rows, database_cols)),
                                        shape=(num_ml_users, num_movies)).tocsr()

Processing database ratings: 100%|██████████| 217713/217713 [00:20<00:00, 10779.51it/s]


In [7]:
# Convert 'rating_value' to numeric, setting errors to NaN
aux_data['rating_value'] = pd.to_numeric(aux_data['rating_value'], errors='coerce')



In [8]:
aux_rows = []
aux_cols = []
aux_ratings_data = []
aux_timestamps_data = []
for idx, row in tqdm(aux_data.iterrows(), total=aux_data.shape[0], desc='Processing aux ratings'):
    userId = row['aux_userId']
    imdbId = row['imdbId']
    rating = row['rating_value']
    timestamp = row['review_date_epoch']
    
    if pd.isna(rating):
        continue  # Skip entries without valid ratings
    
    row_idx = aux_userId_to_row_idx[userId]
    col_idx = imdbId_to_col_idx.get(imdbId)
    if col_idx is None:
        continue  # Should not happen if we've combined all imdbIds
    
    aux_rows.append(row_idx)
    aux_cols.append(col_idx)
    aux_ratings_data.append(rating)
    aux_timestamps_data.append(timestamp)

aux_ratings_matrix = coo_matrix((aux_ratings_data, (aux_rows, aux_cols)),
                                shape=(num_aux_users, num_movies)).tocsr()


Processing aux ratings: 100%|██████████| 1484750/1484750 [01:02<00:00, 23628.97it/s]


In [9]:
aux_timestamps_matrix = coo_matrix((aux_timestamps_data, (aux_rows, aux_cols)),
                                   shape=(num_aux_users, num_movies)).tocsr()


In [10]:
# Step 5: Prepare Weights Array
weights_df['imdbId'] = weights_df['imdbId'].astype(str)
weights_df['col_idx'] = weights_df['imdbId'].map(imdbId_to_col_idx)
weights_df = weights_df.dropna(subset=['col_idx'])
weights = np.zeros(num_movies)

weights[weights_df['col_idx'].astype(int)] = weights_df['weight'].values


In [11]:
def compute_rating_similarity(ml_ratings, candidate_ratings, rating_threshold=1):
    # Compute absolute difference
    rating_diff = np.abs(ml_ratings - candidate_ratings)
    # Valid ratings are those where both users have rated the movie
    valid_mask = (ml_ratings > 0) & (candidate_ratings > 0)
    # Ratings are similar if the difference is within the threshold
    rating_sim = np.zeros_like(ml_ratings)
    rating_sim[valid_mask] = (rating_diff[valid_mask] <= rating_threshold).astype(float)
    return rating_sim

def compute_timestamp_similarity(ml_timestamps, candidate_timestamps, time_threshold=14 * 24 * 3600):
    # Compute absolute difference in timestamps
    time_diff = np.abs(ml_timestamps - candidate_timestamps)
    # Valid timestamps are those where both users have timestamps
    valid_mask = (ml_timestamps > 0) & (candidate_timestamps > 0)
    # Timestamps are similar if the difference is within the threshold
    timestamp_sim = np.zeros_like(ml_timestamps)
    timestamp_sim[valid_mask] = (time_diff[valid_mask] <= time_threshold).astype(float)
    return timestamp_sim


In [12]:
def compute_rating_presence_similarity(ml_ratings, candidate_ratings):
    # Create a mask where both users have rated the movie (presence check)
    valid_mask = (ml_ratings > 0) & (candidate_ratings > 0)
    # Set similarity to 1.0 if both users have rated the movie, 0.0 otherwise
    rating_sim = np.zeros_like(ml_ratings)
    rating_sim[valid_mask] = 1.0
    return rating_sim
def compute_timestamp_presence_similarity(ml_timestamps, candidate_timestamps):
    # Create a mask where both users have timestamps (presence check)
    valid_mask = (ml_timestamps > 0) & (candidate_timestamps > 0)
    # Set similarity to 1.0 if both users have timestamps, 0.0 otherwise
    timestamp_sim = np.zeros_like(ml_timestamps)
    timestamp_sim[valid_mask] = 1.0
    return timestamp_sim



In [None]:
import numpy as np
from tqdm import tqdm

def compute_scores_for_ml_user(ml_user_id, candidate_aux_user_ids, phi=1.5, sub_batch_size=10000):
    """
    Compute scores between one ml_user_id and all candidate auxiliary users in sub-batches to manage memory usage.

    Parameters:
    - ml_user_id (int): The MovieLens user ID to de-anonymize.
    - candidate_aux_user_ids (list of int): List of auxiliary user IDs who have at least one movie rating in common.
    - phi (float): Eccentricity threshold to determine a unique match.
    - sub_batch_size (int): Number of auxiliary users to process in each sub-batch.

    Returns:
    - tuple or None: Returns (matched_aux_userId, max_score) if a unique match is found, else None.
    """

    # Get ml_user_idx
    ml_user_idx = ml_userId_to_row_idx.get(ml_user_id)
    if ml_user_idx is None:
        return None  # ML user ID not found

    # Get the indices of the movies rated by the ml_user_id
    ml_rated_movies = database_ratings_matrix.getrow(ml_user_idx).indices
    ml_ratings = database_ratings_matrix.getrow(ml_user_idx).data
    ml_timestamps = database_timestamps_matrix.getrow(ml_user_idx).data
    ml_weights = weights[ml_rated_movies]

    num_movies = len(ml_rated_movies)

    if num_movies == 0:
        # ml_user_id has not rated any movies
        return None

    # Map candidate_aux_user_ids to row indices, ensuring they exist
    candidate_aux_user_indices = [
        aux_userId_to_row_idx.get(aux_user_id)
        for aux_user_id in candidate_aux_user_ids
    ]
    # Remove None values (aux_user_ids not found)
    candidate_aux_user_indices = [idx for idx in candidate_aux_user_indices if idx is not None]

    num_candidates = len(candidate_aux_user_indices)

    if num_candidates == 0:
        return None  # No candidates found

    # Initialize an array to accumulate scores
    scores = np.zeros(num_candidates)

    # Process auxiliary users in sub-batches
    for start in range(0, num_candidates, sub_batch_size):
        end = min(start + sub_batch_size, num_candidates)
        sub_batch_indices = candidate_aux_user_indices[start:end]

        # Get candidate ratings and timestamps matrices (sub_batch_size x num_movies)
        candidate_ratings = aux_ratings_matrix[sub_batch_indices][:, ml_rated_movies].toarray()
        candidate_timestamps = aux_timestamps_matrix[sub_batch_indices][:, ml_rated_movies].toarray()

        # Broadcast ml_ratings and ml_timestamps to match sub-batch size
        # Shape: (sub_batch_size, num_movies)
        ml_ratings_matrix = np.tile(ml_ratings, (len(sub_batch_indices), 1))
        ml_timestamps_matrix = np.tile(ml_timestamps, (len(sub_batch_indices), 1))

        # Compute similarities
        rating_sim = compute_rating_similarity(ml_ratings_matrix, candidate_ratings)
        timestamp_sim = compute_timestamp_similarity(ml_timestamps_matrix, candidate_timestamps)

        sim = rating_sim * timestamp_sim

        # Multiply similarities by weights and sum over movies
        # Each movie's weight is applied to the corresponding column
        # Shape of sim: (sub_batch_size, num_movies)
        # Shape of ml_weights: (num_movies,)
        # Broadcasting ml_weights across rows
        weighted_sim = sim * ml_weights

        # Sum across movies to get total score for each auxiliary user in the sub-batch
        sub_batch_scores = np.sum(weighted_sim, axis=1)

        # Accumulate the scores into the main scores array
        scores[start:end] += sub_batch_scores

        # Free memory for this sub-batch
        del candidate_ratings, candidate_timestamps, ml_ratings_matrix, ml_timestamps_matrix, rating_sim, timestamp_sim, sim, weighted_sim, sub_batch_scores
        gc.collect()

    # After processing all sub-batches, determine if there's a unique match
    max_score_idx = np.argmax(scores)
    max_score = scores[max_score_idx]
    sorted_scores = np.sort(scores)[::-1]
    max2_score = sorted_scores[1] if len(sorted_scores) > 1 else 0
    sigma = np.std(scores)
    eccentricity = (max_score - max2_score) / sigma if sigma > 0 else np.inf
    
    min_score_threshold = 0.1  # Define a minimum score required for a valid match
    
    if eccentricity < phi or max_score < min_score_threshold:
        return None  # No unique match found
    else:
        # Return the matched auxiliary user and score
        matched_aux_user_idx = candidate_aux_user_indices[max_score_idx]
        matched_aux_userId = aux_userIds[matched_aux_user_idx]
        return matched_aux_userId, max_score


In [None]:
import numpy as np
from tqdm import tqdm
import gc

def compute_scores_for_ml_user(ml_user_id, candidate_aux_user_ids, phi=1.5, sub_batch_size=10000, 
                               max_rating_diff=1, max_time_diff=14*24*3600, rho0=2, d0=30*24*3600):
    """
    Compute scores between one ml_user_id and all candidate auxiliary users in sub-batches to manage memory usage.

    Parameters:
    - ml_user_id (int): The MovieLens user ID to de-anonymize.
    - candidate_aux_user_ids (list of int): List of auxiliary user IDs who have at least one movie rating in common.
    - phi (float): Eccentricity threshold to determine a unique match.
    - sub_batch_size (int): Number of auxiliary users to process in each sub-batch.
    - max_rating_diff (int): Maximum allowed difference in ratings.
    - max_time_diff (int): Maximum allowed difference in timestamps (in seconds).
    - rho0 (float): Scaling parameter for rating differences.
    - d0 (float): Scaling parameter for timestamp differences (in seconds).

    Returns:
    - tuple or None: Returns (matched_aux_userId, max_score) if a unique match is found, else None.
    """

    # Get ml_user_idx
    ml_user_idx = ml_userId_to_row_idx.get(ml_user_id)
    if ml_user_idx is None:
        return None  # ML user ID not found

    # Get the indices of the movies rated by the ml_user_id
    ml_rated_movies = database_ratings_matrix.getrow(ml_user_idx).indices
    ml_ratings = database_ratings_matrix.getrow(ml_user_idx).data
    ml_timestamps = database_timestamps_matrix.getrow(ml_user_idx).data
    ml_weights = weights[ml_rated_movies]

    if len(ml_rated_movies) == 0:
        # ml_user_id has not rated any movies
        return None

    # Map candidate_aux_user_ids to row indices, ensuring they exist
    candidate_aux_user_indices = [
        aux_userId_to_row_idx.get(aux_user_id)
        for aux_user_id in candidate_aux_user_ids
    ]
    # Remove None values (aux_user_ids not found)
    candidate_aux_user_indices = [idx for idx in candidate_aux_user_indices if idx is not None]

    if len(candidate_aux_user_indices) == 0:
        return None  # No candidates found

    # Initialize an array to accumulate scores
    scores = np.zeros(len(candidate_aux_user_indices))

    # Process auxiliary users in sub-batches
    for start in range(0, len(candidate_aux_user_indices), sub_batch_size):
        end = min(start + sub_batch_size, len(candidate_aux_user_indices))
        sub_batch_indices = candidate_aux_user_indices[start:end]

        # Get candidate data for the current sub-batch
        candidate_ratings_matrix = aux_ratings_matrix[sub_batch_indices][:, ml_rated_movies]
        candidate_timestamps_matrix = aux_timestamps_matrix[sub_batch_indices][:, ml_rated_movies]

        # Convert to dense arrays for easier manipulation
        candidate_ratings = candidate_ratings_matrix.toarray()
        candidate_timestamps = candidate_timestamps_matrix.toarray()

        # Prepare ML user data matrices
        ml_ratings_matrix = np.tile(ml_ratings, (len(sub_batch_indices), 1))
        ml_timestamps_matrix = np.tile(ml_timestamps, (len(sub_batch_indices), 1))
        ml_weights_matrix = np.tile(ml_weights, (len(sub_batch_indices), 1))

        # Create a valid mask where both users have rated the same movie
        valid_mask = (ml_ratings_matrix > 0) & (candidate_ratings > 0)

        # If there are no overlapping movies for any auxiliary users, skip this sub-batch
        if not np.any(valid_mask):
            continue

        # Compute absolute differences
        rating_diff = np.abs(ml_ratings_matrix - candidate_ratings)
        time_diff = np.abs(ml_timestamps_matrix - candidate_timestamps)

        # Apply valid mask to zero out differences where either user hasn't rated the movie
        rating_diff[~valid_mask] = 0
        time_diff[~valid_mask] = 0

        # Create a mask for threshold-based ignoring
        threshold_mask = (rating_diff <= max_rating_diff) & (time_diff <= max_time_diff)

        # Apply the threshold mask: set differences exceeding thresholds to zero similarity
        # Compute exponential similarities only where thresholds are met
        rating_exp = np.exp(-rating_diff / rho0) * threshold_mask
        time_exp = np.exp(-time_diff / d0) * threshold_mask

        # Calculate similarity score
        sim = (rating_exp + time_exp) * ml_weights_matrix

        # Ensure that similarity is zero where thresholds are exceeded
        sim *= threshold_mask

        # Sum over movies to get total score for each auxiliary user in the sub-batch
        sub_batch_scores = np.sum(sim, axis=1)

        # Accumulate the scores for the sub-batch
        scores[start:end] = sub_batch_scores

        # Free memory for this sub-batch
        del candidate_ratings, candidate_timestamps, ml_ratings_matrix, ml_timestamps_matrix, rating_diff, time_diff, sim, sub_batch_scores
        gc.collect()

    # Determine if there is a unique match based on eccentricity
    max_score_idx = np.argmax(scores)
    max_score = scores[max_score_idx]
    sorted_scores = np.sort(scores)[::-1]
    max2_score = sorted_scores[1] if len(sorted_scores) > 1 else 0
    sigma = np.std(scores)
    eccentricity = (max_score - max2_score) / sigma if sigma > 0 else np.inf
    # print(max_score,max2_score,sigma)
    min_score_threshold = 0.1  # Minimum score required for a valid match

    if eccentricity < phi or max_score < min_score_threshold:
        return None  # No unique match found
    else:
        # Return the matched auxiliary user and the score
        matched_aux_user_idx = candidate_aux_user_indices[max_score_idx]
        matched_aux_userId = aux_userIds[matched_aux_user_idx]
        return matched_aux_userId, max_score


In [None]:
import gc
from tqdm import tqdm

phi = 1.5  # Eccentricity threshold
matches = []

# Select the first batch file (or specify the desired batch file directly)
batch_files = [os.path.join(OUTPUT_DIR_NO_TOP_500, f) for f in os.listdir(OUTPUT_DIR_NO_TOP_500) if f.endswith('.csv')]
if not batch_files:
    print("No batch files found in the specified directory.")
else:
    # Process only the first batch file
    batch_file = batch_files[0]  # Change the index if you want to select a different file
    print(f"Processing batch file: {batch_file}")

    # Load the batch_results_df
    batch_results_df = pd.read_csv(batch_file)

    for idx, row in tqdm(batch_results_df.iterrows(), total=batch_results_df.shape[0], desc='Processing rows'):
        ml_user_id = row['ml_user_id']
        aux_users_str = row['aux_users']
        if not aux_users_str:
            continue  # No candidates

        # Convert aux_users_str to a list of candidate IDs
        candidate_aux_user_ids = [int(aux_user_id) for aux_user_id in aux_users_str.split(',') if aux_user_id]

        # Compute scores for the current ml_user_id using sub-batching
        result = compute_scores_for_ml_user(ml_user_id, candidate_aux_user_ids, phi=phi, sub_batch_size=1000)

        if result is not None:
            matched_aux_userId, score = result
            matches.append({'ml_user_id': ml_user_id, 'aux_user_id': matched_aux_userId, 'score': score})
            print(f"MovieLens user {ml_user_id} matched with auxiliary user {matched_aux_userId} (Score: {score})")
        else:
            print(f"No unique match for MovieLens user {ml_user_id}")

        # Explicitly free memory for variables no longer needed
        del candidate_aux_user_ids, result
        gc.collect()  # Force garbage collection

    # Free memory for the DataFrame after processing
    del batch_results_df
    gc.collect()

    # After processing the batch, save the matches
    matches_df = pd.DataFrame(matches)
    matches_df.to_csv('deanonymization_matches_single_batch.csv', index=False)
    print(f"Total matches found: {len(matches_df)}")


In [None]:
# import gc
# from tqdm import tqdm

# phi = 1.5  # Eccentricity threshold
# matches = []

# # List of batch files
# batch_files = [os.path.join(OUTPUT_DIR_NO_TOP_500, f) for f in os.listdir(OUTPUT_DIR_NO_TOP_500) if f.endswith('.csv')]

# for batch_file in tqdm(batch_files, desc='Processing batches'):
#     # Load the batch_results_df
#     batch_results_df = pd.read_csv(batch_file)

#     for idx, row in tqdm(batch_results_df.iterrows()):
#         ml_user_id = row['ml_user_id']
#         aux_users_str = row['aux_users']
#         if not aux_users_str:
#             continue  # No candidates

#         # Convert aux_users_str to a list of candidate IDs
#         candidate_aux_user_ids = [int(aux_user_id) for aux_user_id in aux_users_str.split(',') if aux_user_id]

#         # Compute scores for the current ml_user_id using sub-batching
#         result = compute_scores_for_ml_user(ml_user_id, candidate_aux_user_ids, phi=phi, sub_batch_size=1000)

#         if result is not None:
#             matched_aux_userId, score = result
#             matches.append({'ml_user_id': ml_user_id, 'aux_user_id': matched_aux_userId, 'score': score})
#             print(f"MovieLens user {ml_user_id} matched with auxiliary user {matched_aux_userId} (Score: {score})")
#         else:
#             print(f"No unique match for MovieLens user {ml_user_id}")

#         # Explicitly free memory for variables no longer needed
#         del candidate_aux_user_ids, result
#         gc.collect()  # Force garbage collection
    
#     # Free memory for the DataFrame after processing each batch file
#     del batch_results_df
#     gc.collect()

# # After all batches are processed, save the matches
# matches_df = pd.DataFrame(matches)
# matches_df.to_csv('deanonymization_matches.csv', index=False)
# print(f"Total matches found: {len(matches_df)}")


In [18]:
np.random.seed(0)  # set reproducibility

def compute_scores_for_ml_user(
    ml_user_id,
    candidate_aux_user_ids,
    phi=1.5,
    sub_batch_size=1000,
    max_rating_diff=1,
    max_time_diff=14*24*3600,
    rho0=2,
    d0=30*24*3600,
    selected_movie_indices=None
):
    # Get ml_user_idx
    ml_user_idx = ml_userId_to_row_idx.get(ml_user_id)
    if ml_user_idx is None:
        return None  # ML user ID not found

    if selected_movie_indices is None:
        return None  # Selected movies must be provided

    # Use only the selected movies
    ml_rated_movies = selected_movie_indices
    ml_ratings = database_ratings_matrix[ml_user_idx, ml_rated_movies].toarray().flatten()
    ml_timestamps = database_timestamps_matrix[ml_user_idx, ml_rated_movies].toarray().flatten()
    ml_weights = weights[ml_rated_movies]

    if len(ml_rated_movies) == 0:
        return None  # No movies to process

    num_candidates = len(candidate_aux_user_ids)
    if num_candidates == 0:
        return None  # No candidates found

    # Initialize an array to accumulate scores
    scores = np.zeros(num_candidates)

    # Process auxiliary users in sub-batches
    candidate_aux_user_indices = [
        aux_userId_to_row_idx.get(aux_user_id)
        for aux_user_id in candidate_aux_user_ids
    ]
    candidate_aux_user_indices = [idx for idx in candidate_aux_user_indices if idx is not None]

    for start in range(0, num_candidates, sub_batch_size):
        end = min(start + sub_batch_size, num_candidates)
        sub_batch_indices = candidate_aux_user_indices[start:end]

        # Get candidate data for the current sub-batch
        candidate_ratings_matrix = aux_ratings_matrix[sub_batch_indices][:, ml_rated_movies]
        candidate_timestamps_matrix = aux_timestamps_matrix[sub_batch_indices][:, ml_rated_movies]

        # Convert to dense arrays
        candidate_ratings = candidate_ratings_matrix.toarray()
        candidate_timestamps = candidate_timestamps_matrix.toarray()

        # Prepare ML user data matrices
        ml_ratings_matrix = np.tile(ml_ratings, (len(sub_batch_indices), 1))
        ml_timestamps_matrix = np.tile(ml_timestamps, (len(sub_batch_indices), 1))
        ml_weights_matrix = np.tile(ml_weights, (len(sub_batch_indices), 1))

        # Compute valid mask where both users have rated the same movie
        valid_mask = (ml_ratings_matrix > 0) & (candidate_ratings > 0)

        # Compute absolute differences
        rating_diff = np.abs(ml_ratings_matrix - candidate_ratings)
        time_diff = np.abs(ml_timestamps_matrix - candidate_timestamps)

        # Apply valid mask
        rating_diff[~valid_mask] = 0
        time_diff[~valid_mask] = 0

        # Apply thresholds
        threshold_mask = (rating_diff <= max_rating_diff) & (time_diff <= max_time_diff)

        # Compute exponential similarities
        rating_exp = np.exp(-rating_diff / rho0) * threshold_mask
        time_exp = np.exp(-time_diff / d0) * threshold_mask

        # Calculate similarity score
        sim = (rating_exp + time_exp) * ml_weights_matrix

        # Sum over movies
        sub_batch_scores = np.sum(sim, axis=1)

        # Accumulate the scores
        scores[start:end] = sub_batch_scores

        # Free memory
        del candidate_ratings, candidate_timestamps, ml_ratings_matrix, ml_timestamps_matrix, rating_diff, time_diff, sim, sub_batch_scores
        gc.collect()

    # Determine if there's a unique match
    max_score_idx = np.argmax(scores)
    max_score = scores[max_score_idx]
    sorted_scores = np.sort(scores)[::-1]
    max2_score = sorted_scores[1] if len(sorted_scores) > 1 else 0
    sigma = np.std(scores)
    eccentricity = (max_score - max2_score) / sigma if sigma > 0 else np.inf

    min_score_threshold = 0.95 #adjusts needed

    if eccentricity < phi or max_score < min_score_threshold:
        return None  # No unique match found
    else:
        matched_aux_user_idx = candidate_aux_user_indices[max_score_idx]
        matched_aux_userId = aux_userIds[matched_aux_user_idx]
        return matched_aux_userId, max_score
    


import gc
from tqdm import tqdm

phi = 1.5  # Eccentricity threshold
max_rating_diff = 1
max_time_diff = 14 * 24 * 3600  # 14 days in seconds

# Define the number of ratings N to experiment with
N_values = [2, 4, 6, 8]

# Prepare dictionaries to store success and attempt counts for each N
success_counts = {N: 0 for N in N_values}
attempt_counts = {N: 0 for N in N_values}

# Select the batch file to process
batch_files = [os.path.join(OUTPUT_DIR_NO_TOP_500, f)
               for f in os.listdir(OUTPUT_DIR_NO_TOP_500) if f.endswith('.csv')]
if not batch_files:
    print("No batch files found in the specified directory.")
else:
    # Process only the first batch file (adjust as needed)
    batch_file = batch_files[0]
    print(f"Processing batch file: {batch_file}")

    # Load the batch_results_df
    batch_results_df = pd.read_csv(batch_file)

    for N in N_values:
        print(f"\n--- Starting experiments with N = {N} ratings ---\n")
        matches = []  # Reset matches for each N

        for idx, row in tqdm(batch_results_df.iterrows(), total=batch_results_df.shape[0], desc=f'Processing rows for N={N}'):
            ml_user_id = row['ml_user_id']

            # Get ml_user_idx
            ml_user_idx = ml_userId_to_row_idx.get(ml_user_id)
            if ml_user_idx is None:
                continue  # ML user ID not found

            # Get all movies rated by the MovieLens user
            ml_rated_movies = database_ratings_matrix.getrow(ml_user_idx).indices
            if len(ml_rated_movies) < N:
                continue  # Not enough movies rated

            # Get ml_user's ratings and timestamps
            ml_ratings_full = database_ratings_matrix.getrow(ml_user_idx).toarray().flatten()
            ml_timestamps_full = database_timestamps_matrix.getrow(ml_user_idx).toarray().flatten()

            # Get candidate auxiliary users who have at least one movie in common (from precomputed data)
            aux_users_str = row['aux_users']
            if not aux_users_str:
                continue  # No candidates

            candidate_aux_user_ids = [int(aux_user_id) for aux_user_id in aux_users_str.split(',') if aux_user_id]

            # Initialize list to hold valid auxiliary users
            valid_candidate_aux_user_ids = []
            # Dictionary to hold the number of movies in common for each aux user
            aux_user_common_counts = {}

            for aux_user_id in candidate_aux_user_ids:
                aux_user_idx = aux_userId_to_row_idx.get(aux_user_id)
                if aux_user_idx is None:
                    continue

                # Get auxiliary user's ratings and timestamps
                aux_ratings_full = aux_ratings_matrix.getrow(aux_user_idx).toarray().flatten()
                aux_timestamps_full = aux_timestamps_matrix.getrow(aux_user_idx).toarray().flatten()

                # Apply valid mask where both users have rated the movie
                valid_mask = (ml_ratings_full > 0) & (aux_ratings_full > 0)

                if not np.any(valid_mask):
                    continue  # No overlapping movies

                # Apply rating and time thresholds
                rating_diff = np.abs(ml_ratings_full - aux_ratings_full)
                time_diff = np.abs(ml_timestamps_full - aux_timestamps_full)
                within_rating_threshold = rating_diff <= max_rating_diff
                within_time_threshold = time_diff <= max_time_diff
                within_threshold = valid_mask & within_rating_threshold & within_time_threshold

                count_within_threshold = np.sum(within_threshold)
                if count_within_threshold >= N:
                    valid_candidate_aux_user_ids.append(aux_user_id)
                    aux_user_common_counts[aux_user_id] = within_threshold

            if not valid_candidate_aux_user_ids:
                continue  # No auxiliary users with at least N movies in common

            # Now, randomly select N movies from the overlapping movies between ML user and auxiliary users
            # Find the intersection of movies that are within thresholds for all valid auxiliary users
            # Since auxiliary users may have different overlapping movies, we'll select from the ML user's movies that are within thresholds with any auxiliary user

            # Combine within_threshold masks for all valid auxiliary users
            combined_within_threshold = np.zeros_like(ml_ratings_full, dtype=bool)
            for aux_user_id in valid_candidate_aux_user_ids:
                combined_within_threshold |= aux_user_common_counts[aux_user_id]

            # Indices of movies that are within thresholds with at least one auxiliary user
            candidate_movie_indices = np.where(combined_within_threshold)[0]
            if len(candidate_movie_indices) < N:
                continue  # Not enough movies to select N

            # Randomly select N movies from candidate_movie_indices
            selected_movie_indices = np.random.choice(candidate_movie_indices, size=N, replace=False)

            # Now, attempt de-anonymization using the selected movies
            # Increment attempt count
            attempt_counts[N] += 1

            # Compute scores using only the selected movies
            result = compute_scores_for_ml_user(
                ml_user_id,
                valid_candidate_aux_user_ids,
                phi=phi,
                sub_batch_size=1000,
                selected_movie_indices=selected_movie_indices
            )

            if result is not None:
                matched_aux_userId, score = result
                matches.append({'ml_user_id': ml_user_id, 'aux_user_id': matched_aux_userId, 'score': score})
                success_counts[N] += 1
                print(f"Success: ML user {ml_user_id} matched with auxiliary user {matched_aux_userId} (Score: {score})")
            else:
                print(f"Failure: No unique match for ML user {ml_user_id}")

            # Explicitly free memory
            del candidate_aux_user_ids, valid_candidate_aux_user_ids, result, aux_user_common_counts
            gc.collect()

        # After processing all users for this N, save the matches
        matches_df = pd.DataFrame(matches)
        matches_df.to_csv(f'deanonymization_matches_N_{N}.csv', index=False)
        print(f"Total matches found for N={N}: {len(matches_df)} out of {attempt_counts[N]} attempts")

    # Print success rates
    for N in N_values:
        if attempt_counts[N] > 0:
            success_rate = (success_counts[N] / attempt_counts[N]) * 100
            print(f"Success rate for N={N}: {success_rate:.2f}% ({success_counts[N]} / {attempt_counts[N]})")
        else:
            print(f"No attempts made for N={N}")


Processing batch file: deanonymization_results_no_top_500\batch_0_to_999_results.csv

--- Starting experiments with N = 2 ratings ---



Processing rows for N=2:   1%|          | 6/1000 [00:09<38:28,  2.32s/it]

Success: ML user 342 matched with auxiliary user 740366 (Score: 1.6987186739900109)


Processing rows for N=2:   1%|          | 12/1000 [00:15<24:17,  1.47s/it]

Failure: No unique match for ML user 487


Processing rows for N=2:   1%|▏         | 14/1000 [00:17<22:02,  1.34s/it]

Failure: No unique match for ML user 531


Processing rows for N=2:   2%|▏         | 17/1000 [00:18<11:27,  1.43it/s]

Failure: No unique match for ML user 594


Processing rows for N=2:   2%|▏         | 18/1000 [00:19<11:26,  1.43it/s]

Failure: No unique match for ML user 623


Processing rows for N=2:   2%|▏         | 19/1000 [00:22<23:51,  1.46s/it]

Failure: No unique match for ML user 647


Processing rows for N=2:   2%|▏         | 21/1000 [00:23<14:49,  1.10it/s]

Failure: No unique match for ML user 696


Processing rows for N=2:   2%|▏         | 23/1000 [00:24<12:41,  1.28it/s]

Failure: No unique match for ML user 735


Processing rows for N=2:   2%|▎         | 25/1000 [00:26<12:47,  1.27it/s]

Failure: No unique match for ML user 743


Processing rows for N=2:   3%|▎         | 29/1000 [00:30<20:19,  1.26s/it]

Failure: No unique match for ML user 869


Processing rows for N=2:   3%|▎         | 31/1000 [00:32<20:34,  1.27s/it]

Failure: No unique match for ML user 903


Processing rows for N=2:   4%|▍         | 39/1000 [00:42<31:12,  1.95s/it]

Success: ML user 1023 matched with auxiliary user 31500 (Score: 1.113263946408408)


Processing rows for N=2:   4%|▍         | 43/1000 [00:44<14:31,  1.10it/s]

Failure: No unique match for ML user 1090


Processing rows for N=2:   4%|▍         | 44/1000 [00:46<19:30,  1.22s/it]

Failure: No unique match for ML user 1122


Processing rows for N=2:   4%|▍         | 45/1000 [00:49<29:15,  1.84s/it]

Failure: No unique match for ML user 1155


Processing rows for N=2:   5%|▍         | 48/1000 [00:54<31:26,  1.98s/it]

Failure: No unique match for ML user 1189


Processing rows for N=2:   5%|▌         | 53/1000 [01:01<24:08,  1.53s/it]

Failure: No unique match for ML user 1253


Processing rows for N=2:   6%|▌         | 56/1000 [01:04<16:09,  1.03s/it]

Failure: No unique match for ML user 1308


Processing rows for N=2:   6%|▌         | 58/1000 [01:06<16:52,  1.07s/it]

Failure: No unique match for ML user 1350


Processing rows for N=2:   6%|▌         | 59/1000 [01:07<18:22,  1.17s/it]

Failure: No unique match for ML user 1351


Processing rows for N=2:   6%|▌         | 60/1000 [01:13<37:01,  2.36s/it]

Failure: No unique match for ML user 1399


Processing rows for N=2:   6%|▌         | 61/1000 [01:16<36:53,  2.36s/it]

Failure: No unique match for ML user 1437


Processing rows for N=2:   6%|▋         | 63/1000 [01:17<24:34,  1.57s/it]

Failure: No unique match for ML user 1484


Processing rows for N=2:   7%|▋         | 71/1000 [01:25<14:59,  1.03it/s]

Failure: No unique match for ML user 1715


Processing rows for N=2:   8%|▊         | 75/1000 [01:29<18:15,  1.18s/it]

Success: ML user 1847 matched with auxiliary user 195632 (Score: 1.0812978490237992)


Processing rows for N=2:   8%|▊         | 76/1000 [01:36<44:01,  2.86s/it]

Failure: No unique match for ML user 1851


Processing rows for N=2:   8%|▊         | 77/1000 [01:38<42:43,  2.78s/it]

Failure: No unique match for ML user 1877


Processing rows for N=2:   8%|▊         | 85/1000 [01:45<18:12,  1.19s/it]

Failure: No unique match for ML user 2173


Processing rows for N=2:   9%|▉         | 92/1000 [01:54<32:50,  2.17s/it]

Failure: No unique match for ML user 2291


Processing rows for N=2:   9%|▉         | 94/1000 [01:57<27:48,  1.84s/it]

Success: ML user 2301 matched with auxiliary user 22535 (Score: 1.0860929663407688)


Processing rows for N=2:  10%|▉         | 97/1000 [01:59<15:35,  1.04s/it]

Failure: No unique match for ML user 2394


Processing rows for N=2:  10%|▉         | 99/1000 [02:00<13:31,  1.11it/s]

Failure: No unique match for ML user 2423


Processing rows for N=2:  10%|█         | 100/1000 [02:01<11:55,  1.26it/s]

Failure: No unique match for ML user 2440


Processing rows for N=2:  10%|█         | 101/1000 [02:01<11:31,  1.30it/s]

Failure: No unique match for ML user 2480


Processing rows for N=2:  10%|█         | 104/1000 [02:07<23:40,  1.59s/it]

Failure: No unique match for ML user 2537


Processing rows for N=2:  11%|█         | 106/1000 [02:11<30:33,  2.05s/it]

Failure: No unique match for ML user 2565


Processing rows for N=2:  11%|█         | 109/1000 [02:15<23:21,  1.57s/it]

Failure: No unique match for ML user 2703


Processing rows for N=2:  11%|█▏        | 113/1000 [02:19<18:38,  1.26s/it]

Failure: No unique match for ML user 2813


Processing rows for N=2:  11%|█▏        | 114/1000 [02:20<20:06,  1.36s/it]

Failure: No unique match for ML user 2819


Processing rows for N=2:  12%|█▏        | 117/1000 [02:27<32:04,  2.18s/it]

Failure: No unique match for ML user 2955


Processing rows for N=2:  12%|█▏        | 118/1000 [02:30<38:03,  2.59s/it]

Failure: No unique match for ML user 2970


Processing rows for N=2:  12%|█▏        | 121/1000 [02:39<49:49,  3.40s/it]

Failure: No unique match for ML user 3095


Processing rows for N=2:  12%|█▏        | 122/1000 [02:40<38:48,  2.65s/it]

Failure: No unique match for ML user 3122


Processing rows for N=2:  13%|█▎        | 130/1000 [02:51<38:25,  2.65s/it]

Failure: No unique match for ML user 3349


Processing rows for N=2:  13%|█▎        | 131/1000 [02:52<31:27,  2.17s/it]

Success: ML user 3386 matched with auxiliary user 17965 (Score: 1.000702722807247)


Processing rows for N=2:  13%|█▎        | 132/1000 [02:55<33:29,  2.32s/it]

Failure: No unique match for ML user 3414


Processing rows for N=2:  13%|█▎        | 133/1000 [02:56<29:51,  2.07s/it]

Failure: No unique match for ML user 3440


Processing rows for N=2:  14%|█▎        | 135/1000 [02:58<20:34,  1.43s/it]

Failure: No unique match for ML user 3471


Processing rows for N=2:  14%|█▎        | 136/1000 [02:59<19:13,  1.33s/it]

Failure: No unique match for ML user 3485


Processing rows for N=2:  14%|█▍        | 143/1000 [03:05<13:08,  1.09it/s]

Failure: No unique match for ML user 3695


Processing rows for N=2:  15%|█▍        | 146/1000 [03:08<15:14,  1.07s/it]

Failure: No unique match for ML user 3819


Processing rows for N=2:  15%|█▌        | 150/1000 [03:10<07:04,  2.00it/s]

Failure: No unique match for ML user 3861


Processing rows for N=2:  15%|█▌        | 154/1000 [03:12<09:22,  1.50it/s]

Failure: No unique match for ML user 4028


Processing rows for N=2:  16%|█▌        | 158/1000 [03:13<05:16,  2.66it/s]

Failure: No unique match for ML user 4036


Processing rows for N=2:  16%|█▌        | 162/1000 [03:20<19:20,  1.38s/it]

Failure: No unique match for ML user 4264


Processing rows for N=2:  17%|█▋        | 169/1000 [03:27<23:10,  1.67s/it]

Failure: No unique match for ML user 4396


Processing rows for N=2:  18%|█▊        | 175/1000 [03:31<15:53,  1.16s/it]

Failure: No unique match for ML user 4587


Processing rows for N=2:  18%|█▊        | 176/1000 [03:36<29:17,  2.13s/it]

Failure: No unique match for ML user 4624


Processing rows for N=2:  18%|█▊        | 179/1000 [03:39<21:39,  1.58s/it]

Success: ML user 4653 matched with auxiliary user 215310 (Score: 1.0036993957806855)


Processing rows for N=2:  18%|█▊        | 183/1000 [03:48<39:25,  2.89s/it]

Failure: No unique match for ML user 4772


Processing rows for N=2:  18%|█▊        | 184/1000 [03:49<32:06,  2.36s/it]

Success: ML user 4777 matched with auxiliary user 466499 (Score: 1.1019108832939195)


Processing rows for N=2:  18%|█▊        | 185/1000 [03:55<43:56,  3.24s/it]

Failure: No unique match for ML user 4851


Processing rows for N=2:  19%|█▊        | 186/1000 [03:57<39:56,  2.94s/it]

Failure: No unique match for ML user 4898


Processing rows for N=2:  19%|█▊        | 187/1000 [04:01<44:49,  3.31s/it]

Failure: No unique match for ML user 4947


Processing rows for N=2:  19%|█▉        | 189/1000 [04:05<35:48,  2.65s/it]

Failure: No unique match for ML user 5003


Processing rows for N=2:  20%|█▉        | 195/1000 [04:12<21:26,  1.60s/it]

Failure: No unique match for ML user 5057


Processing rows for N=2:  20%|█▉        | 196/1000 [04:18<40:26,  3.02s/it]

Failure: No unique match for ML user 5101


Processing rows for N=2:  20%|█▉        | 198/1000 [04:19<24:10,  1.81s/it]

Failure: No unique match for ML user 5122


Processing rows for N=2:  20%|██        | 203/1000 [04:22<14:15,  1.07s/it]

Failure: No unique match for ML user 5232


Processing rows for N=2:  20%|██        | 204/1000 [04:23<13:52,  1.05s/it]

Failure: No unique match for ML user 5258


Processing rows for N=2:  21%|██        | 206/1000 [04:25<11:29,  1.15it/s]

Success: ML user 5266 matched with auxiliary user 48781 (Score: 1.101886794356329)


Processing rows for N=2:  21%|██        | 207/1000 [04:26<12:23,  1.07it/s]

Failure: No unique match for ML user 5279


Processing rows for N=2:  21%|██        | 210/1000 [04:28<09:59,  1.32it/s]

Failure: No unique match for ML user 5309


Processing rows for N=2:  22%|██▏       | 215/1000 [04:31<10:37,  1.23it/s]

Failure: No unique match for ML user 5428


Processing rows for N=2:  22%|██▏       | 216/1000 [04:33<14:10,  1.09s/it]

Failure: No unique match for ML user 5437


Processing rows for N=2:  22%|██▏       | 220/1000 [04:34<08:02,  1.62it/s]

Failure: No unique match for ML user 5574


Processing rows for N=2:  22%|██▏       | 222/1000 [04:40<24:05,  1.86s/it]

Failure: No unique match for ML user 5664


Processing rows for N=2:  23%|██▎       | 226/1000 [04:46<20:18,  1.57s/it]

Failure: No unique match for ML user 5763


Processing rows for N=2:  24%|██▎       | 236/1000 [04:50<06:59,  1.82it/s]

Failure: No unique match for ML user 6010


Processing rows for N=2:  24%|██▎       | 237/1000 [04:57<30:02,  2.36s/it]

Failure: No unique match for ML user 6028


Processing rows for N=2:  24%|██▍       | 238/1000 [04:58<24:22,  1.92s/it]

Failure: No unique match for ML user 6050


Processing rows for N=2:  24%|██▍       | 239/1000 [05:00<24:55,  1.97s/it]

Failure: No unique match for ML user 6089


Processing rows for N=2:  24%|██▍       | 240/1000 [05:01<21:32,  1.70s/it]

Failure: No unique match for ML user 6094


Processing rows for N=2:  24%|██▍       | 242/1000 [05:07<33:50,  2.68s/it]

Failure: No unique match for ML user 6143


Processing rows for N=2:  24%|██▍       | 245/1000 [05:14<36:29,  2.90s/it]

Failure: No unique match for ML user 6347


Processing rows for N=2:  26%|██▌       | 256/1000 [05:24<10:21,  1.20it/s]

Failure: No unique match for ML user 6548


Processing rows for N=2:  26%|██▌       | 258/1000 [05:28<16:53,  1.37s/it]

Failure: No unique match for ML user 6558


Processing rows for N=2:  26%|██▌       | 260/1000 [05:36<32:23,  2.63s/it]

Failure: No unique match for ML user 6588


Processing rows for N=2:  26%|██▌       | 261/1000 [05:37<26:07,  2.12s/it]

Failure: No unique match for ML user 6614


Processing rows for N=2:  27%|██▋       | 268/1000 [05:45<21:49,  1.79s/it]

Failure: No unique match for ML user 6783


Processing rows for N=2:  28%|██▊       | 278/1000 [05:51<13:55,  1.16s/it]

Failure: No unique match for ML user 7239


Processing rows for N=2:  28%|██▊       | 279/1000 [05:52<12:52,  1.07s/it]

Failure: No unique match for ML user 7264


Processing rows for N=2:  30%|██▉       | 297/1000 [06:09<23:23,  2.00s/it]

Failure: No unique match for ML user 7572


Processing rows for N=2:  30%|██▉       | 298/1000 [06:09<19:29,  1.67s/it]

Failure: No unique match for ML user 7632


Processing rows for N=2:  30%|███       | 303/1000 [06:13<10:38,  1.09it/s]

Failure: No unique match for ML user 7706


Processing rows for N=2:  30%|███       | 304/1000 [06:15<13:58,  1.21s/it]

Failure: No unique match for ML user 7740


Processing rows for N=2:  31%|███       | 306/1000 [06:17<12:05,  1.05s/it]

Failure: No unique match for ML user 7765


Processing rows for N=2:  31%|███       | 308/1000 [06:20<17:02,  1.48s/it]

Failure: No unique match for ML user 7818


Processing rows for N=2:  31%|███       | 310/1000 [06:24<19:29,  1.70s/it]

Failure: No unique match for ML user 7829


Processing rows for N=2:  31%|███       | 312/1000 [06:27<17:26,  1.52s/it]

Failure: No unique match for ML user 7900


Processing rows for N=2:  31%|███▏      | 313/1000 [06:29<19:18,  1.69s/it]

Failure: No unique match for ML user 7935


Processing rows for N=2:  32%|███▏      | 318/1000 [06:33<12:48,  1.13s/it]

Failure: No unique match for ML user 8056


Processing rows for N=2:  32%|███▏      | 321/1000 [06:35<08:41,  1.30it/s]

Failure: No unique match for ML user 8142


Processing rows for N=2:  32%|███▏      | 322/1000 [06:38<15:48,  1.40s/it]

Failure: No unique match for ML user 8175


Processing rows for N=2:  33%|███▎      | 326/1000 [06:46<27:05,  2.41s/it]

Failure: No unique match for ML user 8217


Processing rows for N=2:  33%|███▎      | 328/1000 [06:47<18:14,  1.63s/it]

Success: ML user 8264 matched with auxiliary user 9120 (Score: 1.282411222643295)


Processing rows for N=2:  33%|███▎      | 329/1000 [06:48<15:41,  1.40s/it]

Failure: No unique match for ML user 8273


Processing rows for N=2:  33%|███▎      | 331/1000 [06:50<13:40,  1.23s/it]

Failure: No unique match for ML user 8371


Processing rows for N=2:  34%|███▍      | 341/1000 [06:55<07:38,  1.44it/s]

Failure: No unique match for ML user 8667


Processing rows for N=2:  34%|███▍      | 344/1000 [06:56<05:36,  1.95it/s]

Failure: No unique match for ML user 8751


Processing rows for N=2:  34%|███▍      | 345/1000 [06:58<09:42,  1.13it/s]

Failure: No unique match for ML user 8791


Processing rows for N=2:  35%|███▍      | 346/1000 [06:59<11:44,  1.08s/it]

Failure: No unique match for ML user 8802


Processing rows for N=2:  35%|███▍      | 348/1000 [07:00<08:14,  1.32it/s]

Failure: No unique match for ML user 8882


Processing rows for N=2:  35%|███▍      | 349/1000 [07:04<16:21,  1.51s/it]

Failure: No unique match for ML user 8898


Processing rows for N=2:  36%|███▌      | 356/1000 [07:10<10:57,  1.02s/it]

Failure: No unique match for ML user 9153


Processing rows for N=2:  36%|███▌      | 357/1000 [07:11<12:31,  1.17s/it]

Failure: No unique match for ML user 9209


Processing rows for N=2:  36%|███▌      | 358/1000 [07:13<13:15,  1.24s/it]

Failure: No unique match for ML user 9233


Processing rows for N=2:  36%|███▌      | 359/1000 [07:15<16:45,  1.57s/it]

Failure: No unique match for ML user 9236


Processing rows for N=2:  36%|███▌      | 361/1000 [07:16<11:29,  1.08s/it]

Failure: No unique match for ML user 9244


Processing rows for N=2:  36%|███▌      | 362/1000 [07:17<10:52,  1.02s/it]

Failure: No unique match for ML user 9275


Processing rows for N=2:  36%|███▋      | 364/1000 [07:19<10:28,  1.01it/s]

Failure: No unique match for ML user 9308


Processing rows for N=2:  37%|███▋      | 370/1000 [07:21<05:48,  1.81it/s]

Failure: No unique match for ML user 9489


Processing rows for N=2:  37%|███▋      | 371/1000 [07:23<08:54,  1.18it/s]

Failure: No unique match for ML user 9500


Processing rows for N=2:  37%|███▋      | 373/1000 [07:24<07:28,  1.40it/s]

Failure: No unique match for ML user 9655


Processing rows for N=2:  38%|███▊      | 375/1000 [07:26<09:49,  1.06it/s]

Failure: No unique match for ML user 9698


Processing rows for N=2:  38%|███▊      | 376/1000 [07:28<10:21,  1.00it/s]

Failure: No unique match for ML user 9818


Processing rows for N=2:  38%|███▊      | 385/1000 [07:33<09:06,  1.12it/s]

Failure: No unique match for ML user 9931


Processing rows for N=2:  39%|███▉      | 388/1000 [07:39<18:45,  1.84s/it]

Failure: No unique match for ML user 10041


Processing rows for N=2:  39%|███▉      | 390/1000 [07:42<17:30,  1.72s/it]

Failure: No unique match for ML user 10095


Processing rows for N=2:  40%|███▉      | 397/1000 [07:47<08:39,  1.16it/s]

Failure: No unique match for ML user 10212


Processing rows for N=2:  40%|████      | 400/1000 [07:51<11:59,  1.20s/it]

Success: ML user 10278 matched with auxiliary user 44865 (Score: 1.1115660122923297)


Processing rows for N=2:  40%|████      | 405/1000 [07:55<09:17,  1.07it/s]

Failure: No unique match for ML user 10412


Processing rows for N=2:  41%|████      | 408/1000 [07:58<10:54,  1.11s/it]

Failure: No unique match for ML user 10536


Processing rows for N=2:  41%|████      | 409/1000 [07:59<11:33,  1.17s/it]

Failure: No unique match for ML user 10543


Processing rows for N=2:  41%|████▏     | 413/1000 [08:06<19:25,  1.99s/it]

Failure: No unique match for ML user 10608


Processing rows for N=2:  42%|████▏     | 418/1000 [08:13<17:37,  1.82s/it]

Failure: No unique match for ML user 10766


Processing rows for N=2:  42%|████▏     | 421/1000 [08:18<15:52,  1.64s/it]

Failure: No unique match for ML user 10776


Processing rows for N=2:  42%|████▏     | 423/1000 [08:27<32:09,  3.34s/it]

Failure: No unique match for ML user 10792


Processing rows for N=2:  42%|████▎     | 425/1000 [08:29<20:53,  2.18s/it]

Success: ML user 10830 matched with auxiliary user 280942 (Score: 1.0985043723314707)


Processing rows for N=2:  43%|████▎     | 428/1000 [08:31<11:19,  1.19s/it]

Failure: No unique match for ML user 10948


Processing rows for N=2:  43%|████▎     | 433/1000 [08:34<07:55,  1.19it/s]

Failure: No unique match for ML user 11052


Processing rows for N=2:  43%|████▎     | 434/1000 [08:35<09:32,  1.01s/it]

Failure: No unique match for ML user 11064


Processing rows for N=2:  44%|████▍     | 439/1000 [08:41<14:04,  1.50s/it]

Failure: No unique match for ML user 11217


Processing rows for N=2:  45%|████▍     | 448/1000 [08:49<14:11,  1.54s/it]

Failure: No unique match for ML user 11568


Processing rows for N=2:  45%|████▌     | 450/1000 [08:50<11:48,  1.29s/it]

Failure: No unique match for ML user 11710


Processing rows for N=2:  45%|████▌     | 454/1000 [08:55<12:50,  1.41s/it]

Failure: No unique match for ML user 11810


Processing rows for N=2:  46%|████▌     | 456/1000 [08:56<08:42,  1.04it/s]

Failure: No unique match for ML user 11904


Processing rows for N=2:  46%|████▌     | 457/1000 [08:58<09:56,  1.10s/it]

Failure: No unique match for ML user 11907


Processing rows for N=2:  46%|████▌     | 458/1000 [08:58<08:49,  1.02it/s]

Success: ML user 11909 matched with auxiliary user 315740 (Score: 1.115456452070691)


Processing rows for N=2:  46%|████▌     | 459/1000 [09:01<13:16,  1.47s/it]

Failure: No unique match for ML user 12028


Processing rows for N=2:  46%|████▌     | 461/1000 [09:06<16:23,  1.83s/it]

Failure: No unique match for ML user 12032


Processing rows for N=2:  46%|████▋     | 463/1000 [09:08<10:52,  1.21s/it]

Success: ML user 12120 matched with auxiliary user 82916 (Score: 0.965053126297303)


Processing rows for N=2:  47%|████▋     | 466/1000 [09:10<07:58,  1.12it/s]

Failure: No unique match for ML user 12415


Processing rows for N=2:  47%|████▋     | 467/1000 [09:13<15:46,  1.78s/it]

Failure: No unique match for ML user 12421


Processing rows for N=2:  47%|████▋     | 470/1000 [09:17<12:02,  1.36s/it]

Failure: No unique match for ML user 12495


Processing rows for N=2:  47%|████▋     | 474/1000 [09:20<10:12,  1.16s/it]

Failure: No unique match for ML user 12601


Processing rows for N=2:  48%|████▊     | 478/1000 [09:22<05:36,  1.55it/s]

Failure: No unique match for ML user 12634


Processing rows for N=2:  48%|████▊     | 480/1000 [09:24<06:15,  1.38it/s]

Failure: No unique match for ML user 12674


Processing rows for N=2:  48%|████▊     | 484/1000 [09:28<09:22,  1.09s/it]

Failure: No unique match for ML user 12982


Processing rows for N=2:  49%|████▉     | 488/1000 [09:30<05:32,  1.54it/s]

Failure: No unique match for ML user 13074


Processing rows for N=2:  49%|████▉     | 491/1000 [09:33<06:20,  1.34it/s]

Failure: No unique match for ML user 13146


Processing rows for N=2:  50%|████▉     | 495/1000 [09:40<14:36,  1.74s/it]

Failure: No unique match for ML user 13372


Processing rows for N=2:  50%|████▉     | 497/1000 [09:43<14:31,  1.73s/it]

Failure: No unique match for ML user 13411


Processing rows for N=2:  50%|████▉     | 498/1000 [09:46<16:21,  1.95s/it]

Failure: No unique match for ML user 13415


Processing rows for N=2:  50%|█████     | 500/1000 [09:47<11:47,  1.42s/it]

Failure: No unique match for ML user 13449


Processing rows for N=2:  50%|█████     | 502/1000 [09:49<08:48,  1.06s/it]

Failure: No unique match for ML user 13494


Processing rows for N=2:  50%|█████     | 505/1000 [09:54<13:44,  1.67s/it]

Failure: No unique match for ML user 13567


Processing rows for N=2:  51%|█████     | 509/1000 [09:58<11:47,  1.44s/it]

Failure: No unique match for ML user 13631


Processing rows for N=2:  51%|█████     | 512/1000 [10:01<09:34,  1.18s/it]

Failure: No unique match for ML user 13739


Processing rows for N=2:  52%|█████▏    | 518/1000 [10:10<13:04,  1.63s/it]

Failure: No unique match for ML user 13894


Processing rows for N=2:  52%|█████▏    | 521/1000 [10:11<07:03,  1.13it/s]

Failure: No unique match for ML user 13929


Processing rows for N=2:  52%|█████▏    | 524/1000 [10:14<08:35,  1.08s/it]

Failure: No unique match for ML user 13962


Processing rows for N=2:  53%|█████▎    | 528/1000 [10:17<06:27,  1.22it/s]

Success: ML user 14009 matched with auxiliary user 1808 (Score: 1.0718529386741917)


Processing rows for N=2:  54%|█████▎    | 536/1000 [10:27<09:18,  1.20s/it]

Failure: No unique match for ML user 14244


Processing rows for N=2:  54%|█████▍    | 540/1000 [10:29<05:55,  1.29it/s]

Failure: No unique match for ML user 14371


Processing rows for N=2:  55%|█████▍    | 546/1000 [10:41<16:25,  2.17s/it]

Success: ML user 14556 matched with auxiliary user 893275 (Score: 1.2030027131480994)


Processing rows for N=2:  55%|█████▌    | 551/1000 [10:43<05:52,  1.27it/s]

Failure: No unique match for ML user 14793


Processing rows for N=2:  55%|█████▌    | 554/1000 [10:48<10:17,  1.38s/it]

Failure: No unique match for ML user 14909


Processing rows for N=2:  56%|█████▌    | 557/1000 [10:51<09:28,  1.28s/it]

Success: ML user 14969 matched with auxiliary user 202614 (Score: 1.3756413498872648)


Processing rows for N=2:  56%|█████▌    | 561/1000 [10:54<07:01,  1.04it/s]

Failure: No unique match for ML user 15029


Processing rows for N=2:  56%|█████▋    | 565/1000 [10:59<09:24,  1.30s/it]

Success: ML user 15159 matched with auxiliary user 920103 (Score: 0.97416266153158)


Processing rows for N=2:  57%|█████▋    | 567/1000 [11:00<06:22,  1.13it/s]

Failure: No unique match for ML user 15191


Processing rows for N=2:  57%|█████▋    | 569/1000 [11:02<07:22,  1.03s/it]

Failure: No unique match for ML user 15293


Processing rows for N=2:  57%|█████▋    | 574/1000 [11:04<04:35,  1.54it/s]

Failure: No unique match for ML user 15396


Processing rows for N=2:  57%|█████▊    | 575/1000 [11:06<06:39,  1.07it/s]

Failure: No unique match for ML user 15406


Processing rows for N=2:  58%|█████▊    | 578/1000 [11:13<14:01,  2.00s/it]

Failure: No unique match for ML user 15511


Processing rows for N=2:  58%|█████▊    | 579/1000 [11:14<12:48,  1.83s/it]

Failure: No unique match for ML user 15517


Processing rows for N=2:  58%|█████▊    | 581/1000 [11:18<12:38,  1.81s/it]

Failure: No unique match for ML user 15590


Processing rows for N=2:  58%|█████▊    | 584/1000 [11:19<06:44,  1.03it/s]

Failure: No unique match for ML user 15697


Processing rows for N=2:  59%|█████▉    | 588/1000 [11:23<08:44,  1.27s/it]

Failure: No unique match for ML user 15894


Processing rows for N=2:  59%|█████▉    | 590/1000 [11:26<07:58,  1.17s/it]

Failure: No unique match for ML user 15914


Processing rows for N=2:  59%|█████▉    | 591/1000 [11:28<09:19,  1.37s/it]

Failure: No unique match for ML user 16012


Processing rows for N=2:  60%|██████    | 602/1000 [11:40<07:14,  1.09s/it]

Failure: No unique match for ML user 16196


Processing rows for N=2:  60%|██████    | 603/1000 [11:42<08:51,  1.34s/it]

Failure: No unique match for ML user 16207


Processing rows for N=2:  60%|██████    | 604/1000 [11:46<12:26,  1.89s/it]

Failure: No unique match for ML user 16217


Processing rows for N=2:  61%|██████    | 606/1000 [11:47<08:54,  1.36s/it]

Failure: No unique match for ML user 16249


Processing rows for N=2:  61%|██████    | 608/1000 [11:49<08:28,  1.30s/it]

Failure: No unique match for ML user 16277


Processing rows for N=2:  61%|██████    | 610/1000 [11:53<09:58,  1.53s/it]

Failure: No unique match for ML user 16320


Processing rows for N=2:  61%|██████    | 611/1000 [12:00<19:48,  3.05s/it]

Failure: No unique match for ML user 16363


Processing rows for N=2:  61%|██████▏   | 613/1000 [12:01<11:47,  1.83s/it]

Failure: No unique match for ML user 16445


Processing rows for N=2:  61%|██████▏   | 614/1000 [12:02<10:06,  1.57s/it]

Failure: No unique match for ML user 16475


Processing rows for N=2:  62%|██████▏   | 615/1000 [12:04<11:23,  1.78s/it]

Failure: No unique match for ML user 16506


Processing rows for N=2:  62%|██████▏   | 616/1000 [12:07<13:32,  2.12s/it]

Failure: No unique match for ML user 16537


Processing rows for N=2:  62%|██████▏   | 618/1000 [12:08<08:06,  1.27s/it]

Failure: No unique match for ML user 16573


Processing rows for N=2:  62%|██████▏   | 619/1000 [12:09<07:39,  1.21s/it]

Failure: No unique match for ML user 16602


Processing rows for N=2:  62%|██████▏   | 624/1000 [12:14<07:34,  1.21s/it]

Failure: No unique match for ML user 16677


Processing rows for N=2:  63%|██████▎   | 627/1000 [12:16<04:31,  1.37it/s]

Failure: No unique match for ML user 16686


Processing rows for N=2:  63%|██████▎   | 628/1000 [12:17<04:54,  1.26it/s]

Failure: No unique match for ML user 16740


Processing rows for N=2:  63%|██████▎   | 630/1000 [12:20<07:55,  1.28s/it]

Failure: No unique match for ML user 16838


Processing rows for N=2:  63%|██████▎   | 634/1000 [12:22<04:07,  1.48it/s]

Failure: No unique match for ML user 16952


Processing rows for N=2:  64%|██████▎   | 636/1000 [12:22<03:21,  1.80it/s]

Failure: No unique match for ML user 17066


Processing rows for N=2:  64%|██████▍   | 641/1000 [12:24<01:49,  3.28it/s]

Failure: No unique match for ML user 17148


Processing rows for N=2:  64%|██████▍   | 643/1000 [12:29<08:44,  1.47s/it]

Failure: No unique match for ML user 17258


Processing rows for N=2:  65%|██████▍   | 646/1000 [12:30<05:38,  1.05it/s]

Failure: No unique match for ML user 17297


Processing rows for N=2:  66%|██████▌   | 658/1000 [12:39<04:22,  1.30it/s]

Failure: No unique match for ML user 17663


Processing rows for N=2:  66%|██████▋   | 664/1000 [12:44<05:43,  1.02s/it]

Failure: No unique match for ML user 17790


Processing rows for N=2:  66%|██████▋   | 665/1000 [12:47<07:55,  1.42s/it]

Success: ML user 17793 matched with auxiliary user 116876 (Score: 1.0058810440549515)


Processing rows for N=2:  67%|██████▋   | 670/1000 [12:51<04:58,  1.11it/s]

Failure: No unique match for ML user 17893


Processing rows for N=2:  67%|██████▋   | 673/1000 [12:54<06:33,  1.20s/it]

Failure: No unique match for ML user 17946


Processing rows for N=2:  68%|██████▊   | 676/1000 [12:55<03:49,  1.41it/s]

Failure: No unique match for ML user 17973


Processing rows for N=2:  68%|██████▊   | 677/1000 [12:56<04:10,  1.29it/s]

Failure: No unique match for ML user 17980


Processing rows for N=2:  70%|██████▉   | 696/1000 [13:09<05:16,  1.04s/it]

Failure: No unique match for ML user 18260


Processing rows for N=2:  70%|███████   | 700/1000 [13:12<03:55,  1.28it/s]

Success: ML user 18401 matched with auxiliary user 619562 (Score: 0.9807596012905005)


Processing rows for N=2:  70%|███████   | 702/1000 [13:14<04:25,  1.12it/s]

Failure: No unique match for ML user 18462


Processing rows for N=2:  71%|███████   | 710/1000 [13:20<04:40,  1.04it/s]

Failure: No unique match for ML user 18791


Processing rows for N=2:  71%|███████   | 712/1000 [13:22<05:15,  1.10s/it]

Failure: No unique match for ML user 18853


Processing rows for N=2:  72%|███████▏  | 717/1000 [13:27<05:21,  1.14s/it]

Failure: No unique match for ML user 18996


Processing rows for N=2:  72%|███████▏  | 719/1000 [13:28<04:11,  1.12it/s]

Failure: No unique match for ML user 19048


Processing rows for N=2:  72%|███████▏  | 721/1000 [13:30<04:14,  1.10it/s]

Failure: No unique match for ML user 19064


Processing rows for N=2:  72%|███████▏  | 724/1000 [13:33<05:04,  1.10s/it]

Failure: No unique match for ML user 19156


Processing rows for N=2:  73%|███████▎  | 726/1000 [13:34<03:44,  1.22it/s]

Failure: No unique match for ML user 19175


Processing rows for N=2:  73%|███████▎  | 727/1000 [13:38<06:47,  1.49s/it]

Failure: No unique match for ML user 19276


Processing rows for N=2:  73%|███████▎  | 731/1000 [13:40<03:44,  1.20it/s]

Success: ML user 19400 matched with auxiliary user 8841 (Score: 1.034195052270418)


Processing rows for N=2:  73%|███████▎  | 733/1000 [13:44<06:35,  1.48s/it]

Failure: No unique match for ML user 19496


Processing rows for N=2:  73%|███████▎  | 734/1000 [13:45<06:04,  1.37s/it]

Failure: No unique match for ML user 19531


Processing rows for N=2:  74%|███████▎  | 736/1000 [13:49<06:41,  1.52s/it]

Failure: No unique match for ML user 19550


Processing rows for N=2:  74%|███████▍  | 739/1000 [13:54<07:28,  1.72s/it]

Success: ML user 19615 matched with auxiliary user 46704 (Score: 1.4786696985642451)


Processing rows for N=2:  74%|███████▍  | 740/1000 [13:55<05:58,  1.38s/it]

Failure: No unique match for ML user 19616


Processing rows for N=2:  74%|███████▍  | 741/1000 [13:56<05:29,  1.27s/it]

Failure: No unique match for ML user 19624


Processing rows for N=2:  74%|███████▍  | 742/1000 [13:56<04:55,  1.15s/it]

Failure: No unique match for ML user 19667


Processing rows for N=2:  75%|███████▍  | 749/1000 [14:00<02:45,  1.51it/s]

Failure: No unique match for ML user 19848


Processing rows for N=2:  75%|███████▌  | 752/1000 [14:02<02:38,  1.56it/s]

Failure: No unique match for ML user 19948


Processing rows for N=2:  75%|███████▌  | 754/1000 [14:04<04:18,  1.05s/it]

Failure: No unique match for ML user 19966


Processing rows for N=2:  76%|███████▌  | 756/1000 [14:06<04:24,  1.08s/it]

Failure: No unique match for ML user 19982


Processing rows for N=2:  76%|███████▌  | 762/1000 [14:10<03:23,  1.17it/s]

Failure: No unique match for ML user 20192


Processing rows for N=2:  77%|███████▋  | 768/1000 [14:14<02:51,  1.35it/s]

Failure: No unique match for ML user 20451


Processing rows for N=2:  77%|███████▋  | 769/1000 [14:17<05:01,  1.31s/it]

Failure: No unique match for ML user 20468


Processing rows for N=2:  77%|███████▋  | 773/1000 [14:21<03:52,  1.03s/it]

Failure: No unique match for ML user 20496


Processing rows for N=2:  77%|███████▋  | 774/1000 [14:22<04:14,  1.13s/it]

Failure: No unique match for ML user 20504


Processing rows for N=2:  78%|███████▊  | 777/1000 [14:24<03:13,  1.15it/s]

Failure: No unique match for ML user 20628


Processing rows for N=2:  78%|███████▊  | 785/1000 [14:28<01:50,  1.94it/s]

Failure: No unique match for ML user 20753


Processing rows for N=2:  79%|███████▊  | 786/1000 [14:31<03:20,  1.07it/s]

Failure: No unique match for ML user 20835


Processing rows for N=2:  79%|███████▊  | 787/1000 [14:34<05:31,  1.55s/it]

Failure: No unique match for ML user 20846


Processing rows for N=2:  79%|███████▉  | 788/1000 [14:35<05:21,  1.52s/it]

Failure: No unique match for ML user 20849


Processing rows for N=2:  79%|███████▉  | 790/1000 [14:42<08:42,  2.49s/it]

Failure: No unique match for ML user 20876


Processing rows for N=2:  79%|███████▉  | 794/1000 [14:48<06:29,  1.89s/it]

Failure: No unique match for ML user 21023


Processing rows for N=2:  80%|████████  | 801/1000 [14:51<02:56,  1.13it/s]

Success: ML user 21216 matched with auxiliary user 7609 (Score: 1.0120590051575364)


Processing rows for N=2:  80%|████████  | 803/1000 [14:57<05:29,  1.67s/it]

Failure: No unique match for ML user 21237


Processing rows for N=2:  80%|████████  | 805/1000 [14:59<04:31,  1.39s/it]

Failure: No unique match for ML user 21284


Processing rows for N=2:  81%|████████  | 806/1000 [15:00<04:28,  1.38s/it]

Failure: No unique match for ML user 21306


Processing rows for N=2:  81%|████████  | 810/1000 [15:12<10:43,  3.39s/it]

Failure: No unique match for ML user 21364


Processing rows for N=2:  81%|████████  | 811/1000 [15:13<08:34,  2.72s/it]

Success: ML user 21367 matched with auxiliary user 8283 (Score: 0.9602567680080778)


Processing rows for N=2:  81%|████████▏ | 813/1000 [15:15<06:30,  2.09s/it]

Failure: No unique match for ML user 21496


Processing rows for N=2:  82%|████████▏ | 817/1000 [15:18<03:20,  1.10s/it]

Failure: No unique match for ML user 21607


Processing rows for N=2:  82%|████████▏ | 818/1000 [15:20<03:53,  1.28s/it]

Failure: No unique match for ML user 21614


Processing rows for N=2:  82%|████████▏ | 824/1000 [15:29<04:59,  1.70s/it]

Failure: No unique match for ML user 21871


Processing rows for N=2:  84%|████████▎ | 835/1000 [15:39<04:35,  1.67s/it]

Failure: No unique match for ML user 22363


Processing rows for N=2:  84%|████████▎ | 837/1000 [15:45<06:37,  2.44s/it]

Failure: No unique match for ML user 22479


Processing rows for N=2:  84%|████████▍ | 845/1000 [15:55<05:31,  2.14s/it]

Failure: No unique match for ML user 22746


Processing rows for N=2:  85%|████████▍ | 846/1000 [15:57<05:16,  2.05s/it]

Failure: No unique match for ML user 22794


Processing rows for N=2:  85%|████████▌ | 851/1000 [16:03<04:10,  1.68s/it]

Failure: No unique match for ML user 22979


Processing rows for N=2:  85%|████████▌ | 852/1000 [16:05<04:31,  1.83s/it]

Failure: No unique match for ML user 23068


Processing rows for N=2:  86%|████████▌ | 856/1000 [16:07<02:03,  1.17it/s]

Failure: No unique match for ML user 23133


Processing rows for N=2:  87%|████████▋ | 867/1000 [16:17<02:55,  1.32s/it]

Failure: No unique match for ML user 23497


Processing rows for N=2:  87%|████████▋ | 871/1000 [16:22<02:38,  1.23s/it]

Failure: No unique match for ML user 23638


Processing rows for N=2:  87%|████████▋ | 873/1000 [16:23<02:04,  1.02it/s]

Failure: No unique match for ML user 23696


Processing rows for N=2:  88%|████████▊ | 877/1000 [16:28<02:48,  1.37s/it]

Success: ML user 23897 matched with auxiliary user 79466 (Score: 1.6882205881078431)


Processing rows for N=2:  88%|████████▊ | 879/1000 [16:29<01:44,  1.16it/s]

Failure: No unique match for ML user 23960


Processing rows for N=2:  88%|████████▊ | 882/1000 [16:32<02:28,  1.26s/it]

Failure: No unique match for ML user 24140


Processing rows for N=2:  88%|████████▊ | 883/1000 [16:36<04:04,  2.09s/it]

Failure: No unique match for ML user 24156


Processing rows for N=2:  88%|████████▊ | 885/1000 [16:40<03:50,  2.01s/it]

Failure: No unique match for ML user 24397


Processing rows for N=2:  89%|████████▊ | 887/1000 [16:42<02:45,  1.46s/it]

Failure: No unique match for ML user 24436


Processing rows for N=2:  89%|████████▉ | 890/1000 [16:46<02:39,  1.45s/it]

Failure: No unique match for ML user 24527


Processing rows for N=2:  89%|████████▉ | 891/1000 [16:46<02:12,  1.22s/it]

Failure: No unique match for ML user 24529


Processing rows for N=2:  90%|████████▉ | 895/1000 [16:51<02:21,  1.35s/it]

Failure: No unique match for ML user 24605


Processing rows for N=2:  90%|████████▉ | 897/1000 [16:53<01:57,  1.14s/it]

Failure: No unique match for ML user 24682


Processing rows for N=2:  90%|█████████ | 900/1000 [16:56<01:32,  1.09it/s]

Success: ML user 24821 matched with auxiliary user 18768 (Score: 1.0718829341643028)


Processing rows for N=2:  90%|█████████ | 903/1000 [16:57<01:03,  1.54it/s]

Failure: No unique match for ML user 24926


Processing rows for N=2:  90%|█████████ | 904/1000 [17:01<02:05,  1.30s/it]

Failure: No unique match for ML user 24931


Processing rows for N=2:  91%|█████████ | 906/1000 [17:03<01:59,  1.28s/it]

Failure: No unique match for ML user 25065


Processing rows for N=2:  91%|█████████ | 910/1000 [17:08<01:55,  1.29s/it]

Failure: No unique match for ML user 25164


Processing rows for N=2:  91%|█████████ | 911/1000 [17:12<03:11,  2.15s/it]

Failure: No unique match for ML user 25165


Processing rows for N=2:  91%|█████████▏| 913/1000 [17:15<02:51,  1.98s/it]

Failure: No unique match for ML user 25225


Processing rows for N=2:  92%|█████████▏| 915/1000 [17:17<02:07,  1.50s/it]

Success: ML user 25308 matched with auxiliary user 201844 (Score: 1.2237615730090035)


Processing rows for N=2:  92%|█████████▏| 917/1000 [17:19<01:38,  1.18s/it]

Failure: No unique match for ML user 25315


Processing rows for N=2:  92%|█████████▏| 920/1000 [17:26<02:12,  1.66s/it]

Success: ML user 25423 matched with auxiliary user 463554 (Score: 1.285282441454849)


Processing rows for N=2:  93%|█████████▎| 930/1000 [17:31<00:38,  1.84it/s]

Failure: No unique match for ML user 25851


Processing rows for N=2:  93%|█████████▎| 931/1000 [17:36<01:51,  1.61s/it]

Failure: No unique match for ML user 25871


Processing rows for N=2:  93%|█████████▎| 934/1000 [17:41<01:38,  1.50s/it]

Failure: No unique match for ML user 25964


Processing rows for N=2:  94%|█████████▎| 937/1000 [17:47<02:17,  2.18s/it]

Success: ML user 26051 matched with auxiliary user 1035416 (Score: 1.0456532803823515)


Processing rows for N=2:  94%|█████████▍| 939/1000 [17:51<02:12,  2.17s/it]

Failure: No unique match for ML user 26125


Processing rows for N=2:  94%|█████████▍| 941/1000 [17:54<01:41,  1.71s/it]

Failure: No unique match for ML user 26144


Processing rows for N=2:  94%|█████████▍| 942/1000 [17:55<01:40,  1.73s/it]

Failure: No unique match for ML user 26243


Processing rows for N=2:  95%|█████████▌| 953/1000 [18:04<00:33,  1.42it/s]

Failure: No unique match for ML user 26584


Processing rows for N=2:  95%|█████████▌| 954/1000 [18:05<00:44,  1.04it/s]

Success: ML user 26598 matched with auxiliary user 691171 (Score: 0.9965756980879275)


Processing rows for N=2:  96%|█████████▌| 955/1000 [18:08<01:07,  1.51s/it]

Failure: No unique match for ML user 26600


Processing rows for N=2:  96%|█████████▌| 962/1000 [18:18<01:04,  1.69s/it]

Failure: No unique match for ML user 26744


Processing rows for N=2:  97%|█████████▋| 966/1000 [18:21<00:37,  1.11s/it]

Failure: No unique match for ML user 26907


Processing rows for N=2:  97%|█████████▋| 974/1000 [18:35<01:19,  3.05s/it]

Failure: No unique match for ML user 27120


Processing rows for N=2:  98%|█████████▊| 977/1000 [18:38<00:43,  1.87s/it]

Failure: No unique match for ML user 27171


Processing rows for N=2:  98%|█████████▊| 980/1000 [18:44<00:41,  2.08s/it]

Failure: No unique match for ML user 27423


Processing rows for N=2:  98%|█████████▊| 982/1000 [18:45<00:23,  1.33s/it]

Failure: No unique match for ML user 27472


Processing rows for N=2:  99%|█████████▊| 987/1000 [18:51<00:16,  1.26s/it]

Failure: No unique match for ML user 27583


Processing rows for N=2:  99%|█████████▉| 989/1000 [18:53<00:12,  1.13s/it]

Failure: No unique match for ML user 27726


Processing rows for N=2:  99%|█████████▉| 991/1000 [18:54<00:08,  1.02it/s]

Failure: No unique match for ML user 27751


Processing rows for N=2:  99%|█████████▉| 993/1000 [18:56<00:06,  1.07it/s]

Failure: No unique match for ML user 27862


Processing rows for N=2:  99%|█████████▉| 994/1000 [18:57<00:05,  1.02it/s]

Failure: No unique match for ML user 27869


Processing rows for N=2: 100%|█████████▉| 996/1000 [19:00<00:05,  1.28s/it]

Failure: No unique match for ML user 27902


Processing rows for N=2: 100%|██████████| 1000/1000 [19:03<00:00,  1.14s/it]


Total matches found for N=2: 29 out of 318 attempts

--- Starting experiments with N = 4 ratings ---



Processing rows for N=4:   1%|          | 12/1000 [00:14<24:13,  1.47s/it]

Success: ML user 487 matched with auxiliary user 48758 (Score: 1.0616324168138107)


Processing rows for N=4:   1%|▏         | 14/1000 [00:16<21:55,  1.33s/it]

Success: ML user 531 matched with auxiliary user 308978 (Score: 1.0850464764398127)


Processing rows for N=4:   5%|▍         | 48/1000 [00:51<29:44,  1.87s/it]

Failure: No unique match for ML user 1189


Processing rows for N=4:   6%|▌         | 56/1000 [01:01<15:37,  1.01it/s]

Success: ML user 1308 matched with auxiliary user 72246 (Score: 1.190155534091202)


Processing rows for N=4:   6%|▌         | 60/1000 [01:10<36:00,  2.30s/it]

Success: ML user 1399 matched with auxiliary user 319659 (Score: 1.7307448718545178)


Processing rows for N=4:   9%|▉         | 92/1000 [01:49<32:42,  2.16s/it]

Success: ML user 2291 matched with auxiliary user 37128 (Score: 2.6123130999514874)


Processing rows for N=4:   9%|▉         | 94/1000 [01:52<27:39,  1.83s/it]

Success: ML user 2301 matched with auxiliary user 22535 (Score: 1.8167084975250027)


Processing rows for N=4:  12%|█▏        | 118/1000 [02:23<36:54,  2.51s/it]

Success: ML user 2970 matched with auxiliary user 1120522 (Score: 2.211094088264726)


Processing rows for N=4:  14%|█▎        | 136/1000 [02:50<17:50,  1.24s/it]

Success: ML user 3485 matched with auxiliary user 105311 (Score: 1.6760758757710628)


Processing rows for N=4:  15%|█▌        | 154/1000 [03:03<09:05,  1.55it/s]

Success: ML user 4028 matched with auxiliary user 1779 (Score: 2.128895198123047)


Processing rows for N=4:  16%|█▌        | 162/1000 [03:10<19:14,  1.38s/it]

Success: ML user 4264 matched with auxiliary user 97270 (Score: 1.7998539660812427)


Processing rows for N=4:  19%|█▊        | 186/1000 [03:46<38:18,  2.82s/it]

Success: ML user 4898 matched with auxiliary user 97757 (Score: 2.4907533445409102)


Processing rows for N=4:  22%|██▏       | 216/1000 [04:19<13:21,  1.02s/it]

Success: ML user 5437 matched with auxiliary user 311004 (Score: 1.4105172962917223)


Processing rows for N=4:  24%|██▎       | 237/1000 [04:42<28:36,  2.25s/it]

Success: ML user 6028 matched with auxiliary user 680572 (Score: 1.9349021926754468)


Processing rows for N=4:  24%|██▍       | 245/1000 [04:59<35:37,  2.83s/it]

Success: ML user 6347 matched with auxiliary user 841387 (Score: 1.4906986228336572)


Processing rows for N=4:  26%|██▌       | 258/1000 [05:12<16:31,  1.34s/it]

Success: ML user 6558 matched with auxiliary user 129365 (Score: 2.218928454368574)


Processing rows for N=4:  26%|██▌       | 260/1000 [05:20<32:17,  2.62s/it]

Success: ML user 6588 matched with auxiliary user 91480 (Score: 1.7397550365078047)


Processing rows for N=4:  30%|██▉       | 297/1000 [05:52<18:52,  1.61s/it]

Failure: No unique match for ML user 7572


Processing rows for N=4:  30%|███       | 303/1000 [05:56<10:11,  1.14it/s]

Success: ML user 7706 matched with auxiliary user 8309 (Score: 1.4475865008196558)


Processing rows for N=4:  31%|███       | 308/1000 [06:03<16:17,  1.41s/it]

Success: ML user 7818 matched with auxiliary user 41432 (Score: 1.2881271303111195)


Processing rows for N=4:  32%|███▏      | 318/1000 [06:15<12:31,  1.10s/it]

Success: ML user 8056 matched with auxiliary user 776180 (Score: 1.6453853309722786)


Processing rows for N=4:  33%|███▎      | 326/1000 [06:28<26:43,  2.38s/it]

Failure: No unique match for ML user 8217


Processing rows for N=4:  36%|███▌      | 356/1000 [06:50<10:47,  1.01s/it]

Failure: No unique match for ML user 9153


Processing rows for N=4:  36%|███▌      | 359/1000 [06:55<15:58,  1.49s/it]

Success: ML user 9236 matched with auxiliary user 128946 (Score: 1.2251118334620594)


Processing rows for N=4:  38%|███▊      | 375/1000 [07:05<09:09,  1.14it/s]

Success: ML user 9698 matched with auxiliary user 20497 (Score: 1.2748848239230575)


Processing rows for N=4:  42%|████▏     | 423/1000 [08:03<31:33,  3.28s/it]

Success: ML user 10792 matched with auxiliary user 28882 (Score: 2.168822179264669)


Processing rows for N=4:  44%|████▍     | 439/1000 [08:16<13:45,  1.47s/it]

Success: ML user 11217 matched with auxiliary user 98149 (Score: 1.6769337132935789)


Processing rows for N=4:  46%|████▌     | 459/1000 [08:34<11:34,  1.28s/it]

Success: ML user 12028 matched with auxiliary user 1132559 (Score: 1.192507480387794)


Processing rows for N=4:  47%|████▋     | 467/1000 [08:46<15:10,  1.71s/it]

Success: ML user 12421 matched with auxiliary user 630450 (Score: 1.7431660230361867)


Processing rows for N=4:  50%|█████     | 505/1000 [09:23<13:31,  1.64s/it]

Success: ML user 13567 matched with auxiliary user 250211 (Score: 1.5767258541873437)


Processing rows for N=4:  55%|█████▍    | 545/1000 [10:08<21:26,  2.83s/it]

Success: ML user 14556 matched with auxiliary user 319659 (Score: 1.8872454159859213)


Processing rows for N=4:  57%|█████▋    | 569/1000 [10:28<06:41,  1.07it/s]

Success: ML user 15293 matched with auxiliary user 42716 (Score: 1.249182461920362)


Processing rows for N=4:  61%|██████    | 611/1000 [11:23<17:51,  2.75s/it]

Success: ML user 16363 matched with auxiliary user 16450 (Score: 1.5974496514730097)


Processing rows for N=4:  71%|███████   | 712/1000 [12:39<04:10,  1.15it/s]

Success: ML user 18853 matched with auxiliary user 9833 (Score: 1.1660687478046814)


Processing rows for N=4:  72%|███████▏  | 719/1000 [12:44<03:55,  1.19it/s]

Success: ML user 19048 matched with auxiliary user 22888 (Score: 1.2211230643698918)


Processing rows for N=4:  73%|███████▎  | 733/1000 [13:00<06:27,  1.45s/it]

Success: ML user 19496 matched with auxiliary user 125138 (Score: 1.499379365752407)


Processing rows for N=4:  75%|███████▍  | 749/1000 [13:14<02:38,  1.58it/s]

Success: ML user 19848 matched with auxiliary user 105375 (Score: 1.496149804999398)


Processing rows for N=4:  77%|███████▋  | 774/1000 [13:36<04:04,  1.08s/it]

Success: ML user 20504 matched with auxiliary user 8638 (Score: 1.3693829555168715)


Processing rows for N=4:  81%|████████  | 810/1000 [14:22<10:33,  3.34s/it]

Failure: No unique match for ML user 21364


Processing rows for N=4:  82%|████████▏ | 818/1000 [14:30<03:42,  1.23s/it]

Success: ML user 21614 matched with auxiliary user 125257 (Score: 1.4298088050098725)


Processing rows for N=4:  84%|████████▍ | 845/1000 [15:05<05:28,  2.12s/it]

Success: ML user 22746 matched with auxiliary user 16271 (Score: 1.1303150425372468)


Processing rows for N=4:  88%|████████▊ | 882/1000 [15:40<02:18,  1.17s/it]

Success: ML user 24140 matched with auxiliary user 18767 (Score: 1.2279166040841298)


Processing rows for N=4:  90%|████████▉ | 897/1000 [15:59<01:52,  1.09s/it]

Failure: No unique match for ML user 24682


Processing rows for N=4:  96%|█████████▌| 961/1000 [17:20<01:29,  2.29s/it]

Success: ML user 26744 matched with auxiliary user 128799 (Score: 1.0189669347594772)


Processing rows for N=4:  97%|█████████▋| 974/1000 [17:37<01:19,  3.06s/it]

Success: ML user 27120 matched with auxiliary user 97757 (Score: 2.40439628261444)


Processing rows for N=4: 100%|██████████| 1000/1000 [18:02<00:00,  1.08s/it]


Total matches found for N=4: 39 out of 45 attempts

--- Starting experiments with N = 6 ratings ---



Processing rows for N=6:   1%|▏         | 14/1000 [00:16<21:11,  1.29s/it]

Success: ML user 531 matched with auxiliary user 308978 (Score: 1.7318998386528157)


Processing rows for N=6:   9%|▉         | 92/1000 [01:47<32:41,  2.16s/it]

Success: ML user 2291 matched with auxiliary user 6 (Score: 2.909537117974631)


Processing rows for N=6:  12%|█▏        | 118/1000 [02:20<36:37,  2.49s/it]

Success: ML user 2970 matched with auxiliary user 1120522 (Score: 3.0486564860549095)


Processing rows for N=6:  26%|██▌       | 260/1000 [05:14<37:41,  3.06s/it]

Success: ML user 6588 matched with auxiliary user 91480 (Score: 2.359941448353122)


Processing rows for N=6:  30%|██▉       | 297/1000 [05:45<18:52,  1.61s/it]

Failure: No unique match for ML user 7572


Processing rows for N=6:  33%|███▎      | 326/1000 [06:20<26:28,  2.36s/it]

Success: ML user 8217 matched with auxiliary user 169973 (Score: 1.986059338296304)


Processing rows for N=6:  36%|███▌      | 359/1000 [06:47<14:52,  1.39s/it]

Success: ML user 9236 matched with auxiliary user 169973 (Score: 1.7679568068301046)


Processing rows for N=6:  42%|████▏     | 423/1000 [07:54<31:43,  3.30s/it]

Success: ML user 10792 matched with auxiliary user 246679 (Score: 2.9854642153470383)


Processing rows for N=6:  55%|█████▍    | 545/1000 [09:59<21:29,  2.83s/it]

Success: ML user 14556 matched with auxiliary user 319659 (Score: 2.9644511946020216)


Processing rows for N=6:  73%|███████▎  | 733/1000 [12:46<06:30,  1.46s/it]

Success: ML user 19496 matched with auxiliary user 125138 (Score: 2.1133775302010407)


Processing rows for N=6:  90%|████████▉ | 897/1000 [15:44<01:51,  1.09s/it]

Success: ML user 24682 matched with auxiliary user 2242 (Score: 2.0924346104926634)


Processing rows for N=6: 100%|██████████| 1000/1000 [17:45<00:00,  1.07s/it]


Total matches found for N=6: 10 out of 11 attempts

--- Starting experiments with N = 8 ratings ---



Processing rows for N=8:  30%|██▉       | 297/1000 [05:41<18:45,  1.60s/it]

Success: ML user 7572 matched with auxiliary user 16515 (Score: 3.191223661146097)


Processing rows for N=8:  33%|███▎      | 326/1000 [06:16<26:29,  2.36s/it]

Success: ML user 8217 matched with auxiliary user 169973 (Score: 2.5130606234070187)


Processing rows for N=8:  36%|███▌      | 359/1000 [06:42<14:57,  1.40s/it]

Success: ML user 9236 matched with auxiliary user 128946 (Score: 2.619855915934093)


Processing rows for N=8:  90%|████████▉ | 896/1000 [15:26<02:35,  1.49s/it]

Success: ML user 24682 matched with auxiliary user 2242 (Score: 2.7408551420403366)


Processing rows for N=8: 100%|██████████| 1000/1000 [17:24<00:00,  1.04s/it]

Total matches found for N=8: 4 out of 4 attempts
Success rate for N=2: 9.12% (29 / 318)
Success rate for N=4: 86.67% (39 / 45)
Success rate for N=6: 90.91% (10 / 11)
Success rate for N=8: 100.00% (4 / 4)



