In [None]:
# Install the notebook's dependencies with the %pip line magic.
# NOTE(review): only pandas and fuzzywuzzy are imported in the cells visible here;
# numpy and rapidfuzz are installed but never referenced directly - confirm they are needed.
# NOTE(review): versions are unpinned, so a fresh environment may resolve different releases.
%pip install pandas
%pip install fuzzywuzzy
%pip install numpy
%pip install rapidfuzz

In [None]:
import pandas as pd
from fuzzywuzzy import process

In [None]:
# Read the two raw inputs: the transactions file is semicolon-separated,
# the contacts export is comma-separated.
transactions_df = pd.read_csv('../data/raw/upload-transactions.csv', sep=';')
contacts_df = pd.read_csv('../data/raw/contacts_export.csv', sep=',')

# Print the first five contact rows as a quick sanity check of the load.
print(contacts_df.head(5))


In [None]:
# Preprocess data: lowercase and strip 'Observaciones' in transactions and 'Name' in contacts for case-insensitive matching (non-string values become empty strings)
def preprocess_name(name):
    """Normalise a raw value for case-insensitive comparison.

    Args:
        name: Value to normalise; expected to be a string, but any object
            is accepted.

    Returns:
        The lower-cased string with surrounding whitespace removed, or an
        empty string when ``name`` is not a ``str`` (for example ``None``
        or a float).
    """
    # Guard clause: anything that is not text collapses to an empty string.
    if not isinstance(name, str):
        return ''
    lowered = name.lower()
    return lowered.strip()

# Overwrite both text columns with their normalised form (see preprocess_name)
# so that the later fuzzy comparison ignores case and surrounding whitespace.
transactions_df['Observaciones'] = transactions_df['Observaciones'].map(preprocess_name)
contacts_df['Name'] = contacts_df['Name'].map(preprocess_name)

# Print the first five values of each normalised column as a sanity check.
print(transactions_df['Observaciones'].head(5), contacts_df['Name'].head(5), sep='\n')

In [None]:
# Function to find the top 2 matches for each transaction detail
def find_top_matches(detail, name_list, n=2):
    """Return up to ``n`` fuzzy matches for ``detail`` among ``name_list``.

    Args:
        detail: Query text to look up.
        name_list: Candidate names, passed unchanged to ``process.extract``
            as its choices.
        n: Maximum number of matches to return (default 2).

    Returns:
        The list returned by ``process.extract(detail, name_list, limit=n)``,
        or an empty list when ``detail`` is not a string or is blank.
    """
    # A blank query carries no information: fuzzywuzzy would log a warning and
    # hand back arbitrary candidates with score 0, which downstream code would
    # then report as if they were real matches. Report "no match" instead.
    if not isinstance(detail, str) or not detail.strip():
        return []
    return process.extract(detail, name_list, limit=n)

# Score every transaction description against the full contact-name column.
# find_top_matches is called with its default limit, so each row receives a
# list of at most two candidates.
transactions_df['Matches'] = transactions_df['Observaciones'].apply(
    find_top_matches, args=(contacts_df['Name'],)
)

# Flatten each match list into separate columns: element 0 of a match goes to
# the Match_* column and element 1 to the Score_* column. A slot is None when
# the list has no entry at that rank.
transactions_df['Match_1'] = transactions_df['Matches'].map(
    lambda found: found[0][0] if len(found) >= 1 else None
)
transactions_df['Score_1'] = transactions_df['Matches'].map(
    lambda found: found[0][1] if len(found) >= 1 else None
)
transactions_df['Match_2'] = transactions_df['Matches'].map(
    lambda found: found[1][0] if len(found) >= 2 else None
)
transactions_df['Score_2'] = transactions_df['Matches'].map(
    lambda found: found[1][1] if len(found) >= 2 else None
)

# Keep only the columns of interest for the report and print them.
result_df = transactions_df[[
    'transaction_amount',
    'transaction_date',
    'Observaciones',
    'Match_1',
    'Score_1',
    'Match_2',
    'Score_2',
]]
print(result_df)

# Persist the report as CSV, omitting the DataFrame index.
result_df.to_csv('../data/raw/matched_transactions.csv', index=False)