# In [46]:
import pandas as pd

# Remote CSV locations for the three inputs used by this analysis.
transactional_data_url = 'https://gist.githubusercontent.com/cloudwalk-tests/76993838e65d7e0f988f40f1b1909c97/raw/9ceae962009236d3570f46e59ce9aa334e4e290f/transactional-sample.csv'
entity_summary_url = 'https://raw.githubusercontent.com/luizarnoni/CloudWalk---Risk-Analyst-Case/main/entity_summary.csv'
suspicious_transactions_url = 'https://raw.githubusercontent.com/luizarnoni/CloudWalk---Risk-Analyst-Case/main/suspicious_transactions.csv'

# Download each file and keep only the listed columns, in the listed order.
transactional_data = pd.read_csv(transactional_data_url).loc[
    :, ['user_id', 'merchant_id', 'transaction_amount', 'transaction_id']
]
entity_summary = pd.read_csv(entity_summary_url).loc[
    :, ['entity_id', 'entity_type', 'num_suspicious_transactions']
]
suspicious_transactions = pd.read_csv(suspicious_transactions_url).loc[
    :, ['transaction_id', 'score']
]

# Left join: every transaction is kept, and `score` is NaN for transactions
# that do not appear in the suspicious list.
merged_data = transactional_data.merge(
    suspicious_transactions,
    how='left',
    on='transaction_id',
)

def _classify_fraud_chance(score, entity_related):
    """Return the fraud-chance label for a single transaction.

    Args:
        score: Suspicion score of the transaction; NaN/None when the
            transaction has no entry in the suspicious list.
        entity_related: True when the transaction's user or merchant appears
            in ``entity_summary``.

    Returns:
        One of 'high', 'medium-high', 'medium' or 'no evidence'.
    """
    if not entity_related or pd.isnull(score) or score <= 0:
        return 'no evidence'
    if score >= 5:
        return 'high'
    # The bands are contiguous on purpose: a score strictly between 4 and 5
    # is 'medium-high' rather than silently falling through to 'no evidence'.
    if score > 2:
        return 'medium-high'
    return 'medium'


# Build the entity -> suspicion-count lookup once instead of filtering
# `entity_summary` for every transaction. Rows with a missing entity_id can
# never match a transaction, and for a duplicated entity_id the first row wins.
# NOTE(review): `entity_type` is not consulted, so a user id that equals a
# merchant's entity_id (or vice versa) counts as related -- confirm that ids
# are unique across entity types.
_known_entities = (
    entity_summary
    .dropna(subset=['entity_id'])
    .drop_duplicates(subset='entity_id', keep='first')
)
_suspicion_count_by_entity = dict(
    zip(_known_entities['entity_id'], _known_entities['num_suspicious_transactions'])
)

# Read whole columns rather than using iterrows(): iterrows() builds one Series
# per row, which upcasts integer ids to float whenever the row also holds float
# values, so the ids would be exported as e.g. `123.0`.
_transaction_ids = merged_data['transaction_id'].tolist()
_user_ids = merged_data['user_id'].tolist()
_merchant_ids = merged_data['merchant_id'].tolist()
_scores = merged_data['score'].tolist()

_fraud_chances = [
    _classify_fraud_chance(
        score,
        user_id in _suspicion_count_by_entity or merchant_id in _suspicion_count_by_entity,
    )
    for score, user_id, merchant_id in zip(_scores, _user_ids, _merchant_ids)
]

fraud_evaluation_dict = {
    'transaction_id': _transaction_ids,
    'fraud_chance': _fraud_chances,
    # A transaction is listed as suspicious exactly when it landed in a band.
    'listed_as_suspicious': [chance != 'no evidence' for chance in _fraud_chances],
    # Entities absent from `entity_summary` contribute a count of 0.
    'user_related_suspicion_count': [
        _suspicion_count_by_entity.get(user_id, 0) for user_id in _user_ids
    ],
    'merchant_related_suspicion_count': [
        _suspicion_count_by_entity.get(merchant_id, 0) for merchant_id in _merchant_ids
    ],
}

fraud_evaluation_df = pd.DataFrame(fraud_evaluation_dict)

# An ordered categorical makes the sort go from most to least severe instead
# of alphabetically.
fraud_evaluation_df['fraud_chance'] = pd.Categorical(
    fraud_evaluation_df['fraud_chance'],
    categories=['high', 'medium-high', 'medium', 'no evidence'],
    ordered=True,
)
# A stable sort keeps the original transaction order inside each band, so the
# exported file is reproducible from run to run.
fraud_evaluation_df = fraud_evaluation_df.sort_values(by='fraud_chance', kind='mergesort')

fraud_evaluation_df.to_csv('fraud_evaluation_results.csv', index=False)
