In [2]:

"""
Swiss Bank Complaint Synthetic Data Generator

This module generates realistic synthetic data for a bank complaint management system.
It creates fake customer profiles and various types of banking complaints with 
realistic details, then stores them in both MongoDB and MySQL databases.

Key Features:
- Generates realistic customer profiles with banking details
- Creates themed complaints (fraud, account issues, ATM problems, etc.)
- Populates both NoSQL (MongoDB) and SQL (MySQL) databases
- Maintains referential integrity between customers and complaints
- Supports various complaint channels and severity levels
"""

'\nSwiss Bank Complaint Synthetic Data Generator\n\nThis module generates realistic synthetic data for a bank complaint management system.\nIt creates fake customer profiles and various types of banking complaints with \nrealistic details, then stores them in both MongoDB and MySQL databases.\n\nKey Features:\n- Generates realistic customer profiles with banking details\n- Creates themed complaints (fraud, account issues, ATM problems, etc.)\n- Populates both NoSQL (MongoDB) and SQL (MySQL) databases\n- Maintains referential integrity between customers and complaints\n- Supports various complaint channels and severity levels\n'

In [3]:
# Complaint channels: branch, email, chat, mobile_app, phone
# Complaint themes: fraudulent_activities, deposit_issues, account_freezes, atm_issues, dispute_resolution

In [4]:
from pymongo import MongoClient
import pandas as pd

In [5]:
# Connect to the local MongoDB instance.
client = MongoClient("mongodb://localhost:27017/")

# Select the 'swiss_bank' database and the two collections used below.
db = client['swiss_bank']
complaints_collection = db['complaints']
customers_collection = db['customers']


def load_collection_frame(collection):
    """Return every document of ``collection`` as a DataFrame without ``_id``.

    Parameters
    ----------
    collection
        Object exposing ``find()`` that yields dict-like documents
        (here: a collection of the 'swiss_bank' database).

    Returns
    -------
    pandas.DataFrame
        One row per document. The '_id' column is removed when present.
    """
    frame = pd.DataFrame(list(collection.find()))
    # errors='ignore' keeps this safe for an empty collection, where the
    # resulting frame has no '_id' column to drop. Returning a new frame
    # (instead of inplace=True) keeps the cell idempotent on re-run.
    return frame.drop(columns=['_id'], errors='ignore')


# Materialise both collections as DataFrames for the analysis cells.
complaints_df = load_collection_frame(complaints_collection)
customers_df = load_collection_frame(customers_collection)




In [6]:
# Preview the first five complaint records to confirm the load worked.
display(complaints_df.head(n=5))

Unnamed: 0,complaint_id,customer_id,theme,title,description,channel,severity,submission_date,status,attachments,related_transactions,customer_sentiment,urgency_keywords,resolution_time_expected,financial_impact
0,f7b2042e-7d76-4ca2-aaaa-688649e66e1a,e10035e3-45d6-47c2-9d7a-7cba653dff7b,fraudulent_activities,Suspicious transactions I didn't authorize,There are multiple charges on my card totaling...,email,medium,2025-06-27T01:41:54.276178,pending_review,[police_report.pdf],[{'transaction_id': '9d50c0d6-23da-4d92-a7f9-c...,concerned,"[urgent, fraudulent, unauthorized, stolen, ide...",51,1837.12
1,50d343e1-d217-4004-8fd3-652801a5f261,43ac920d-7fd5-4f65-beae-c942dd2a5e57,account_freezes,Account freeze causing financial hardship,The account freeze is causing severe financial...,email,high,2025-07-02T01:41:54.278177,new,[],[{'transaction_id': '9b59313c-a165-4c4c-b947-f...,neutral,"[frozen, hold, suspended, cannot access, finan...",42,2024.31
2,f877bb3d-924a-4c60-b153-306f22be4655,a7686522-e953-45ea-a081-41e1af6c3ef1,account_freezes,My account has been frozen without explanation,Your system flagged my account for suspicious ...,branch,medium,2025-06-26T01:41:54.279177,escalated,[],[{'transaction_id': '08f501c5-5ca4-40fe-8591-8...,neutral,"[frozen, hold, suspended, cannot access, finan...",99,3369.81
3,9489e77d-38d0-4d29-b48d-011c70179b08,044fb359-9635-4dc9-b6d0-7c97477ffa2d,dispute_resolution,No response to my dispute claim,I filed a dispute for $1343.07 in fraudulent c...,email,medium,2025-06-25T01:41:54.279177,pending_review,[police_report.pdf],[{'transaction_id': 'e3842b6f-5bc1-4a68-af71-8...,angry,"[denied, no response, inadequate, refused, too...",34,1343.07
4,407894e8-e0c2-4887-8d2e-9f19bcf0b604,fc5eba86-c7b5-4ba8-b7b9-86e8a56f1fb9,fraudulent_activities,Suspicious transactions I didn't authorize,I noticed $4676.13 was withdrawn from my accou...,chat,medium,2025-06-29T01:41:54.280177,resolved,[bank_statement.pdf],[{'transaction_id': 'e166f99b-8fba-417b-b175-f...,disappointed,"[urgent, fraudulent, unauthorized, stolen, ide...",75,4676.13


In [7]:
# Count customers who filed two or more complaints. value_counts() tallies the
# complaints per customer_id, so this measures repeat complainants rather than
# collisions between complaint identifiers.
# NOTE(review): if the goal was to detect duplicated complaint IDs, run the
# same check on complaints_df['complaint_id'] instead — confirm intent.
complaints_per_customer = complaints_df['customer_id'].value_counts()
duplicate_count = (complaints_per_customer >= 2).sum()
print(f"Number of customers with multiple complaints: {duplicate_count}")


Number of duplicate complaint IDs: 56


In [12]:
# List the column names of the customers table to see which fields are available.
customers_df.columns

Index(['customer_id', 'name', 'email', 'phone', 'account_number',
       'account_type', 'registration_date', 'previous_complaints',
       'credit_score', 'monthly_balance', 'location', 'age', 'occupation'],
      dtype='object')

In [9]:
# Number of complaints per theme, most frequent theme first.
complaints_df['theme'].value_counts(ascending=False)

theme
fraudulent_activities    59
deposit_issues           46
account_freezes          37
atm_issues               35
dispute_resolution       23
Name: count, dtype: int64

In [10]:
# Preview the first five customer profiles.
customers_df.head(n=5)

Unnamed: 0,customer_id,name,email,phone,account_number,account_type,registration_date,previous_complaints,credit_score,monthly_balance,location,age,occupation
0,99f7ac13-0f6a-458b-81aa-ff5cc6031655,Julie Johnson,acontreras@example.org,761.454.8812x0593,ROFB53770049603280,business,2021-02-14,5,326,41873.67,Hardyfurt,25,Freight forwarder
1,2b2278c5-1736-49a7-a9d0-8716b8bd05dd,Kristy Fowler,leedanielle@example.org,955-954-6416x9234,TBXL95242082100419,savings,2016-01-07,0,557,4232.55,New Melissaside,27,"Conservator, museum/gallery"
2,9d813e7b-cd5c-4bcd-b11d-2f0895f2dc70,Brandon Calhoun,rwilson@example.org,001-267-405-3356x71409,HICZ79698948201265,business,2015-11-09,4,568,28350.03,New Alexandra,55,"Research officer, trade union"
3,2bc766a9-d833-4ad0-bec6-2b8b54768f27,Paul Jones,walterbailey@example.com,001-366-246-1636,RCFI83400440537529,checking,2018-09-05,5,623,10723.41,Tiffanyborough,23,IT trainer
4,76565569-d684-4285-b4b2-0a89c266a217,Amy Burns,todddavis@example.org,401-765-4708x69916,ZKOD20028254733935,checking,2015-09-30,2,778,34483.74,Sancheztown,29,"Designer, ceramics/pottery"


In [11]:
# Sanity-check the customers table: count customer_id values that occur on two
# or more rows. Zero means every customer profile has a distinct identifier.
customer_id_occurrences = customers_df['customer_id'].value_counts()
duplicate_count = (customer_id_occurrences >= 2).sum()
print(f"Number of duplicate customer IDs: {duplicate_count}")


Number of duplicate complaint IDs: 0
