In [1]:

"""
Swiss Bank Complaint Synthetic Data Generator

This module generates realistic synthetic data for a bank complaint management system.
It creates fake customer profiles and various types of banking complaints with 
realistic details, then stores them in both MongoDB and MySQL databases.

Key Features:
- Generates realistic customer profiles with banking details
- Creates themed complaints (fraud, account issues, ATM problems, etc.)
- Populates both NoSQL (MongoDB) and SQL (MySQL) databases
- Maintains referential integrity between customers and complaints
- Supports various complaint channels and severity levels
"""

'\nSwiss Bank Complaint Synthetic Data Generator\n\nThis module generates realistic synthetic data for a bank complaint management system.\nIt creates fake customer profiles and various types of banking complaints with \nrealistic details, then stores them in both MongoDB and MySQL databases.\n\nKey Features:\n- Generates realistic customer profiles with banking details\n- Creates themed complaints (fraud, account issues, ATM problems, etc.)\n- Populates both NoSQL (MongoDB) and SQL (MySQL) databases\n- Maintains referential integrity between customers and complaints\n- Supports various complaint channels and severity levels\n'

In [2]:
# Channels: branch, email, chat, mobile_app, phone
# Themes: fraudulent_activities, deposit_issues, account_freezes, atm_issues, dispute_resolution

In [3]:
from pymongo import MongoClient
import pandas as pd

In [6]:
# Open a client against the MongoDB server on this machine (port 27017).
client = MongoClient("mongodb://localhost:27017/")

# Select the 'swiss_bank' database and take handles to the two collections
# this notebook inspects.
db = client['swiss_bank']
complaints_collection, customers_collection = db['complaints'], db['customers']

# Read every document of each collection into a DataFrame and discard the
# '_id' column; errors='ignore' turns the drop into a no-op when that column
# is not present.
complaints_df = pd.DataFrame(list(complaints_collection.find())).drop(
    columns=['_id'], errors='ignore'
)
customers_df = pd.DataFrame(list(customers_collection.find())).drop(
    columns=['_id'], errors='ignore'
)



In [7]:
# Preview the first five complaint records (five is also head()'s default).
display(complaints_df.head(n=5))

Unnamed: 0,complaint_id,customer_id,theme,title,description,channel,severity,submission_date,status,attachments,related_transactions,customer_sentiment,urgency_keywords,resolution_time_expected,financial_impact
0,68d65540-b669-4267-9127-1dddb5e08007,f1ab2c88-d310-4723-bb7d-acd1013885b8,dispute_resolution,Refused to investigate obvious fraud,I filed a dispute for $3137.38 in fraudulent c...,mobile_app,high,2025-06-27T01:20:46.188953,resolved,"[receipt.jpg, police_report.pdf, photo_evidenc...",[{'transaction_id': 'f2669c52-1fa0-4754-9a86-4...,disappointed,"[denied, no response, inadequate, refused, too...",55,3137.38
1,5d578fd4-8327-492c-9123-f2e4b58a98f6,e5bb446b-70ef-4a9a-9a99-03c673eaf667,atm_issues,ATM charged me but didn't dispense money,I made a $3136.06 deposit at the ATM on 06/14/...,mobile_app,high,2025-07-01T01:20:46.189952,escalated,[],[{'transaction_id': '76aa7eef-f888-4ae8-bd51-8...,neutral,"[ate my card, no cash, not credited, out of or...",34,3136.06
2,d991195c-f8dc-4344-af78-8988930e641d,08bcbd12-af25-47fc-b6c8-d635e05eb8aa,dispute_resolution,My fraud dispute was denied unfairly,I provided clear evidence that the $104.83 cha...,phone,high,2025-06-27T01:20:46.189952,new,"[bank_statement.pdf, bank_statement.pdf]",[{'transaction_id': '53145659-c8f3-46fb-8775-8...,concerned,"[denied, no response, inadequate, refused, too...",72,104.83
3,7f37fa45-4e59-42d6-8f01-305e28b9b29c,6ba276d1-1f5c-499c-8940-11c912fee000,atm_issues,ATM out of order after taking my transaction,I made a $4505.72 deposit at the ATM on 06/10/...,branch,medium,2025-06-29T01:20:46.190967,resolved,[],[{'transaction_id': '82c3df10-3c26-4bdf-a232-3...,neutral,"[ate my card, no cash, not credited, out of or...",98,4505.72
4,a1fda9c8-d641-498f-b31d-c13b622b5ca7,7c36a8d5-e182-48fb-a1b4-e1290162a6bf,atm_issues,ATM out of order after taking my transaction,The ATM processed my withdrawal of $1033.69 an...,mobile_app,medium,2025-06-29T01:20:46.190967,resolved,[],[{'transaction_id': 'ff62a700-3d65-4030-bfe9-5...,angry,"[ate my card, no cash, not credited, out of or...",67,1033.69


In [7]:
# Count the distinct customers that appear on two or more complaints.
# value_counts() tallies complaints per customer_id, the comparison flags the
# customers with at least two, and sum() counts the flagged customers.
# This inspects customer_id (not complaint_id), so the figure measures repeat
# complainants rather than duplicated complaint identifiers.
duplicate_count = (complaints_df['customer_id'].value_counts() >= 2).sum()
print(f"Number of customers with more than one complaint: {duplicate_count}")


Number of duplicate complaint IDs: 56


In [12]:
# Show the column labels of the customers DataFrame.
customers_df.columns

Index(['customer_id', 'name', 'email', 'phone', 'account_number',
       'account_type', 'registration_date', 'previous_complaints',
       'credit_score', 'monthly_balance', 'location', 'age', 'occupation'],
      dtype='object')

In [9]:
# Tally the number of complaints recorded under each theme.
complaints_df.loc[:, 'theme'].value_counts()

theme
fraudulent_activities    59
deposit_issues           46
account_freezes          37
atm_issues               35
dispute_resolution       23
Name: count, dtype: int64

In [8]:
# Preview the first five customer records (five is also head()'s default).
customers_df.head(n=5)

Unnamed: 0,customer_id,name,email,phone,account_number,account_type,registration_date,previous_complaints,credit_score,monthly_balance,location,age,occupation
0,64a175be-668f-40b3-a2e5-7e6901b6ae53,Pruthvi T Anumandla,panumandla@iquestsols.com,19405940918,QXVS82393754839570,business,2022-12-24,3,412,31870.33,New Katherine,59,"Buyer, retail"
1,6ba276d1-1f5c-499c-8940-11c912fee000,Chnadra,chandras@iquestsols.com,15153060541,MJJY24718608419851,business,2019-07-12,2,391,38986.09,North Debbie,43,Lexicographer
2,54d8c57c-8136-4672-a522-241932ac4d4b,Kasturi Golla,kasturigolla@gmail.com,15157717829,EUQL29997269335775,checking,2016-01-05,4,440,28822.87,Katherineshire,69,Professor Emeritus
3,80b7fb3e-c203-45de-bfb5-3577095dfc50,Jeffrey Schmidt,jessicabenitez@example.org,11516195414,CSSJ98536534887716,premium,2017-12-22,4,509,25114.01,West Lisaside,35,"Secretary, company"
4,7a996e56-6594-43d8-bf59-45e3c6d4c4db,Meghan Anderson,mindyweaver@example.org,18941821561,QGZO35147192220046,checking,2023-04-07,5,472,49084.57,East David,33,Geophysical data processor


In [11]:
# Uniqueness check on the customers table: count the customer_id values that
# occur on two or more rows. value_counts() tallies rows per id, the
# comparison flags ids seen at least twice, and sum() counts the flagged ids.
duplicate_count = (customers_df['customer_id'].value_counts() >= 2).sum()
print(f"Number of duplicate customer IDs: {duplicate_count}")


Number of duplicate complaint IDs: 0
