In [1]:
import pandas as pd
import random
import string
from faker import Faker

In [2]:
# Fixed seed so that Restart Kernel -> Run All regenerates the same dataset.
SEED = 42

# Seed both sources of randomness used in this notebook: the stdlib `random`
# module (used by the helper functions) and Faker's shared generator (used
# for country codes, text, user agents and timestamps).
# NOTE(review): `date_time_this_year()` is bounded by the current date, so
# timestamps may still differ between runs made at different times.
random.seed(SEED)
Faker.seed(SEED)

# Initialize Faker
fake = Faker()

# Number of synthetic records
num_records = 1000

# Helper Functions
def random_string(length=10):
    """Return `length` random characters drawn (with replacement) from A-Z and 0-9."""
    alphabet = string.ascii_uppercase + string.digits
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)

def random_iban():
    """Return a fake IBAN-like string: the prefix "IBAN" followed by 20 random digits.

    The result is not a valid IBAN (no country code or check digits).
    """
    digit_chars = random.choices(string.digits, k=20)
    number_part = ''.join(digit_chars)
    return "IBAN" + number_part

def random_iso_currency():
    """Pick one of five three-letter currency codes uniformly at random."""
    currency_codes = ["EUR", "USD", "GBP", "JPY", "CHF"]
    chosen = random.choice(currency_codes)
    return chosen

def random_channel():
    """Pick one of the single-letter channel codes W, A, O, F, C uniformly at random."""
    channel_codes = ["W", "A", "O", "F", "C"]
    chosen = random.choice(channel_codes)
    return chosen

def random_causal():
    """Return Faker-generated filler text, requested with a 140-character cap."""
    max_chars = 140
    causal_text = fake.text(max_nb_chars=max_chars)
    return causal_text

# Build the synthetic dataset column by column, then persist it as CSV.

def _column(make_value):
    """Build one column by calling ``make_value`` once per record."""
    return [make_value() for _ in range(num_records)]


def _sometimes_blank(make_value, blank_rate):
    """Wrap ``make_value`` so it yields "" whenever the random draw is <= ``blank_rate``."""
    def draw():
        # The random draw happens first; the value is only generated when kept.
        return make_value() if random.random() > blank_rate else ""
    return draw


def _binary_flag():
    """Return "1" or "0" (as a string) with equal probability."""
    return random.choice(["1", "0"])


def _amount():
    """Return a uniform random amount between 10 and 10000, rounded to 2 decimals."""
    return round(random.uniform(10, 10000), 2)


def _timestamp():
    """Return a Faker date-time from the current year, formatted as a string."""
    moment = fake.date_time_this_year()
    return moment.strftime("%Y/%m/%d %H:%M:%S")


# Columns are generated in the order listed, one full column at a time.
data = {
    "ID_recipient_account": _column(random_iban),
    "ID_recipient_country": _column(fake.country_code),
    "ID_recipient_subsidiary": _column(_sometimes_blank(lambda: random_string(5), 0.1)),
    "ID_recipient_institute": _column(_sometimes_blank(lambda: random_string(4), 0.1)),
    "Abroad": _column(_binary_flag),
    "SEPA": _column(_binary_flag),
    "Causal": _column(random_causal),
    "Currency": _column(random_iso_currency),
    "Equivalent_value": _column(_amount),
    "Channel": _column(random_channel),
    "Instant": _column(_binary_flag),
    "UserAgent": _column(_sometimes_blank(fake.user_agent, 0.2)),
    "Timestamp": _column(_timestamp),
    "ID_sender_account": _column(random_iban),
    "ID_sender_subsidiary": _column(_sometimes_blank(lambda: random_string(5), 0.1)),
    "Label_fraud_post": _column(_binary_flag),
}

# Tabulate the generated columns.
df = pd.DataFrame(data)

# Write the table to disk without the positional index column.
df.to_csv("synthetic_dataset.csv", index=False)

print("Synthetic dataset created and saved as 'synthetic_dataset.csv'.")

Synthetic dataset created and saved as 'synthetic_dataset.csv'.


In [3]:
# Display the DataFrame built in the previous cell as this cell's output.
df

Unnamed: 0,ID_recipient_account,ID_recipient_country,ID_recipient_subsidiary,ID_recipient_institute,Abroad,SEPA,Causal,Currency,Equivalent_value,Channel,Instant,UserAgent,Timestamp,ID_sender_account,ID_sender_subsidiary,Label_fraud_post
0,IBAN35095550848445960962,SD,AQ6P0,O43A,1,1,Anything argue sister hear to. Father program ...,CHF,6458.70,A,0,Mozilla/5.0 (Windows; U; Windows NT 5.2) Apple...,2024/11/01 00:26:00,IBAN61445993110641946220,LW6SG,0
1,IBAN17793867027397458191,ML,BMAOM,1BLH,0,0,Woman huge live tend.,EUR,2268.50,W,1,Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_7;...,2024/08/13 06:55:27,IBAN57058972775844384345,R7876,1
2,IBAN40883182170721073195,LB,YB0K0,ELEH,1,0,Education prepare stuff visit anything chair. ...,USD,851.84,O,0,Mozilla/5.0 (Macintosh; PPC Mac OS X 10_5_3 rv...,2024/03/12 15:00:34,IBAN93498530423123956449,C9F2V,0
3,IBAN00689388635219848599,KZ,KVXZD,,0,0,Return ability institution central bill custom...,GBP,8283.50,O,1,Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6...,2024/05/30 07:05:58,IBAN76623401512564762417,CZIJZ,0
4,IBAN19295108404346137619,GB,RW68Y,,0,1,Little half while manager agreement which memo...,USD,2912.95,W,1,Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_7...,2024/04/16 01:04:40,IBAN34143395757620981790,PA2CL,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,IBAN49421734134453654809,GH,Z9TWO,SQTF,1,1,Consider on child important. Material along sk...,USD,4002.27,A,0,Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_3)...,2024/05/12 18:07:28,IBAN08354108466707449356,9SEVD,0
996,IBAN63025008423845766393,AT,FC74X,7R2A,0,1,Offer matter at consider where. After individu...,CHF,3618.76,O,0,,2024/02/22 07:21:12,IBAN49604324657551456182,0Q60D,1
997,IBAN32355199577243917600,AZ,9P7QM,B9KD,0,0,Toward he price forget.\nHair threat early. Ga...,GBP,7741.63,C,1,Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_3)...,2024/08/31 02:39:11,IBAN09593131833352332160,RWMQE,1
998,IBAN82079585738639801304,BN,L11HW,MIJ4,1,1,Effort market worry return community clearly t...,GBP,4374.38,C,1,,2024/02/16 11:59:59,IBAN37134825505067473283,U87KO,1
