In [14]:
import pandas as pd
import numpy as np
import random

# Seed both random sources used by this script (the stdlib `random` module and
# NumPy's global generator) so repeated runs give the same perturbation.
random.seed(42)
np.random.seed(42)

# Read the original fingerprinted dataset that the steps below modify in place.
df = pd.read_csv("Financial_Records.csv")

# Column groups by data type. The perturbation steps look each name up in the
# DataFrame and skip any that are absent.

# Columns that receive additive Gaussian noise.
numeric_cols = [
    'age',
    'credit_amount',
    'duration',
    'installment_rate',
    'existing_credits',
    'liable_people',
    'residence_since',
    'monthly_rent_or_mortgage',
]

# Columns whose values are shuffled among a sampled subset of rows.
categorical_cols = [
    'sex',
    'marital_status',
    'job',
    'employment_since',
    'foreign',
    'credit_history',
    'purpose',
    'debtors',
    'checking_account',
    'savings',
    'housing',
    'property',
    'online_banking',
    'tel',
]

# Label column; deliberately kept out of both feature lists above.
target_col = 'default'

# 1. Add small Gaussian noise to numeric columns
#
# Every numeric column present in the frame receives zero-mean noise whose
# standard deviation is 1% of that column's own standard deviation. Columns
# listed in `numeric_cols` but missing from the frame are skipped.
for col in numeric_cols:
    if col not in df.columns:
        continue

    # Decide whether the column is non-negative BEFORE perturbing it. Testing
    # after the noise was added made the clip below a no-op: either every value
    # was still >= 0 (nothing to clip) or some value had gone negative (the
    # test failed and the clip was skipped). `min()` ignores missing values,
    # so a column containing NaN is still recognised as non-negative.
    originally_non_negative = df[col].min() >= 0

    std_dev = df[col].std()
    noise = np.random.normal(0, 0.01 * std_dev, size=df.shape[0])
    df[col] = df[col] + noise

    # Clip to avoid unrealistic values (e.g., negative age) that the noise may
    # have introduced in a column that had none originally.
    if originally_non_negative:
        df[col] = df[col].clip(lower=0)

# 2. Swap a small percentage of categorical values randomly (label-preserving)
#
# For each categorical column, a random subset of rows is selected and the
# column's values are shuffled among those rows only, so the column's overall
# value distribution is unchanged. The target column is not touched.
swap_fraction = 0.05  # 5% of values per column
for col_offset, col in enumerate(categorical_cols):
    if col not in df.columns:
        continue

    # Use a distinct but reproducible seed for every column. Reusing one fixed
    # seed for all columns would select the same rows and apply the same
    # permutation each time, which moves whole categorical sub-records between
    # rows instead of perturbing each column independently.
    row_seed = 42 + col_offset
    shuffle_seed = 99 + col_offset

    indices = df.sample(frac=swap_fraction, random_state=row_seed).index
    # `.values` drops the index so the assignment below is positional rather
    # than being re-aligned back to the original rows (which would undo the
    # shuffle).
    values = df.loc[indices, col].sample(frac=1.0, random_state=shuffle_seed).values
    df.loc[indices, col] = values

# Optional: put the rows in a random (but seeded) order, then renumber the
# index from zero so the original row positions are not carried along.
df = df.sample(frac=1.0, random_state=999)
df = df.reset_index(drop=True)

# 3. Write the attacked version to disk without the index column and report it
df.to_csv("Financial_Records_attacked-random-noise.csv", index=False)
print("Saved attacked dataset as Financial_Records_attacked-random-noise.csv")

Saved attacked dataset as Financial_Records_attacked-random-noise.csv
