In [15]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from tensorflow.keras.layers import Input, Dense, Dropout, Concatenate, GaussianNoise
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import Precision, Recall

# Load data
data = pd.read_csv('data.csv')

X = data[['Hour_of_Day', 'Amount', 'V1', 'V2', 'V3', 'V4', 'V5',
          'Merchant_Type', 'Location_Distance', 'Transaction_Frequency',
          'Is_International', 'Device_Type']]
y = data['Class'].copy()

# Add label noise: flip 5% of labels randomly
n_flip = int(0.05 * len(y))
flip_indices = np.random.choice(y.index, size=n_flip, replace=False)
y.iloc[flip_indices] = 1 - y.iloc[flip_indices]

# Preprocessing
numeric_features = ['Hour_of_Day', 'Amount', 'V1', 'V2', 'V3', 'V4', 'V5',
                    'Location_Distance', 'Transaction_Frequency', 'Is_International']
categorical_features = ['Merchant_Type', 'Device_Type']

preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numeric_features),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
])

X_processed = preprocessor.fit_transform(X)
joblib.dump(preprocessor, 'preprocessor_dcn.pkl')

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_processed, y, test_size=0.3, stratify=y, random_state=42
)

input_dim = X_train.shape[1]

# Model with strong noise and dropout
input_layer = Input(shape=(input_dim,), name='input')
noisy_input = GaussianNoise(0.3)(input_layer)  # stronger noise

# Cross part
cross = Dense(input_dim, activation='linear', name='cross1')(noisy_input)
cross = Concatenate()([noisy_input, cross])
cross = Dense(input_dim, activation='linear', name='cross2')(cross)

# Deep part smaller and higher dropout
deep = Dense(32, activation='relu')(noisy_input)
deep = Dropout(0.7)(deep)
deep = Dense(16, activation='relu')(deep)
deep = Dropout(0.7)(deep)

merged = Concatenate()([cross, deep])
output = Dense(1, activation='sigmoid')(merged)

model = Model(inputs=input_layer, outputs=output)

# Compile without class weights
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy', Precision(), Recall()]
)

early_stop = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    epochs=5,               # fewer epochs
    batch_size=512,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1
)

results = model.evaluate(X_test, y_test)
print(f"Accuracy: {results[1]:.4f}")
print(f"Precision: {results[2]:.4f}")
print(f"Recall: {results[3]:.4f}")

model.save('fraud_dcn_model_forced_under97.h5')



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 0.9437
Precision: 0.9484
Recall: 0.9390


In [1]:
# predict_fraud.py

import pandas as pd
import numpy as np
import joblib
from tensorflow.keras.models import load_model

# Define feature names
numeric_features = ['Hour_of_Day', 'Amount', 'V1', 'V2', 'V3', 'V4', 'V5',
                    'Location_Distance', 'Transaction_Frequency', 'Is_International']
categorical_features = ['Merchant_Type', 'Device_Type']

# Load the trained model and preprocessor
model = load_model('fraud_dcn_model_forced_under97.h5')
preprocessor = joblib.load('preprocessor_dcn.pkl')

# Risk level helper
def get_risk_level(score):
    if score >= 0.85:
        return 'High Risk'
    elif score >= 0.5:
        return 'Medium Risk'
    elif score >= 0.2:
        return 'Low Risk'
    else:
        return 'Very Low Risk'

# Prediction function
def predict_fraud(user_input: dict):
    """
    Predict if a transaction is fraud or not.
    user_input: dict with keys matching the model features
    """
    df = pd.DataFrame([user_input])
    X_processed = preprocessor.transform(df)
    prediction = model.predict(X_processed)[0][0]
    result = 'Fraud' if prediction >= 0.5 else 'Not Fraud'
    risk_level = get_risk_level(prediction)
    print(f"Prediction Score: {prediction:.4f} ({prediction*100:.2f}%) => {result}")
    print(f"Risk Level: {risk_level}")
    return result

# Example usage
if __name__ == '__main__':
    input_data = {
        'Hour_of_Day': 1, 'Amount': 2000, 'V1': -3, 'V2': 2, 'V3': -2, 'V4': 3, 'V5': -1,
    'Merchant_Type': 'Online', 'Location_Distance': 1000, 'Transaction_Frequency': 20,
    'Is_International': 1, 'Device_Type': 'Mobile'
    }

    predict_fraud(input_data)


Prediction Score: 0.9997 (99.97%) => Fraud
Risk Level: High Risk


In [21]:
python --version

NameError: name 'python' is not defined