In [0]:
import pandas as pd
import pickle
import random

# ---------- Load Saved Model and Transformers ----------
# Workspace locations of the three pickled artifacts.
model_path = "/Workspace/Users/m.a.arya@accenture.com/randomforest.pkl"
scaler_path = "/Workspace/Users/m.a.arya@accenture.com/scaler.pkl"
encoder_path = "/Workspace/Users/m.a.arya@accenture.com/label_encoders.pkl"


def _read_pickle(path):
    """Open ``path`` in binary mode and return the object unpickled from it."""
    # pickle.load can execute arbitrary code; only point this at trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


model = _read_pickle(model_path)
scaler = _read_pickle(scaler_path)
label_encoders = _read_pickle(encoder_path)


# ---------- Input Widgets ----------
# One entry per widget, in creation order: (name, default, choices, label).
# ``choices`` is None for a free-text widget and a list of options for a dropdown.
widget_specs = [
    ("amount", "1000", None, "Amount"),
    ("hour", "12", [str(i) for i in range(24)], "Hour of Day"),
    ("day_of_week", "Monday",
     ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"],
     "Day of Week"),
    ("upi_channel", "PhonePe", ["GPay", "BHIM", "PayTM", "PhonePe"], "UPI Channel"),
    ("merchant_category", "Utilities",
     ['Utilities', 'Shopping', 'Food', 'Travel', 'Education', 'Healthcare'],
     "Merchant Category"),
    ("device_os", "Android", ["Android", "iOS", "Windows", "Other"], "Device OS"),
    ("transaction_city", "Mumbai", None, "Transaction City"),
    ("transaction_state", "Maharashtra", None, "Transaction State"),
    ("transaction_status", "Completed", ["Completed", "Pending", "Failed"], "Transaction Status"),
]

for widget_name, default_value, choices, label in widget_specs:
    if choices is None:
        dbutils.widgets.text(widget_name, default_value, label)
    else:
        dbutils.widgets.dropdown(widget_name, default_value, choices, label)

# ---------- Feature Construction ----------
# Parse each numeric widget once; the derived flags below are then computed from
# exactly the values that are passed to the model.
amount = float(dbutils.widgets.get("amount"))
hour = int(dbutils.widgets.get("hour"))

# transaction_amount_deviation used to be random.uniform(-100, 100), so identical
# widget inputs could produce different predictions from one run to the next.
# It is now an explicit text widget (default "0") so a prediction can be
# reproduced from the widget values alone.
dbutils.widgets.text("transaction_amount_deviation", "0", "Amount Deviation")
amount_deviation = round(float(dbutils.widgets.get("transaction_amount_deviation")), 2)

input_data = {
    "amount": amount,
    "hour": hour,
    "day_of_week": dbutils.widgets.get("day_of_week"),
    "transaction_amount_deviation": amount_deviation,
    "is_high_value": 1 if amount > 3000 else 0,  # flag amounts strictly above 3000
    "is_odd_hour": 1 if hour < 6 or hour > 22 else 0,  # hours 0-5 and 23
    "upi_channel": dbutils.widgets.get("upi_channel"),
    "merchant_category": dbutils.widgets.get("merchant_category"),
    "device_os": dbutils.widgets.get("device_os"),
    "transaction_city": dbutils.widgets.get("transaction_city"),
    "transaction_state": dbutils.widgets.get("transaction_state"),
    "transaction_status": dbutils.widgets.get("transaction_status")
}

# Single-row frame holding the transaction to score.
df_input = pd.DataFrame([input_data])

# ---------- Preprocess ----------
# Columns that are label-encoded with the pickled per-column encoders.
categorical_cols = [
    'day_of_week',
    'upi_channel',
    'transaction_status',
    'merchant_category',
    'device_os',
    'transaction_city',
    'transaction_state'
]

# Columns that are passed through the pickled scaler.
numeric_cols = [
    'amount',
    'hour',
    'transaction_amount_deviation',
    'is_high_value',
    'is_odd_hour'
]

# Encode
for col in categorical_cols:
    encoder = label_encoders[col]
    values = df_input[col].astype(str)

    # City and state come from free-text widgets, so a typo or an unseen value is
    # likely. Check against the fitted categories first and report which column
    # is at fault. Skipped if the encoder does not expose ``classes_``.
    known_classes = getattr(encoder, "classes_", None)
    if known_classes is not None:
        unseen = sorted(set(values) - {str(c) for c in known_classes})
        if unseen:
            examples = [str(c) for c in known_classes[:5]]
            raise ValueError(
                f"Unknown value(s) {unseen} for '{col}': the encoder was fitted on "
                f"{len(known_classes)} categories (e.g. {examples})."
            )

    df_input[col + "_enc"] = encoder.transform(values)

# Scale (overwrites the raw numeric columns of df_input with their scaled values)
df_input[numeric_cols] = scaler.transform(df_input[numeric_cols])

# Feature columns: encoded categoricals first, then the scaled numerics.
# NOTE(review): this order presumably mirrors the training-time column order — confirm.
feature_cols = [col + "_enc" for col in categorical_cols] + numeric_cols
X_input = df_input[feature_cols]

# ---------- Predict ----------
y_pred = model.predict(X_input)
# Column 1 of predict_proba is used as the fraud probability.
# NOTE(review): assumes model.classes_ is ordered [0, 1] — confirm.
y_proba = model.predict_proba(X_input)[:, 1]

# ---------- Display Result in Widget ----------
df_input["fraud_prediction"] = y_pred
df_input["fraud_probability"] = y_proba

is_fraud = y_pred[0] == 1
prediction = "Fraudulent" if is_fraud else "Legitimate"
color = "#ff4d4d" if is_fraud else "#5cd65c"
# Fraud-class probability as a percentage. Previously computed but never shown;
# it is now rendered under the verdict.
confidence = round(float(y_proba[0]) * 100, 2)

html = f"""
<div style="padding:15px;border-radius:10px;background-color:{color};color:white;width:300px;">
    <h3>Prediction: {prediction}</h3>
    <p>Fraud probability: {confidence:.2f}%</p>
</div>
"""

displayHTML(html)


In [0]:
import pandas as pd
import pickle

# ---------- Load Saved Model and Transformers ----------
model_path = "/Workspace/Users/m.a.arya@accenture.com/randomforest.pkl"
scaler_path = "/Workspace/Users/m.a.arya@accenture.com/scaler.pkl"
encoder_path = "/Workspace/Users/m.a.arya@accenture.com/label_encoders.pkl"

# Unpickle the three artifacts in a fixed order, then unpack them by position.
# pickle.load can execute arbitrary code; these files must come from a trusted source.
loaded_artifacts = []
for artifact_path in (model_path, scaler_path, encoder_path):
    with open(artifact_path, 'rb') as artifact_file:
        loaded_artifacts.append(pickle.load(artifact_file))

model, scaler, label_encoders = loaded_artifacts

# ---------- Define All Your Transaction Rows --------------
# Hard-coded sample of labelled transactions, one inner list per row, in the
# order given by `columns` below.
#
# NOTE(review): the values do not look consistent with the column names — confirm
# the column alignment against the source data:
#   * "hour" holds values such as 111, 617 and 903, outside the 0-23 range offered
#     by the "Hour of Day" widget in the interactive cell.
#   * no "amount" here exceeds 1500, yet is_high_value is 1 in every row but the
#     first, while the interactive cell sets is_high_value only for amount > 3000.
#   * two rows carry "########" as transaction_date (not a parseable date);
#     transaction_date is not among the feature columns used below.
data = [
    # user_id, amount, hour, day_of_week, transaction_amount_deviation, is_high_value, is_odd_hour,
    # upi_channel, transaction_status, merchant_category, device_os,
    # default_city, default_state, transaction_city, transaction_state,
    # fraud, transaction_date, transaction_time
    [240, 1173, 111, "Wednesday", -56.61, 0, 1, "GPay", "Completed", "Healthcare", "Other",
     "Kulti", "West Bengal", "Bally", "West Bengal", 0, "6/21/2023", "12:45:06 AM"],
    [1490, 1426, 212, "Wednesday", 25.21, 1, 1, "PhonePe", "Completed", "Utilities", "iOS",
     "Kota", "Rajasthan", "Jabalpur", "Madhya Pradesh", 0, "5/15/2024", "4:33:15 AM"],
    [1995, 1458, 617, "Thursday", 9.57, 1, 0, "PayTM", "Completed", "Utilities", "iOS",
     "Bangalore", "Karnataka", "Ludhiana", "Punjab", 0, "9/28/2023", "6:01:47 PM"],
    [1819, 1037, 389, "Friday", 56.34, 1, 0, "PhonePe", "Completed", "Travel", "Android",
     "Panihati", "West Bengal", "Gulbarga", "Karnataka", 0, "4/14/2023", "9:14:38 PM"],
    [558, 540, 607, "Monday", -5.05, 1, 1, "PhonePe", "Pending", "Utilities", "Android",
     "Bilaspur", "Chhattisgarh", "Pune", "Maharashtra", 0, "8/7/2023", "1:16:04 AM"],
    [1693, 1466, 409, "Thursday", -8.15, 1, 0, "PhonePe", "Completed", "Shopping", "Android",
     "Begusarai", "Bihar", "New Delhi", "Delhi", 0, "2/23/2023", "7:55:32 AM"],
    [1549, 428, 121, "Tuesday", 45.62, 1, 0, "PhonePe", "Failed", "Healthcare", "iOS",
     "Bangalore", "Karnataka", "Tiruchirappalli", "Tamil Nadu", 1, "12/5/2023", "3:42:25 PM"],
    [647, 1414, 774, "Monday", -24.18, 1, 0, "PhonePe", "Completed", "Travel", "iOS",
     "Puducherry", "Puducherry", "Pune", "Maharashtra", 0, "2/20/2023", "10:29:13 PM"],
    [870, 818, 891, "Saturday", 82.8, 1, 1, "PhonePe", "Pending", "Food", "iOS",
     "Jalandhar", "Punjab", "Hyderabad", "Telangana", 0, "2/25/2023", "5:29:48 AM"],
    [814, 1406, 620, "Saturday", 38, 1, 0, "PhonePe", "Completed", "Shopping", "Android",
     "Karimnagar", "Telangana", "Tiruvottiyur", "Tamil Nadu", 1, "########", "6:19:22 AM"],
    [222, 1396, 549, "Monday", -86.86, 1, 0, "GPay", "Completed", "Travel", "iOS",
     "Ludhiana", "Punjab", "Bangalore", "Karnataka", 0, "3/18/2024", "9:07:08 PM"],
    [862, 406, 256, "Friday", -18.76, 1, 0, "PhonePe", "Completed", "Utilities", "iOS",
     "Chandrapur", "Maharashtra", "Dehradun", "Uttarakhand", 0, "5/17/2024", "9:38:49 PM"],
    [601, 585, 903, "Sunday", 30.22, 1, 0, "GPay", "Failed", "Shopping", "Android",
     "Rajahmundry", "Andhra Pradesh", "North Dumdum", "West Bengal", 1, "3/10/2024", "12:49:01 PM"],
    [1229, 1500, 685, "Tuesday", -35, 1, 0, "PayTM", "Completed", "Travel", "Android",
     "Jhansi", "Uttar Pradesh", "Mathura", "Uttar Pradesh", 0, "########", "7:20:05 AM"],
    [1405, 766, 426, "Friday", 24.04, 1, 0, "GPay", "Pending", "Shopping", "Android",
     "Panipat", "Haryana", "Mangalore", "Karnataka", 1, "5/12/2023", "8:02:05 PM"],
    [1956, 862, 801, "Monday", 50.2, 1, 1, "PhonePe", "Pending", "Utilities", "iOS",
     "Bhopal", "Madhya Pradesh", "Gurgaon", "Haryana", 0, "5/27/2024", "3:46:27 AM"],
    [1149, 423, 285, "Thursday", 6.38, 1, 0, "PhonePe", "Failed", "Utilities", "iOS",
     "Jaipur", "Rajasthan", "Parbhani", "Maharashtra", 0, "5/2/2024", "4:57:37 PM"],
    [843, 523, 54, "Monday", 3.45, 1, 1, "PhonePe", "Pending", "Shopping", "Android",
     "Jalna", "Maharashtra", "Nanded", "Maharashtra", 0, "7/31/2023", "2:01:43 AM"],
    [202, 805, 237, "Saturday", -8.53, 1, 1, "PhonePe", "Completed", "Utilities", "Android",
     "Karimnagar", "Telangana", "Agra", "Uttar Pradesh", 0, "9/2/2023", "5:17:45 AM"],
    [1178, 906, 493, "Wednesday", 2.57, 1, 0, "PhonePe", "Pending", "Healthcare", "Android",
     "Coimbatore", "Tamil Nadu", "Aurangabad", "Maharashtra", 0, "5/31/2023", "11:33:12 AM"],
    [519, 163, 483, "Monday", 40.61, 1, 0, "PhonePe", "Completed", "Shopping", "Android",
     "Karnal", "Haryana", "Alwar", "Rajasthan", 0, "8/28/2023", "10:51:31 AM"],
    [849, 1104, 753, "Sunday", 75.49, 1, 1, "PhonePe", "Pending", "Shopping", "Android",
     "Kolhapur", "Maharashtra", "Asansol", "West Bengal", 1, "3/5/2023", "12:41:05 AM"]
]

# Column names for `data`, positionally matched to each row (18 entries).
columns = [
    "user_id", "amount", "hour", "day_of_week", "transaction_amount_deviation", "is_high_value", "is_odd_hour",
    "upi_channel", "transaction_status", "merchant_category", "device_os",
    "default_city", "default_state", "transaction_city", "transaction_state",
    "fraud", "transaction_date", "transaction_time"
]

# Batch frame scored below; "fraud" is the ground-truth label column.
df = pd.DataFrame(data, columns=columns)

# ---------- Preprocess ----------
# Categorical features: each is label-encoded into a "<name>_enc" column.
categorical_cols = [
    'day_of_week', 'upi_channel', 'transaction_status', 'merchant_category',
    'device_os', 'transaction_city', 'transaction_state',
]

# Numeric features: scaled in place with the pickled scaler.
numeric_cols = [
    'amount', 'hour', 'transaction_amount_deviation', 'is_high_value', 'is_odd_hour',
]

# Label-encode every categorical column using its own fitted encoder.
for name in categorical_cols:
    as_text = df[name].astype(str)
    df[name + "_enc"] = label_encoders[name].transform(as_text)

# Replace the raw numeric columns with their scaled counterparts.
scaled_numeric = scaler.transform(df[numeric_cols])
df[numeric_cols] = scaled_numeric

# Model input: encoded categoricals followed by the scaled numerics.
feature_cols = [name + "_enc" for name in categorical_cols]
feature_cols = feature_cols + numeric_cols
X_input = df[feature_cols]

# ---------- Predict ----------
# Hard class labels, plus column 1 of predict_proba as the per-row fraud score.
y_pred = model.predict(X_input)
class_probabilities = model.predict_proba(X_input)
y_proba = class_probabilities[:, 1]

# ---------- Add to DataFrame ----------
# Stored on df so that later cells can read the scores by column name.
df["fraud_prediction"] = y_pred
df["fraud_probability"] = y_proba

# ---------- Display Results ----------
# Shows the row identifier next to the ground-truth label only.
shown_columns = ["user_id", "fraud"]
print(df[shown_columns])


    user_id  fraud
0       240      0
1      1490      0
2      1995      0
3      1819      0
4       558      0
5      1693      0
6      1549      1
7       647      0
8       870      0
9       814      1
10      222      0
11      862      0
12      601      1
13     1229      0
14     1405      1
15     1956      0
16     1149      0
17      843      0
18      202      0
19     1178      0
20      519      0
21      849      1


In [0]:
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score

#-----------------------------------
# Assumptions
# Your DataFrame df must include:
# - df["fraud_probability"]: predicted probability of fraud from your model
# - df["fraud"]: true label, 1 = fraud, 0 = not fraud
#
# Sweeps a grid of decision thresholds, prints precision / recall / F1 for each,
# and keeps the threshold with the highest F1 (the lowest one on ties).
# NOTE(review): the threshold is selected on the same rows it is scored on, so
# the reported best F1 is optimistic.
#-----------------------------------

# Arrays for convenience
fraud_probs = df["fraud_probability"].values
true_labels = df["fraud"].values

best_thresh = 0.5
best_f1 = 0.0

print("Threshold  Precision  Recall      F1")
print("----------------------------------------")

# Build the grid 0.30, 0.35, ..., 0.70 from integers. np.arange with a fractional
# step yields values that are slightly off the intended ones, which makes the
# ">=" test below unreliable for probabilities that sit exactly on a grid point.
thresholds = np.arange(30, 71, 5) / 100

for thresh in thresholds:
    preds = (fraud_probs >= thresh).astype(int)
    # zero_division=0: when a threshold yields no predicted positives, report the
    # metric as 0 without raising UndefinedMetricWarning.
    prec = precision_score(true_labels, preds, zero_division=0)
    rec = recall_score(true_labels, preds, zero_division=0)
    curr_f1 = f1_score(true_labels, preds, zero_division=0)
    print(f"{thresh:8.2f}  {prec:9.2f}  {rec:6.2f}  {curr_f1:8.2f}")
    # Strict ">" keeps the first (lowest) threshold among equal F1 scores.
    if curr_f1 > best_f1:
        best_f1 = curr_f1
        best_thresh = thresh

print(f"\nBest threshold by F1-score: {best_thresh:.2f} (F1 = {best_f1:.2f})")

# Optional: Apply best threshold and store as new prediction column
df["fraud_prediction_optimal"] = (fraud_probs >= best_thresh).astype(int)


Threshold  Precision  Recall      F1
----------------------------------------
    0.30       0.23    1.00      0.37
    0.35       0.23    1.00      0.37
    0.40       0.23    1.00      0.37
    0.45       0.23    1.00      0.37
    0.50       0.28    1.00      0.43
    0.55       0.36    1.00      0.53
    0.60       0.38    0.60      0.46
    0.65       0.00    0.00      0.00
    0.70       0.00    0.00      0.00

Best threshold by F1-score: 0.55 (F1 = 0.53)
