In [10]:
import pandas as pd
from collections import defaultdict

# Global tracking of seller activity
seller_stats = defaultdict(lambda: {'total': 0, 'fraud': 0})

In [1]:
!pip install pyspark



In [2]:
from pyspark.sql import SparkSession
spark = SparkSession.builder \
    .appName("FraudStream") \
    .getOrCreate()

In [3]:
from pyspark.sql.functions import col
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, IntegerType, TimestampType

# Define schema to ensure correct dtypes
schema = StructType([
    StructField("transaction_id", StringType(), True),
    StructField("seller_id", StringType(), True),
    StructField("buyer_id", StringType(), True),
    StructField("amount", DoubleType(), True),
    StructField("payment_method", StringType(), True),
    StructField("device_type", StringType(), True),
    StructField("location", StringType(), True),
    StructField("timestamp", TimestampType(), True),
    StructField("is_fraud", IntegerType(), True)
])

df = spark.read.csv("data/transactions.csv", header=True, schema=schema)
df = df.orderBy("timestamp")
df.show(5)

+--------------+---------+--------+-------+--------------+-----------+---------+--------------------+--------+
|transaction_id|seller_id|buyer_id| amount|payment_method|device_type| location|           timestamp|is_fraud|
+--------------+---------+--------+-------+--------------+-----------+---------+--------------------+--------+
|       T089558|     S185|   B2277| 831.11|   Net Banking|     Tablet|   Mumbai|2025-03-07 21:04:...|       0|
|       T013074|     S136|   B1109|4354.84|   Net Banking|    Android|   Others|2025-03-07 21:05:...|       0|
|       T009809|     S122|   B8432|1262.13|    Debit Card|        iOS|  Chennai|2025-03-07 21:07:...|       0|
|       T042449|     S127|   B4396| 3165.2|           UPI|     Tablet|Bangalore|2025-03-07 21:08:...|       0|
|       T058474|     S172|   B5532|3405.24|           COD|    Desktop|Bangalore|2025-03-07 21:08:...|       0|
+--------------+---------+--------+-------+--------------+-----------+---------+--------------------+--------+
o

In [4]:
import pandas as pd
import xgboost as xgb
import pickle

# Load model & encoders
model = xgb.XGBClassifier()
model.load_model("model/xgb_fraud_model.json")
with open("model/label_encoders.pkl", "rb") as f:
    le_dict = pickle.load(f)

In [14]:
def process_batch(pdf):
    if pdf.empty:
        return

    def safe_transform(le, col_data):
        known_classes = set(le.classes_)
        return [le.transform([val])[0] if val in known_classes else -1 for val in col_data]

    # Handle unseen values safely
    for col in ['payment_method', 'device_type', 'location', 'seller_id']:
        if col in pdf:
            le = le_dict[col]
            pdf[col] = safe_transform(le, pdf[col])

    X_live = pdf[['seller_id', 'amount', 'payment_method', 'device_type', 'location']]
    preds = model.predict(X_live)
    pdf['predicted_fraud'] = preds

    # Update seller stats
    for _, row in pdf.iterrows():
        sid = row['seller_id']
        seller_stats[sid]['total'] += 1
        if row['predicted_fraud'] == 1:
            seller_stats[sid]['fraud'] += 1

    # Detect frauds
    frauds = pdf[pdf['predicted_fraud'] == 1]
    if not frauds.empty:
        print("⚠️ Fraudulent transactions detected:")
        print(frauds[['transaction_id', 'amount', 'seller_id']])

        # Save to log
        frauds.to_csv('fraud_logs/fraud_batch.csv', mode='a', header=False, index=False)

    # Print Top Risky Sellers
    print("\n📈 Top Risky Sellers (RWSI):")
    rwsi_df = pd.DataFrame([
        {'seller_id': sid, 'RWSI': round((v['fraud'] / v['total']) * 100, 2)}
        for sid, v in seller_stats.items() if v['total'] > 5
    ]).sort_values(by='RWSI', ascending=False).head(5)

    print(rwsi_df)

In [17]:
import os
os.makedirs('fraud_logs', exist_ok=True)

# Convert to Pandas and simulate stream
full_df = df.toPandas()
batch_size = 5000

for i in range(0, len(full_df), batch_size):
    batch = full_df.iloc[i:i+batch_size]
    print(f"Processing batch {i // batch_size + 1}...")
    process_batch(batch)

Processing batch 1...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
     transaction_id   amount  seller_id
9           T008308  1064.78          0
25          T074674  3203.99         19
53          T060990   206.13         40
63          T016976  4720.33          7
70          T002924  7028.71         26
...             ...      ...        ...
4864        T035346  2394.38         34
4868        T075580  2495.45         53
4905        T099335  3236.70         91
4918        T026826  3282.24         58
4967        T041409  3886.89         91

[324 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  27.84
44         47  27.41
72         48  27.21
22         54  24.95
60         35  21.18
Processing batch 2...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
     transaction_id   amount  seller_id
5010        T000197  1708.69         78
5034        T099847  2354.73         52
5065        T068115  3237.27         69
5070        T084129  1097.15         64
5079        T094696  3929.24         90
...             ...      ...        ...
9900        T067929  3855.89         91
9911        T033788  2434.60         57
9915        T070381  2891.46         54
9953        T001073  7481.24         92
9990        T050383  3281.00         47

[319 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  27.98
44         47  27.46
72         48  27.09
22         54  24.58
65         53  21.04
Processing batch 3...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
10000        T099508  1612.21         95
10011        T046436  4166.35         47
10043        T035068  1812.25         70
10063        T002891  5169.60         89
10068        T089576  2701.73         34
...              ...      ...        ...
14914        T062357  1726.55         62
14928        T085820  2064.98         76
14942        T013157  2747.56         48
14981        T081403  2675.92         48
14991        T014003  3349.76         33

[345 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  27.88
44         47  27.55
72         48  27.22
22         54  24.75
60         35  21.19
Processing batch 4...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
15004        T017938  2505.28         53
15011        T029411  2761.69         46
15031        T043467  1715.47         94
15042        T072216  2682.11         46
15049        T070329  3235.10         68
...              ...      ...        ...
19937        T082422  4201.89         34
19938        T030314  2386.59         33
19961        T067098  3146.59         41
19966        T043637  2661.27         57
19999        T026644  3530.80         54

[318 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  27.57
44         47  27.49
72         48  27.41
22         54  24.66
65         53  21.47
Processing batch 5...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
20031        T050231  2942.58         38
20035        T010950  1118.97         64
20046        T014783  3229.04         20
20051        T016016  1740.46         75
20067        T030419  3963.93         91
...              ...      ...        ...
24933        T053536  3939.43         47
24951        T007675  3676.92         91
24971        T031115  4999.72          1
24982        T078420  1647.76         95
24990        T003174  1386.24         61

[347 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
72         48  27.67
37         57  27.66
44         47  27.24
22         54  24.61
60         35  21.27
Processing batch 6...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
25005        T001520  6883.76         94
25007        T090767  4552.49         39
25011        T096173  2215.96         47
25015        T002497  6279.95          9
25032        T002821  6198.30          2
...              ...      ...        ...
29950        T038602  2688.42         57
29951        T029734  3134.61         77
29963        T039923  3279.53         53
29964        T001712  7075.27         52
29993        T022443  1365.36         47

[312 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  27.57
72         48  27.47
44         47  27.19
22         54  24.33
60         35  21.38
Processing batch 7...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
30002        T014198  2405.64          1
30050        T061816  4071.44         60
30063        T002022  6934.43         39
30070        T042566  3732.42         48
30079        T025931  4911.95          7
...              ...      ...        ...
34923        T001213  5835.24         32
34930        T090291  3132.30         21
34948        T064419  4619.80         38
34964        T049498  4001.56         48
34969        T000521  1844.86         60

[358 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  27.77
72         48  27.76
44         47  27.15
22         54  24.08
60         35  21.41
Processing batch 8...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
35002        T009713  3984.40         50
35007        T067612  1721.14         81
35008        T095182  3244.27         75
35009        T092673  3326.98         54
35054        T059788  1844.06         53
...              ...      ...        ...
39884        T025467  1082.85         57
39905        T000084  7192.99         28
39928        T002326  7032.88         28
39983        T020416  2512.08         52
39987        T044165  3200.69         39

[330 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  28.11
72         48  27.95
44         47  27.50
22         54  24.10
60         35  21.55
Processing batch 9...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
40011        T001675  6117.98         24
40064        T000206  5893.02         30
40073        T042311  1125.26         48
40095        T087860   200.73         39
40096        T022584  1808.29         62
...              ...      ...        ...
44822        T021254  2418.25          1
44846        T000189  6662.17         37
44867        T010328  2407.48          1
44942        T082101  3954.77         90
44943        T002462  1838.26         87

[348 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  28.53
72         48  28.29
44         47  27.77
22         54  24.17
60         35  21.22
Processing batch 10...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
45025        T000669  5023.82         14
45050        T012775  3004.57         54
45063        T085756  1741.43         14
45065        T030222  3191.33          0
45084        T001371  4733.49         81
...              ...      ...        ...
49951        T028111  4994.24         61
49967        T001179  6269.42         98
49975        T073876  4765.04         87
49983        T082474  3184.19         34
49993        T076726  4630.01         48

[299 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  28.72
72         48  28.06
44         47  27.76
22         54  24.36
60         35  21.41
Processing batch 11...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
50028        T002191  5662.02         48
50038        T066719  1740.53         68
50044        T089687  1836.56         67
50048        T020494  3257.00         54
50055        T002645  4685.94         99
...              ...      ...        ...
54938        T087649  2681.47         47
54963        T039258   212.33         13
54976        T002862  6087.69         15
54980        T017166  4881.76          4
54989        T000773  6228.87         36

[336 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  28.38
72         48  28.02
44         47  27.71
22         54  24.21
60         35  21.49
Processing batch 12...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
55010        T006036  1837.57         87
55038        T098179  2655.50         53
55057        T009362  4884.14         16
55066        T003874   190.54         41
55071        T002121  7210.46         17
...              ...      ...        ...
59809        T004095  2630.12         33
59871        T002607  2571.93         58
59918        T001108  7380.52         83
59965        T024655  2746.19          2
59976        T056378  3187.19          0

[356 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  28.43
72         48  28.21
44         47  27.69
22         54  24.15
60         35  21.33
Processing batch 13...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
60017        T074174  2473.99         35
60020        T061031  1738.66         87
60031        T008150  1680.88         75
60038        T046189  2627.87          0
60042        T056605  4902.51         26
...              ...      ...        ...
64900        T027151  4729.33        100
64933        T001382  5058.53         62
64935        T011378  2439.04         34
64939        T038111  4605.71         42
64955        T000522  1361.79         91

[319 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  28.50
72         48  28.26
44         47  27.50
22         54  24.31
60         35  21.65
Processing batch 14...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
65000        T044545  4035.91         47
65039        T007934  3222.18         46
65045        T006979  3048.94         77
65074        T093314  3200.86         54
65084        T000731  6138.50         31
...              ...      ...        ...
69977        T062210  3170.28         19
69983        T022461  3253.74         40
69986        T000711  5991.71         18
69989        T096793  2635.79         62
69993        T076888  1740.35         62

[337 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  28.62
72         48  28.28
44         47  27.47
22         54  24.24
60         35  21.24
Processing batch 15...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
70004        T036027  4447.85         93
70029        T087787  3935.59         97
70031        T071447  2659.05         46
70033        T012067  1834.04         84
70038        T055449  2635.04         50
...              ...      ...        ...
74931        T038483  3097.79         33
74934        T083195  2489.50         50
74944        T030227  1707.98         19
74972        T001104  6353.97         46
74998        T033437  4773.85          5

[373 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  28.75
72         48  28.34
44         47  27.45
22         54  24.11
60         35  21.38
Processing batch 16...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
75013        T047858  4547.55         43
75032        T000428  7148.31         18
75034        T017016  2893.19         54
75055        T016446  1117.96         75
75061        T098788  4191.64         35
...              ...      ...        ...
79840        T056297  3216.91         47
79870        T000913  6625.27         20
79874        T066005  2629.57         47
79936        T085002  3030.57         35
79992        T030898  4061.47         50

[353 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  28.64
72         48  28.24
44         47  27.55
22         54  24.39
60         35  21.33
Processing batch 17...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
80033        T031248  4608.89         48
80067        T084624  4117.68         60
80081        T075646  2650.28         54
80110        T013374  3491.67         54
80114        T010529  3675.79         91
...              ...      ...        ...
84950        T002757  6895.67         68
84958        T091940  1744.32         47
84966        T002360  6051.00         13
84988        T076553  1596.05         62
84989        T099111  3207.12         39

[313 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  28.35
72         48  28.03
44         47  27.56
22         54  24.47
60         35  21.03
Processing batch 18...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
85007        T047221  4886.16         24
85020        T000418   682.46          0
85022        T015055  1348.17         61
85050        T092511  4824.60          6
85079        T000554  1833.30         49
...              ...      ...        ...
89909        T015588  3199.23         20
89924        T083612  2381.48         57
89929        T003834  3144.57         62
89940        T049482  3340.66         35
89986        T012585  1967.78          0

[338 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  28.46
72         48  27.92
44         47  27.57
22         54  24.47
60         35  20.97
Processing batch 19...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
90025        T028783  2571.19         53
90026        T000403  6131.19         98
90028        T000884  7043.99         35
90052        T042224  4733.40          4
90054        T015372  4242.30         46
...              ...      ...        ...
94908        T036264  2478.75         50
94911        T036462  2846.47         53
94926        T042945  1625.63         95
94962        T003525  2974.33         54
94991        T000998  6284.78         10

[336 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  28.07
72         48  27.78
44         47  27.59
22         54  24.68
60         35  21.35
Processing batch 20...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[col] = safe_transform(le, pdf[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf['predicted_fraud'] = preds


⚠️ Fraudulent transactions detected:
      transaction_id   amount  seller_id
95070        T086919  1686.01         62
95077        T059140  1383.71         48
95093        T019114  2876.21         57
95107        T061783  1826.83         74
95108        T079468  1724.60         50
...              ...      ...        ...
99967        T001564  2238.22          1
99968        T002889   411.80         47
99989        T002418  4873.68         26
99990        T000520  6575.33         12
99999        T060124  3130.45         19

[353 rows x 3 columns]

📈 Top Risky Sellers (RWSI):
    seller_id   RWSI
37         57  28.11
72         48  27.76
44         47  27.53
22         54  24.82
60         35  21.36
