# Fraud Detection Using User Behaviour [Supervised Learning]

- Dataset: Fraud User & other normal user in user profile
    - feature: User Behaviour Features
    - label: fraud types
    
- Method: multiclass classification (supervised learning)

# Load Packages

In [1]:
import numpy as np
import json
import base64
import pandas as pd
from datetime import datetime, timedelta
import pyspark.sql.functions as F
from pyspark.sql.window import Window
from pyspark import SparkConf
from pyspark.sql import SparkSession
import pyspark.sql.types as T
from pyspark.ml.linalg import Vectors, VectorUDT

In [2]:
def get_spark_session(**kwargs):
    """Initializing Spark context.

    Parameters
    ----------
    kwargs : dict
        Variable number of keyword arguments to initialize the SparkContext
        object

    Returns
    -------
     : SparkContext object

    """
    conf = SparkConf() \
        .setAppName(kwargs.get('app_name', 'test')) \
        .set("spark.executor.memory", kwargs.get('executor_memory', '20g')) \
        .set("spark.driver.memory", kwargs.get('driver_memory', '30g')) \
        .set("spark.driver.maxResultSize",
             kwargs.get('max_result_size', '100g')) \
        .set("spark.executor.instances", kwargs.get('num_executors', '100')) \
        .set("spark.executor.cores", kwargs.get('num_cores', '4')) \
        .set("spark.sql.crossJoin.enabled", True)  \
        .set("spark.cores.max", kwargs.get('cores_max', '1000'))    \
        .set("spark.network.timeout", kwargs.get('timeout', '3600s')) \
        .set("spark.executor.heartbeatInterval", kwargs.get('heartbeat', '3500s')) \
        .set("spark.sql.shuffle.partitions", kwargs.get('num_partitions', '4000')) \
        .set("spark.yarn.queue", kwargs.get('spark_yarn_queue', 'ds-critical'))

    spark_session = SparkSession.builder.config(conf=conf).enableHiveSupport().getOrCreate()
    
    return spark_session

In [3]:
your_name = "LiuMing" # put your name here
spark_appname = "pyspark_Supervised_Learning_{}".format(your_name)

# spark conf parameter
executor_memory = '10g'
driver_memory = '30g'
num_executors = '100'
num_partitions = '3000'

# get the spark object
spark = get_spark_session(app_name=spark_appname,
                         executor_memory=executor_memory,
                         driver_memory=driver_memory,
                         num_executors=num_executors,
                         num_partitions=num_partitions)

sc = spark.sparkContext
spark.sql('use shopee')

DataFrame[]

# Prepare Fraud and User Behaviour Dataset 

## Fraud User

In [4]:
Fraud_query = """
    select *
    from shopee_fraud_backend_id_db__fraud_user_tag_tab
    where status=1
"""
fraud_user_df = spark.sql(Fraud_query)
fraud_user_df = fraud_user_df.withColumn('date', F.from_unixtime('mtime', 'YYYY-MM-dd'))
fraud_user_df = fraud_user_df\
                .filter(fraud_user_df.date.between('2019-07-01','2019-10-01'))\
                .orderBy(['date','tagid'])

Fraud_tag_query = """
    select id as taggid, name
    from shopee_backend_id_db__fraud_tag_tab
    where severity = 1 and status = 1
"""
fraud_tag_df = spark.sql(Fraud_tag_query)

In [5]:
fraud_df = fraud_user_df.join(fraud_tag_df, fraud_tag_df.taggid == fraud_user_df.tagid)
fraud_category_df = fraud_df.groupBy('name').agg(F.count('userid').alias('total_count'))\
                    .orderBy('total_count', ascending=False)

fraud_category_df.show(100,False)

+--------------------------------------------+-----------+
|name                                        |total_count|
+--------------------------------------------+-----------+
|DP Voucher                                  |308191     |
|Voucher                                     |87541      |
|Free Shipping                               |55405      |
|Scam - Potential Scammer                    |50497      |
|Duplicate listing                           |49715      |
|Order Brushing                              |40388      |
|Welcome Package                             |12069      |
|Promotion T&C                               |12045      |
|Prohibited listing                          |5149       |
|SPL Overdue                                 |2742       |
|Coin fraud                                  |2560       |
|Use of emulator/simulator                   |2488       |
|CB inactive seller                          |1551       |
|Advertisement/poaching                      |764       

In [6]:
fraud_df = fraud_df.selectExpr('userid', 'name')
fraud_df.printSchema()
print(fraud_df.count())

root
 |-- userid: integer (nullable = true)
 |-- name: string (nullable = true)

633916


## Normal User

In [7]:
Normal_User_Query = """
    select userid
    from user_profile
    where last_login > '2019-07-01'
"""
all_user_df = spark.sql(Normal_User_Query)

normal_user_df = all_user_df.join(fraud_df, ['userid'], how='left_anti')
normal_user_df = normal_user_df.withColumn('name', F.lit('Normal'))

In [8]:
user_df = normal_user_df.union(fraud_df)
user_df.printSchema()

root
 |-- userid: integer (nullable = true)
 |-- name: string (nullable = true)



In [9]:
print(user_df.count())

103332168


## User Behaviour 

In [10]:
Behaviour_Feature_Query = """
    select *
    from shopee_ds.regds_kg_user_behaviour_feature
    where grass_region='ID'
"""

In [11]:
feature_df = spark.sql(Behaviour_Feature_Query)
feature_df.printSchema()
df = user_df.join(feature_df, user_df.userid == feature_df.uid)

root
 |-- uid: long (nullable = true)
 |-- total_active_time_in_last_30d: double (nullable = true)
 |-- total_session_count_in_last_30d: long (nullable = true)
 |-- total_ops_count_in_last_30d: long (nullable = true)
 |-- active_days_in_last_30d: string (nullable = true)
 |-- last_active_day: date (nullable = true)
 |-- hour_normalise: string (nullable = true)
 |-- action_normalise: string (nullable = true)
 |-- op_interval: string (nullable = true)
 |-- markov: string (nullable = true)
 |-- grass_region: string (nullable = true)



## Filters

We only select top 9 fraud types，because other fraud type doesn't have too much examples. We also put a threshold on user behaviour, only the user has more than 10 sessions within lastest 30d we consider about active user. And have enough behaviour features for us to classify whether it is a good user or bad user.

In [12]:
target_type = [
    'Normal',
    'DP Voucher',
    'Voucher',
    'Free Shipping',
    'Scam - Potential Scammer',
    'Duplicate listing',
    'Order Brushing',
    'Welcome Package',
    'Promotion T&C',
    'Coin fraud'
]

total_session_count_in_latest_30d_threshold = 10

In [13]:
df = df.filter(df.total_session_count_in_last_30d >= total_session_count_in_latest_30d_threshold)
print(df.count())

column_name = 'name'
filter_df = spark.createDataFrame(target_type, df.schema[column_name].dataType)
df = df.join(filter_df, df[column_name] == filter_df["value"])
print(df.count())

17398711
17391050


In [14]:
df.groupBy('name').agg(F.count('userid').alias('total_count'))\
                    .orderBy('total_count', ascending=False).show(20, False)

+------------------------+-----------+
|name                    |total_count|
+------------------------+-----------+
|Normal                  |17288848   |
|Voucher                 |45145      |
|Free Shipping           |15273      |
|Order Brushing          |13155      |
|Duplicate listing       |10397      |
|DP Voucher              |5471       |
|Welcome Package         |5271       |
|Scam - Potential Scammer|4546       |
|Promotion T&C           |1646       |
|Coin fraud              |1298       |
+------------------------+-----------+



In [15]:
frac = 1
fractions = {
    'Normal':0.005 * frac,
    'DP Voucher':1 * frac,
    'Voucher':0.5 * frac,
    'Free Shipping':1 * frac,
    'Scam - Potential Scammer':1 * frac,
    'Duplicate listing':1 * frac,
    'Order Brushing':1 * frac,
    'Welcome Package':1 * frac,
    'Promotion T&C':1 * frac,
    'Coin fraud':1 * frac
}

In [16]:
dataset = df.sampleBy("name", fractions=fractions, seed=0)
dataset.groupBy('name').agg(F.count('userid').alias('total_count'))\
                    .orderBy('total_count', ascending=False).show(20, False)

+------------------------+-----------+
|name                    |total_count|
+------------------------+-----------+
|Normal                  |86450      |
|Voucher                 |22560      |
|Free Shipping           |15273      |
|Order Brushing          |13155      |
|Duplicate listing       |10397      |
|DP Voucher              |5471       |
|Welcome Package         |5271       |
|Scam - Potential Scammer|4546       |
|Promotion T&C           |1646       |
|Coin fraud              |1298       |
+------------------------+-----------+



In [17]:
dataset.printSchema()

root
 |-- userid: integer (nullable = true)
 |-- name: string (nullable = true)
 |-- uid: long (nullable = true)
 |-- total_active_time_in_last_30d: double (nullable = true)
 |-- total_session_count_in_last_30d: long (nullable = true)
 |-- total_ops_count_in_last_30d: long (nullable = true)
 |-- active_days_in_last_30d: string (nullable = true)
 |-- last_active_day: date (nullable = true)
 |-- hour_normalise: string (nullable = true)
 |-- action_normalise: string (nullable = true)
 |-- op_interval: string (nullable = true)
 |-- markov: string (nullable = true)
 |-- grass_region: string (nullable = true)
 |-- value: string (nullable = true)



In [None]:
# may takes a lot of time, but still fast than toPandas()
dataset.coalesce(500).write.mode('overwrite').parquet('user-behaviour-dataset')

# Supervised Learning 

## preprocess dataset

We use all 3 types of features. Since markov and action, active hour feature are normalised before, we just need to do column wise normalise on the other 4 columns. We use `RobustScaler` to avoid exetrme values. In total, we have 2108 dimentional vectors.

In [None]:
# !hadoop fs -get /user/ming.liu/user-behaviour-dataset ./data
# !rm -rf ./data/user-behaviour-dataset/_SUCCESS

In [None]:
# import sys
# !{sys.executable} -m pip install pyarrow

In [18]:
from sklearn import preprocessing

In [19]:
# dataset_pd = dataset.toPandas()
dataset_pd =pd.read_parquet("./data/user-behaviour-dataset")

In [20]:
dataset_pd['hour_normalise'] = dataset_pd['hour_normalise'].apply(lambda x: json.loads(x))
dataset_pd['action_normalise'] = dataset_pd['action_normalise'].apply(lambda x: json.loads(x))
dataset_pd['op_interval'] = dataset_pd['op_interval'].apply(lambda x: json.loads(x))
dataset_pd['markov'] = dataset_pd['markov'].apply(lambda x: json.loads(x))

In [21]:
raw_interval_data = dataset_pd.as_matrix(columns=['op_interval'])
interval_data = [[] for _ in range(raw_interval_data.shape[0])]
for i, d in enumerate(interval_data):
    for c in raw_interval_data[i]:
        interval_data[i].extend(c)
interval_data = np.array(interval_data, dtype='float')

  """Entry point for launching an IPython kernel.


In [22]:
interval_data_max = np.max(interval_data, axis=1).reshape(raw_interval_data.shape[0], 1)
row_sums = interval_data.sum(axis=1)
interval_data_normalise = interval_data / row_sums[:, np.newaxis]

In [23]:
# column wise normalise 
x = dataset_pd[[
    'total_active_time_in_last_30d', 
    'total_session_count_in_last_30d', 
    'total_ops_count_in_last_30d']].values.astype(float)

x = np.concatenate((interval_data_max, x), axis = 1)

#scaler = preprocessing.MinMaxScaler()
scaler = preprocessing.RobustScaler() 
x_scaled = scaler.fit_transform(x)

In [24]:
raw_data = dataset_pd.as_matrix(columns=[
#     'total_active_time_in_last_30d',
#     'total_session_count_in_last_30d',
#     'total_ops_count_in_last_30d',
    'hour_normalise',
    'action_normalise',
#     'op_interval',
    'markov'
])
label = dataset_pd.as_matrix(columns=['name']).reshape(len(dataset_pd),)

  
  # Remove the CWD from sys.path while we load stuff.


In [25]:
data = [[] for _ in range(raw_data.shape[0])]
for i, d in enumerate(data):
    for c in raw_data[i]:
        try:
            data[i].extend(c)
        except:
            data[i].append(c)

In [26]:
data = np.concatenate((data, x_scaled, interval_data_normalise), axis = 1)

In [27]:
data = np.array(data)
data.shape

(166065, 2108)

In [28]:
for i, l in enumerate(label):
    label[i] = target_type.index(l)

In [29]:
label = np.array(label, dtype=int)

## SVM 

- **Results** SVM give us baseline, 0.82 - 0.83 accuracy on 10 categories.

In [None]:
# import sys
# !{sys.executable} -m pip install scikit-learn

In [30]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC

In [31]:
X_train, X_test, y_train, y_test = train_test_split(
    data,
    label, 
    test_size=0.2, 
    random_state=4
)

In [32]:
classifier = OneVsRestClassifier(LinearSVC(random_state=0, verbose=1))
classifier.fit(X_train, y_train)
y_predict = classifier.predict(X_test)
classifier.score(X_test, y_test)

[LibLinear]



[LibLinear][LibLinear][LibLinear][LibLinear][LibLinear][LibLinear][LibLinear][LibLinear][LibLinear]

0.8257007798151327

In [33]:
print(
    classification_report(
        list(y_test), 
        list(y_predict), 
        target_names = target_type
    ))

                          precision    recall  f1-score   support

                  Normal       0.87      0.96      0.91     17395
              DP Voucher       0.82      0.84      0.83      1166
                 Voucher       0.69      0.66      0.67      4428
           Free Shipping       0.68      0.50      0.57      3022
Scam - Potential Scammer       0.70      0.51      0.59       896
       Duplicate listing       0.79      0.85      0.82      2039
          Order Brushing       0.89      0.85      0.87      2670
         Welcome Package       0.86      0.74      0.79      1008
           Promotion T&C       0.72      0.37      0.49       340
              Coin fraud       0.70      0.08      0.15       249

               micro avg       0.83      0.83      0.83     33213
               macro avg       0.77      0.63      0.67     33213
            weighted avg       0.82      0.83      0.82     33213



## naive neural network

- **Results** Simple neural network give not bad performance. 0.85 accuracy on 10 categories. Add more layers almost gives similar performance as just one hidden layer.

In [34]:
# tf 2.0
import tensorflow as tf

In [41]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
#     tf.keras.layers.Dense(64, activation='relu'),
#     tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(len(target_type), activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [42]:
model.fit(
    X_train, 
    y_train, 
    epochs=20, 
    use_multiprocessing=True, 
    validation_data=(X_test,  y_test)
)

Train on 132852 samples, validate on 33213 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fc9147775d0>

In [43]:
y_predict = np.argmax(model.predict(X_test), axis = 1)

In [44]:
print(
    classification_report(
        list(y_test), 
        list(y_predict), 
        target_names = target_type
    ))

                          precision    recall  f1-score   support

                  Normal       0.91      0.95      0.93     17395
              DP Voucher       0.87      0.85      0.86      1166
                 Voucher       0.71      0.71      0.71      4428
           Free Shipping       0.69      0.61      0.65      3022
Scam - Potential Scammer       0.70      0.63      0.66       896
       Duplicate listing       0.84      0.87      0.85      2039
          Order Brushing       0.90      0.87      0.89      2670
         Welcome Package       0.86      0.77      0.81      1008
           Promotion T&C       0.78      0.54      0.64       340
              Coin fraud       0.57      0.22      0.32       249

               micro avg       0.85      0.85      0.85     33213
               macro avg       0.78      0.70      0.73     33213
            weighted avg       0.85      0.85      0.85     33213



## lightGBM

- **Results** GBDT gives highest performance, around 0.87, but also takes many time to train it.

In [None]:
# import sys
# !{sys.executable} -m pip install setuptools wheel numpy scipy scikit-learn -U
# !{sys.executable} -m pip install lightgbm

In [None]:
# acc = 0.87
# classifier = LGBMClassifier(
#     num_leaves=31, 
#     max_depth=-1, 
#     learning_rate=0.1, 
#     n_estimators=300, 
#     n_jobs=50,
#     silent=False
# )

In [46]:
from lightgbm import LGBMClassifier

In [47]:
# all default params, just add more epoch 100 -> 300

classifier = LGBMClassifier(n_estimators=300)
classifier.fit(X_train, y_train)
y_predict = classifier.predict(X_test)

In [48]:
print(
    classification_report(
        list(y_test), 
        list(y_predict), 
        target_names = target_type
    ))

                          precision    recall  f1-score   support

                  Normal       0.92      0.97      0.94     17395
              DP Voucher       0.89      0.86      0.88      1166
                 Voucher       0.73      0.75      0.74      4428
           Free Shipping       0.72      0.63      0.67      3022
Scam - Potential Scammer       0.79      0.66      0.72       896
       Duplicate listing       0.86      0.89      0.87      2039
          Order Brushing       0.92      0.88      0.90      2670
         Welcome Package       0.90      0.80      0.85      1008
           Promotion T&C       0.85      0.59      0.69       340
              Coin fraud       0.51      0.22      0.31       249

               micro avg       0.87      0.87      0.87     33213
               macro avg       0.81      0.72      0.76     33213
            weighted avg       0.86      0.87      0.86     33213



原生的 lightgbm 完全玩不来。。。 学了一天调了一天还没有上面封装的 default 来的高。。。 但是，用它提供个的 Dataset 来存放数据只会使用很少的内存。 

In [None]:
# import lightgbm as lgb

# train_data = lgb.Dataset(X_train, label=y_train, free_raw_data=False)
# test_data = lgb.Dataset(X_test, label=y_test, free_raw_data=False)

# parameters = {
#     'objective': 'multiclass',
#     'num_classes': len(target_type),
#     'metric': 'softmax',
#     'is_unbalance': 'true',
#     'boosting': 'gbdt',
#     'num_leaves': 31,
# #     'feature_fraction': 0.8,
# #     'bagging_fraction': 0.5,
# #     'bagging_freq': 20,
#     'learning_rate': 0.1,
#     'max_bin': 255,
#     'verbose': 1
# }

# gbm = lgb.train(
#     parameters,
#     train_data,
# #     valid_sets=[test_data],
#     num_boost_round=300,
# #     verbose_eval=5,
# #     early_stopping_rounds=30,
# #     init_model='lgb_model.txt'
# )

# y_predict = np.argmax(gbm.predict(X_test), axis = 1)

# gbm.save_model('lgb_model.txt')
# # bst = lgb.Booster(model_file='model.txt')

# print(
#     classification_report(
#         list(y_test), 
#         list(y_predict), 
#         target_names = target_type
#     ))

# Raise Threshold To See What Happens


## Only examine predictions with high confidence
- **Conclusion** very promising.

In [49]:
def filter_result(predictions, labels, confidence_threshold):
    f_prediction = []
    f_label = []
    for i, p in enumerate(predictions):
        if max(p) >= confidence_threshold:
            f_prediction.append(p)
            f_label.append(labels[i])
    
    print('%.3f are left' % (float(len(f_label)) / len(labels)))
    print('=' * 20)
    
    y_predict = np.argmax(f_prediction, axis = 1)
    print(
    classification_report(
        list(f_label), 
        list(y_predict), 
        target_names = target_type
    ))

In [52]:
filter_result(classifier.predict_proba(X_test), y_test, 0.8)

0.770 are left
                          precision    recall  f1-score   support

                  Normal       0.96      0.99      0.98     15771
              DP Voucher       0.95      0.96      0.95       973
                 Voucher       0.88      0.84      0.86      2270
           Free Shipping       0.89      0.77      0.83      1215
Scam - Potential Scammer       0.95      0.80      0.87       476
       Duplicate listing       0.93      0.98      0.96      1536
          Order Brushing       0.98      0.97      0.97      2216
         Welcome Package       0.96      0.88      0.92       815
           Promotion T&C       0.92      0.75      0.82       210
              Coin fraud       0.76      0.34      0.47        82

               micro avg       0.95      0.95      0.95     25564
               macro avg       0.92      0.83      0.86     25564
            weighted avg       0.95      0.95      0.95     25564



## Only allow more active user to be trained and tested

- **Conclusion** make more restrict threshold on user activeness doesn't give too much accuracy improvement.

In [45]:
min_session = 30     # only consider user with more than 30 sessions within last 30 days
min_time = 30 * 60   # or consider user spent more than 30 min within last 30 days

In [53]:
f1 = dataset_pd['total_active_time_in_last_30d'] >= min_time
f2 = dataset_pd['total_session_count_in_last_30d'] >= min_session
dataset_pd_restrict = dataset_pd[f1 | f2]

In [54]:
print('%.3f data left after applied filter' % (len(dataset_pd_restrict) / float(len(dataset_pd))))

0.862 data left after applied filter


In [55]:
def data_preprocessing(dataset_pd):
    raw_interval_data = dataset_pd.as_matrix(columns=['op_interval'])
    interval_data = [[] for _ in range(raw_interval_data.shape[0])]
    for i, d in enumerate(interval_data):
        for c in raw_interval_data[i]:
            interval_data[i].extend(c)
    interval_data = np.array(interval_data, dtype='float')
    interval_data_max = np.max(interval_data, axis=1).reshape(raw_interval_data.shape[0], 1)

    row_sums = interval_data.sum(axis=1)
    interval_data_normalise = interval_data / row_sums[:, np.newaxis]

    x = dataset_pd[[
        'total_active_time_in_last_30d', 
        'total_session_count_in_last_30d', 
        'total_ops_count_in_last_30d']].values.astype(float)

    x = np.concatenate((interval_data_max, x), axis = 1)

    min_max_scaler = preprocessing.MinMaxScaler()
    x_scaled = min_max_scaler.fit_transform(x)
    
    raw_data = dataset_pd.as_matrix(columns=[
    #     'total_active_time_in_last_30d',
    #     'total_session_count_in_last_30d',
    #     'total_ops_count_in_last_30d',
        'hour_normalise',
        'action_normalise',
    #     'op_interval',
        'markov'
    ])
    label = dataset_pd.as_matrix(columns=['name']).reshape(len(dataset_pd),)
    
    data = [[] for _ in range(raw_data.shape[0])]
    for i, d in enumerate(data):
        for c in raw_data[i]:
            try:
                data[i].extend(c)
            except:
                data[i].append(c)
                
    data = np.concatenate((data, x_scaled, interval_data_normalise), axis = 1)
    data = np.array(data)
    
    for i, l in enumerate(label):
        label[i] = target_type.index(l)
    label = np.array(label, dtype=int)
    
    return data, label

In [56]:
data, label = data_preprocessing(dataset_pd_restrict)

  


In [57]:
X_train, X_test, y_train, y_test = train_test_split(
    data,
    label, 
    test_size=0.2, 
    random_state=4
)

In [58]:
classifier = LGBMClassifier(n_estimators=300)
classifier.fit(X_train, y_train)
y_predict = classifier.predict(X_test)

In [59]:
print(
    classification_report(
        list(y_test), 
        list(y_predict), 
        target_names = target_type
    ))

                          precision    recall  f1-score   support

                  Normal       0.92      0.97      0.95     16499
              DP Voucher       0.86      0.82      0.84       608
                 Voucher       0.74      0.74      0.74      3998
           Free Shipping       0.70      0.64      0.67      2595
Scam - Potential Scammer       0.79      0.63      0.70       736
       Duplicate listing       0.84      0.83      0.84      1143
          Order Brushing       0.93      0.89      0.91      1868
         Welcome Package       0.90      0.77      0.83       792
           Promotion T&C       0.88      0.49      0.63       199
              Coin fraud       0.69      0.18      0.28       195

               micro avg       0.87      0.87      0.87     28633
               macro avg       0.83      0.70      0.74     28633
            weighted avg       0.87      0.87      0.87     28633



In [61]:
filter_result(classifier.predict_proba(X_test), y_test, 0.8)

0.775 are left
                          precision    recall  f1-score   support

                  Normal       0.97      0.99      0.98     15038
              DP Voucher       0.92      0.91      0.92       494
                 Voucher       0.88      0.84      0.86      2010
           Free Shipping       0.90      0.76      0.83      1073
Scam - Potential Scammer       0.94      0.80      0.86       366
       Duplicate listing       0.94      0.97      0.95       829
          Order Brushing       0.98      0.98      0.98      1567
         Welcome Package       0.95      0.86      0.90       619
           Promotion T&C       0.94      0.70      0.80       112
              Coin fraud       0.78      0.38      0.51        77

               micro avg       0.95      0.95      0.95     22185
               macro avg       0.92      0.82      0.86     22185
            weighted avg       0.95      0.95      0.95     22185

