In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk /kaggle/input recursively and print the full path of every file found,
# so the exact dataset file names are visible before anything is loaded.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import random
from statistics import mean
from tqdm import tqdm

In [None]:
import imblearn
print(imblearn.__version__)
from imblearn.over_sampling import SMOTE
from collections import Counter

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier

In [None]:
# Load the PaySim transaction log (CSV) from the Kaggle input directory into a DataFrame.
df = pd.read_csv('/kaggle/input/paysim1/PS_20174392719_1491204439457_log.csv')

In [None]:
# Select the rows whose transaction type is 'DEBIT' and display them.
# NOTE(review): the variable is called df_transfer but the filter keeps 'DEBIT'
# rows, not 'TRANSFER' — confirm which was intended. The frame is not used by
# any later cell visible here.
df_transfer = df[df['type'] == 'DEBIT']
df_transfer

In [None]:
# Class balance of the target: number of rows for each isFraud value.
df['isFraud'].value_counts()

In [None]:
# Count originator names that do NOT start with 'M': extract() yields NaN where
# the pattern fails to match, and isna().sum() counts those rows.
df['nameOrig'].str.extract(r'(^M)').isna().sum()

In [None]:
# Report how many rows fall into each target class. Summing the boolean mask
# counts the matching rows without materialising a filtered copy of the frame.
print("number of fradulent transactions", int((df['isFraud'] == 1).sum()))
print("number of non-fradulent transactions", int((df['isFraud'] == 0).sum()))

In [None]:
# Column names, dtypes, non-null counts and memory footprint of the raw frame.
df.info()

# Preprocessing

In [None]:
# Number of distinct values in every column (helps spot identifier-like columns).
{name: len(values.unique()) for name, values in df.items()}

In [None]:
# Distinct transaction types present in the data (the column that gets one-hot encoded).
df['type'].unique()

In [None]:
# Preview of the indicator columns produced for `type`, named with the 'tp' prefix.
pd.get_dummies(df['type'],prefix='tp')

In [None]:
def onehot(df, column, prefix):
    """Return a new frame in which ``column`` is replaced by indicator columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    column : str
        Name of the categorical column to encode.
    prefix : str
        Prefix for the generated indicator columns (``<prefix>_<value>``).

    Returns
    -------
    pandas.DataFrame
        The remaining original columns followed by one indicator column per
        distinct value of ``column``.
    """
    indicator_cols = pd.get_dummies(df[column], prefix=prefix)
    # Append the indicators on the right, then remove the encoded source column.
    widened = pd.concat([df, indicator_cols], axis=1)
    return widened.drop(columns=column)

In [None]:
def preprocessing(df, train_size=0.7, random_state=123):
    """Prepare the raw transaction frame for modelling and split it into train/test.

    Steps: drop ``step`` and ``isFlaggedFraud``, one-hot encode ``type`` with
    the ``tp`` prefix, separate the ``isFraud`` target, then split.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw transactions; must contain ``step``, ``isFlaggedFraud``, ``type``
        and ``isFraud``. The input frame is not modified.
    train_size : float, default 0.7
        Fraction of rows placed in the training split.
    random_state : int, default 123
        Seed for the split, so every run produces the same partition.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.
    """
    # drop() and onehot() both return new frames, so the caller's df is untouched.
    features = df.drop(['step', 'isFlaggedFraud'], axis=1)

    # One-hot encode the transaction type.
    features = onehot(features, column='type', prefix='tp')

    y = features['isFraud']
    X = features.drop('isFraud', axis=1)

    # Stratify on the target so both splits keep the same fraud ratio as the
    # full data; an unstratified split can shift the share of a rare class.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, random_state=random_state, stratify=y
    )

    return X_train, X_test, y_train, y_test

In [None]:
# Build the modelling frames: encoded features and isFraud target, split into train and test.
X_train, X_test, y_train, y_test = preprocessing(df)

In [None]:
# Row counts of the train and test splits.
print(len(X_train), len(X_test))

In [None]:
# Display the training features (at this point they still include nameOrig / nameDest).
X_train

In [None]:
# Class distribution of the training labels before any resampling.
counter = Counter(y_train)
print(counter)

In [None]:
# Set the account-identifier columns aside (kept in categ_x_* for reference) and
# remove them from the feature frames, for both the train and the test split.
id_columns = ['nameOrig', 'nameDest']

categ_x_train = X_train[id_columns].copy()
X_train = X_train.drop(columns=id_columns)

categ_x_test = X_test[id_columns].copy()
X_test = X_test.drop(columns=id_columns)


In [None]:
# Display the test features after the identifier columns were removed.
X_test

In [None]:
# Balance the training set by oversampling with SMOTE. Only the training split
# is resampled; X_test / y_test are left as they are.
# A fixed random_state makes the synthetic rows identical on every run.
oversample = SMOTE(random_state=123)
X_train, y_train = oversample.fit_resample(X_train, y_train)

# Shuffle features and labels with ONE shared permutation, so a row and its
# label can never drift apart (previously each was shuffled by its own
# .sample() call and stayed aligned only because both used the same seed).
shuffled_index = X_train.sample(frac=1.0, random_state=123).index
X_train = X_train.loc[shuffled_index].reset_index(drop=True)
y_train = y_train.loc[shuffled_index].reset_index(drop=True)

# Class distribution after resampling.
counter = Counter(y_train)
print(counter)

In [None]:
# KNN
# Fit a 10-nearest-neighbours classifier on the training data and predict the test rows.
# NOTE(review): `model` is rebound to an XGBClassifier in the next cell, and `pred`
# is not referenced again in the cells visible here — this looks like a standalone baseline.
knn = KNeighborsClassifier(n_neighbors=10)
model=knn.fit(X_train, y_train)
pred = model.predict(X_test)
pred


In [None]:
# XG Boost
# Gradient-boosted tree classifier with otherwise default hyperparameters;
# n_jobs=-1 requests all available CPU threads. The estimator is scored by
# cross-validation and then fitted in the cells that follow.
model = XGBClassifier(n_jobs=-1)

In [None]:
# define evaluation procedure
# Stratified 10-fold cross-validation repeated 3 times (30 fits in total),
# seeded so the fold assignment is the same on every run.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
cv

In [None]:
# Evaluate the model: F1 of the positive class on every fold/repeat defined by
# `cv`; `scores` holds one value per fit. The former `for ... in tqdm(range(1))`
# wrapper ran this single call exactly once and added nothing, so it is removed.
# NOTE(review): X_train was oversampled with SMOTE before this cell, so the
# validation folds also contain synthetic rows; these scores are likely
# optimistic compared with resampling inside each training fold — confirm.
scores = cross_val_score(model, X_train, y_train, scoring='f1', cv=cv, n_jobs=-1)

In [None]:
# summarize performance
# Average of the per-fold F1 scores, printed with five decimals.
print('Mean F1: %.5f' % mean(scores))

In [None]:
# Fit the final model on the whole (resampled) training set; cross_val_score
# above only fitted internal clones, so `model` itself is still unfitted here.
model.fit(X_train, y_train)

In [None]:
import joblib

# Persist the fitted model with joblib so it can be reloaded without retraining;
# the relative file name places it in the current working directory.
filename = 'finalized_model.sav'

joblib.dump(model, filename)

In [None]:
# Load the model back from disk and predict on the test set to check that the
# saved file round-trips.
# NOTE(review): joblib.load unpickles the file, so only load files you created yourself.
loaded_model = joblib.load(filename)

result = loaded_model.predict(X_test)

print(result)

In [None]:
# Test-set predictions from the in-memory model; these feed the confusion
# matrix and classification report below.
y_pred = model.predict(X_test)


In [None]:
# Confusion matrix and per-class precision/recall/F1 report on the test set.
# The matrix is shown here; the report text is printed in a later cell.
cm = confusion_matrix(y_test,y_pred)
clr = classification_report(y_test, y_pred, target_names=['Not Fraud','Fraud'])
cm

In [None]:
# Draw the confusion matrix as an annotated heatmap (y axis = actual class,
# x axis = predicted class) using the explicit figure/axes interface.
class_labels = ['Not Fraud', 'Fraud']
tick_positions = np.arange(2) + 0.5  # centre each label on its heatmap cell

fig, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(cm, annot=True, vmin=0, fmt='g', cbar=False, cmap='Blues', ax=ax)
ax.set_xticks(tick_positions)
ax.set_xticklabels(class_labels)
ax.set_yticks(tick_positions)
ax.set_yticklabels(class_labels)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
# Print the classification report text computed earlier (stored in `clr`).
print('Classification Report:\n', clr)

In [None]:
# Clone the Gradio source repository into ./gradio.
# NOTE(review): a later cell also installs gradio from PyPI with pip, so this
# source checkout looks redundant — confirm it is still needed.
! git clone https://github.com/gradio-app/gradio.git
    

In [None]:
# List the cloned repository to check that the checkout succeeded.
! ls gradio/

In [None]:
# Install Gradio from the cloned sources.
# NOTE(review): setup.py is invoked from the notebook's working directory, not
# from inside gradio/ — verify this actually succeeds; the pip install in the
# next cell covers the same package.
! python gradio/setup.py install

In [None]:
! pip install gradio

In [None]:
import gradio as gr

In [None]:
def dataframe(file_obj):
    """Score every transaction in an uploaded CSV file with the trained model.

    NOTE: a later cell defines another function with this same name (the
    single-transaction handler); ``interface_csv`` must be built before that
    cell runs so that it binds this version.

    Parameters
    ----------
    file_obj : file-like
        Upload handle supplied by the Gradio ``File`` input; only its ``.name``
        attribute (the path on disk) is used.

    Returns
    -------
    pandas.DataFrame
        A single ``isFraud`` column with one predicted label per input row.

    Raises
    ------
    ValueError
        If the CSV lacks a non-indicator feature column the model was trained on.
    """
    uploaded = pd.read_csv(file_obj.name)
    features = onehot(uploaded, column='type', prefix='tp')

    # The model must receive exactly the training columns, in training order.
    expected_cols = list(X_train.columns)
    missing = [
        col for col in expected_cols
        if col not in features.columns and not col.startswith('tp_')
    ]
    if missing:
        raise ValueError(
            'Uploaded CSV is missing required columns: {}'.format(missing)
        )

    # reindex drops everything the model never saw (identifiers, step, and any
    # isFraud / isFlaggedFraud columns present in the upload), adds a zero
    # column for each transaction type absent from this file, and restores the
    # training column order.
    features = features.reindex(columns=expected_cols, fill_value=0)

    y_pred = model.predict(features)
    return pd.DataFrame(y_pred, columns=['isFraud'])


In [None]:
# Gradio app for batch scoring: one CSV upload in, a table of predictions out.
# `fn=dataframe` binds the CSV handler defined in the previous cell at the time
# this cell runs.
file = gr.inputs.File(file_count="single", type="file", label="CSV File for Predictions", optional=False)
y_pred_df = gr.outputs.Dataframe(max_rows=20, max_cols=None, overflow_row_behaviour="paginate", type="auto", label="Predictions of records in the file")
interface_csv = gr.Interface(
                            fn=dataframe, 
                            inputs=file, 
                            outputs=y_pred_df,
                            title="Fraud Detection in Mobile Money Transactions",
                            theme="dark-peach"
                            )

In [None]:
# Start the CSV-upload app. share=True requests a shareable link in addition to
# the local server — NOTE(review): confirm a public link is acceptable for this data.
interface_csv.launch(share=True)

In [None]:
# Scratch check of NumPy indexing: build a two-element array and show its first item.
# NOTE(review): nothing in the cells visible here reads `y` afterwards — this
# looks like a leftover experiment.
y = np.array([0,1])
y[0]

In [None]:
def dataframe(trans_type, amount, oldbalanceOrg):
    """Score a single transaction entered through the Gradio form.

    NOTE: this redefines the name ``dataframe`` used earlier for the CSV
    handler; from this cell on the name refers to this function.

    Parameters
    ----------
    trans_type : str
        One of ``'PAYMENT'``, ``'TRANSFER'``, ``'CASH_OUT'``, ``'CASH_IN'``, ``'DEBIT'``.
    amount : float
        Transaction amount.
    oldbalanceOrg : float
        Originator balance before the transaction.

    Returns
    -------
    tuple
        ``({'Not Fraud': p0, 'Fraud': p1}, newbalanceOrig)`` — the two class
        probabilities and the originator balance expected after the transaction.

    Raises
    ------
    ValueError
        If the transaction type is unknown/unselected or a numeric input is empty.
    """
    known_types = ('PAYMENT', 'TRANSFER', 'CASH_OUT', 'CASH_IN', 'DEBIT')
    # The form's dropdown and amount default to None; fail with a clear message
    # instead of an opaque KeyError / TypeError further down.
    if trans_type not in known_types:
        raise ValueError(
            'Unknown transaction type {!r}; expected one of {}'.format(trans_type, known_types)
        )
    if amount is None or oldbalanceOrg is None:
        raise ValueError('Both amount and oldbalanceOrg must be provided')

    # CASH_IN credits the originator; every other type debits it.
    if trans_type == 'CASH_IN':
        newbalanceOrig = oldbalanceOrg + amount
    else:
        newbalanceOrig = oldbalanceOrg - amount

    row = {
        'amount': amount,
        'oldbalanceOrg': oldbalanceOrg,
        'newbalanceOrig': newbalanceOrig,
        'oldbalanceDest': 0.0,
        'newbalanceDest': 0.0,
        'tp_{}'.format(trans_type): 1,
    }
    # Lay the row out exactly like the training frame: same columns, same order,
    # with 0 for the transaction types that were not selected. The previous
    # version re-appended the selected tp_* column at the end, so its column
    # order never matched the training data.
    features = pd.DataFrame(row, index=[0]).reindex(columns=X_train.columns, fill_value=0)

    proba = model.predict_proba(features)[0].tolist()
    return {'Not Fraud': proba[0], 'Fraud': proba[1]}, newbalanceOrig

In [None]:
# Gradio app for scoring one transaction: three form inputs in, the class
# probabilities and the computed originator balance out.
# NOTE(review): the output component is stored in a variable named
# `newbalanceDest`, but its label and the value returned by the handler refer to
# the ORIGINATOR's new balance — consider renaming.
trans_type = gr.inputs.Dropdown(['PAYMENT','TRANSFER','CASH_OUT','CASH_IN','DEBIT'], type="value", default=None, label="Type of Transaction")
amount = gr.inputs.Number(default=None, label="Amount of Transaction")
oldbalanceOrg = gr.inputs.Number(default=100000, label="Old Balance of Originator")
isFraud = gr.outputs.Label(num_top_classes=2, type="auto", label="Fraud or Non-Fraud")
newbalanceDest = gr.outputs.Textbox(type="auto", label="Ideal New Balance of Originator")
interface_pertrans = gr.Interface(
                                  fn=dataframe, 
                                  inputs=[trans_type, amount, oldbalanceOrg], 
                                  outputs=[isFraud,newbalanceDest],
#                                   live=True,
                                  title="Oddity - Fraud Detection System",
                                  description="""Connectivity through telecommunications has remodeled our way of lives.
                                            The use of mobile handsets have had a tremendous impact on our very existence.
                                            One important innovation brought about by mobile handsets is the ability to perform financial transactions using mobile devices.
                                            This is popularly known as mobile money transactions (MMTs) and has questionable security features.
                                            These loopholes in security and lack of education (anecdotally) on the part of users of have created space for
                                            criminals to engage in fraudulent activities making customers lose a lot of fortune and impeding the prospect of rolling
                                            the unbanked into the FinTech community.""",
                                theme="dark-peach"
                                 )

In [None]:
# Start the single-transaction app. share=True requests a shareable link in
# addition to the local server — NOTE(review): confirm a public link is intended.
interface_pertrans.launch(share=True)