<center>
  <div style="font-family: Arial, sans-serif; background-color: #67b588; color: #2D2926; padding: 12px; line-height: 1; font-size: 32px; font-weight: bold;">
    Creating Test Datasets for the Fraud Detection App
  </div>
</center>

In this notebook, we create the test datasets used by the app. Preparing them in advance makes it easier to test our algorithm in the app, for two main reasons:

- Encoded Data: Our original data was already scaled and processed (its features appear only as `V1`–`V28`), making it nearly impossible for a user to enter new transactions by hand in that format.

- User-Friendly Interaction: By creating separate datasets for fraudulent and non-fraudulent cases, we make it easier for the user to test specific scenarios. If we provided a single dataset, the user would have to run multiple tests just to find one fraud case.

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the raw transactions and discard the 'Time' column in a single chain
fraud_df = pd.read_csv('creditcard.csv').drop(columns=['Time'])

# The 'Class' column is the target; every remaining column is a feature
y = fraud_df['Class']
X = fraud_df.drop(columns=['Class'])

# Hold out 20% of the rows, stratified on the target, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Put the held-out features and their labels back into one frame
test_df = pd.concat([X_test, y_test], axis='columns')

# Partition the held-out rows by label
test_fraud_yes = test_df.loc[test_df['Class'].eq(1)]
test_fraud_no = test_df.loc[test_df['Class'].eq(0)]

# Row count of the fraud subset
num_frauds = len(test_fraud_yes)

# Draw as many non-fraud rows as there are fraud rows (seeded, so repeatable)
test_fraud_no_balanced = test_fraud_no.sample(n=num_frauds, random_state=42)

In [2]:
# Render the fraud-labelled test rows (Class == 1) as this cell's output
test_fraud_yes

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
77348,-1.271244,2.462675,-2.851395,2.324480,-1.372245,-0.948196,-3.065234,1.166927,-2.268771,-4.881143,...,0.652941,0.081931,-0.221348,-0.523582,0.224228,0.756335,0.632800,0.250187,0.01,1
102442,-13.192671,12.785971,-9.906650,3.320337,-4.801176,5.760059,-18.750889,-37.353443,-0.391540,-5.052502,...,27.202839,-8.887017,5.303607,-0.639435,0.263203,-0.108877,1.269566,0.939407,1.00,1
119781,-2.866364,2.346949,-4.053307,3.983359,-3.463186,-1.280953,-4.474764,1.216655,-2.309829,-5.515507,...,1.049732,0.475840,0.404480,0.282030,-0.506901,-0.371741,0.615257,0.803163,124.53,1
48094,-1.278138,0.716242,-1.143279,0.217805,-1.293890,-1.168952,-2.564182,0.204532,-1.611155,-1.250286,...,0.490183,0.470427,-0.126261,-0.126644,-0.661908,-0.349793,0.454851,0.137843,24.90,1
42958,-13.897206,6.344280,-14.281666,5.581009,-12.887133,-3.146176,-15.450467,9.060281,-5.486121,-14.676470,...,3.058082,0.941180,-0.232710,0.763508,0.075456,-0.453840,-1.508968,-0.686836,9.99,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143333,-7.030308,3.421991,-9.525072,5.270891,-4.024630,-2.865682,-6.989195,3.791551,-4.622730,-8.409665,...,1.103398,-0.541855,0.036943,-0.355519,0.353634,1.042458,1.359516,-0.272188,0.00,1
119714,-0.734303,0.435519,-0.530866,-0.471120,0.643214,0.713832,-1.234572,-2.551412,-2.057724,0.166831,...,-1.004877,1.150354,-0.152555,-1.386745,0.004716,0.219146,-0.058257,0.158048,29.95,1
245347,0.051075,1.310427,0.733222,2.620282,1.402358,0.528489,1.086014,-0.146423,-1.724333,1.280167,...,0.229936,0.766927,-0.189624,0.766853,-0.141401,-0.077278,-0.297595,-0.221816,2.47,1
124176,-0.356326,1.435305,-0.813564,1.993117,2.055878,-0.543579,0.487691,0.085449,-0.536352,-2.231209,...,-0.312863,-0.687874,-0.267003,-1.158480,0.271460,-0.155397,0.114328,0.101526,1.00,1


In [3]:
# Render the non-fraud sample, drawn to have as many rows as test_fraud_yes
test_fraud_no_balanced

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
197288,1.427126,-0.919876,-3.165507,1.378848,0.867670,-0.880074,1.482888,-0.569838,-0.018398,0.259636,...,0.453192,0.580948,-0.664473,-1.036310,0.811282,-0.162522,-0.134535,-0.026835,385.00,0
175745,1.808834,-1.333030,-0.556548,-0.165515,-1.388384,-0.731744,-0.663268,-0.092321,0.479492,0.517942,...,-0.503476,-1.234127,0.331022,-0.161604,-0.565929,-0.989281,0.043581,0.000009,158.94,0
29594,1.028807,-0.167460,1.344620,1.649794,-0.717662,0.807931,-0.610833,0.397065,0.923157,-0.257201,...,-0.348086,-0.536813,0.095209,0.228722,0.381645,-0.540081,0.090843,0.020739,7.40,0
195743,-1.824223,-0.620021,-0.467199,-0.407000,-0.476089,1.391575,2.452366,0.441657,-0.843092,-1.670947,...,0.284532,-0.127787,1.247207,-1.153693,-0.066464,0.361555,-0.209685,0.060636,556.16,0
234355,1.984241,0.115783,-1.659127,0.440728,0.174926,-1.167454,0.153436,-0.217630,0.358445,-0.308375,...,0.260785,0.869906,-0.019846,-0.051755,0.186102,-0.121079,-0.005895,-0.040131,11.30,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
240762,-0.865525,0.765738,-0.963221,1.152971,1.481481,-1.806373,0.880073,-0.021340,-1.181739,0.013559,...,0.411015,1.033996,-0.205121,0.098302,-0.295961,-0.397190,0.376233,0.089625,3.60,0
222676,1.939926,-0.322162,-0.029267,0.623025,-0.934518,-0.835067,-0.507358,-0.157575,1.267921,-0.200845,...,0.236689,0.988161,0.155030,0.516429,-0.116005,-0.245503,0.049183,-0.028995,12.99,0
82725,-1.152466,-0.350442,1.530463,0.459703,0.472315,-0.242138,1.344712,-0.429407,-0.603544,0.280028,...,-0.403237,-0.912994,0.627093,0.165803,-0.704149,-0.146309,-0.303466,-0.288624,174.14,0
102435,-0.806308,1.185333,1.202421,-0.196322,0.163735,-0.633848,0.711521,-0.087025,-0.143648,0.099515,...,-0.287656,-0.671550,0.012064,-0.129644,-0.054336,0.097986,0.264879,0.157795,9.42,0


In [None]:
# # Save the datasets to Excel files
# # (export is currently disabled: uncomment the two calls below to write the files)
# test_fraud_yes.to_excel("test_fraud_yes.xlsx", index=False)
# test_fraud_no_balanced.to_excel("test_fraud_no.xlsx", index=False)