In [2]:
import pandas as pd
import numpy as np
import random
import string

def generate_account_name(length=10):
    """Return a random account identifier.

    Characters are drawn with replacement from the uppercase ASCII letters
    and the decimal digits, using the global ``random`` module state.

    Args:
        length: Number of characters to generate. Defaults to 10, the
            length this notebook uses for every account name.

    Returns:
        A string of ``length`` characters taken from ``A-Z0-9``.

    Raises:
        ValueError: If ``length`` is negative.
    """
    if length < 0:
        raise ValueError("length must be non-negative")
    alphabet = string.ascii_uppercase + string.digits
    return ''.join(random.choices(alphabet, k=length))

def generate_synthetic_data(num_samples, max_amount=1000000000, fraud_threshold=1000000):
    """Build a DataFrame of randomly generated transactions.

    Every row gets a step in 1..3, a transaction type chosen uniformly from
    CASH_IN / CASH_OUT / DEBIT / PAYMENT / TRANSFER, and an amount and an
    originator balance drawn independently and uniformly from
    ``[1, max_amount)``. CASH_IN adds the amount to the originator balance;
    every other type subtracts it. Only TRANSFER rows receive a destination
    account name and destination balances; other rows use ``''`` and ``0``.

    Note: because amount and old balance are drawn independently, the new
    originator balance of an outgoing transaction can be negative.

    Args:
        num_samples: Number of transactions (rows) to generate.
        max_amount: Exclusive upper bound for amounts and starting balances.
        fraud_threshold: A transaction is labelled fraudulent (``isFraud`` = 1)
            when its amount is strictly greater than this value.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Step``, ``Type``, ``Amount``,
        ``nameOrig``, ``oldbalanceOrg``, ``newbalanceOrig``, ``nameDest``,
        ``oldbalanceDest``, ``newbalanceDest``, ``isFraud`` and
        ``isFlaggedFraud`` (the latter is always 0).
    """
    transaction_types = ['CASH_IN', 'CASH_OUT', 'DEBIT', 'PAYMENT', 'TRANSFER']
    columns = ['Step', 'Type', 'Amount', 'nameOrig', 'oldbalanceOrg', 'newbalanceOrig', 'nameDest', 'oldbalanceDest', 'newbalanceDest', 'isFraud', 'isFlaggedFraud']

    # All steps are drawn before any account name so the `random` stream is
    # consumed in the same order as before (steps first, then names).
    steps = [random.randint(1, 3) for _ in range(num_samples)]

    data = []
    for step in steps:
        transaction_type = np.random.choice(transaction_types)

        # Amount and starting balance are drawn the same way for every type;
        # only the direction of the balance change differs.
        amount = np.random.uniform(1, max_amount)
        old_balance = np.random.uniform(1, max_amount)
        if transaction_type == 'CASH_IN':
            new_balance = old_balance + amount
        else:
            new_balance = old_balance - amount

        is_fraud = 1 if amount > fraud_threshold else 0
        name_orig = generate_account_name()

        # Only transfers have a counterparty whose balance is tracked.
        if transaction_type == 'TRANSFER':
            name_dest = generate_account_name()
            old_balance_dest = np.random.uniform(1, max_amount)
            new_balance_dest = old_balance_dest + amount
        else:
            name_dest = ''
            old_balance_dest = 0
            new_balance_dest = 0

        data.append([step, transaction_type, amount, name_orig, old_balance, new_balance, name_dest, old_balance_dest, new_balance_dest, is_fraud, 0])

    return pd.DataFrame(data, columns=columns)

# Seed both random sources used by generate_synthetic_data (`random` for steps
# and account names, `np.random` for types and amounts) so that re-running the
# notebook reproduces the same file.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Generate synthetic data with 100,000 samples
synthetic_data = generate_synthetic_data(100000)

# Save the synthetic data to CSV
# NOTE(review): the combining cell reads 'synthetic.csv', not 'synthetic1.csv' - confirm which name is intended.
synthetic_data.to_csv('synthetic1.csv', index=False)


In [2]:
import pandas as pd

# Load the three datasets
data1 = pd.read_csv('samp_online.csv')
data2 = pd.read_csv('s1.csv')
# NOTE(review): the generator cell in this notebook writes 'synthetic1.csv' - confirm that 'synthetic.csv' is the intended input.
data3 = pd.read_csv('synthetic.csv')

# The synthetic generator names its first columns 'Step', 'Type' and 'Amount',
# while the app cells build their frames with lowercase 'step', 'type' and
# 'amount'. Harmonise the spelling first; otherwise concat would keep both
# variants as separate, half-empty columns. Frames that already use the
# lowercase names are left unchanged by rename().
column_aliases = {'Step': 'step', 'Type': 'type', 'Amount': 'amount'}
frames = [frame.rename(columns=column_aliases) for frame in (data1, data2, data3)]

# Combine the datasets vertically (along the rows)
combined_data = pd.concat(frames, ignore_index=True)

# Save the combined dataset to a new CSV file
combined_data.to_csv('main.csv', index=False)


In [None]:
import streamlit as st
import numpy as np
import tensorflow as tf
import pandas as pd
import requests
from streamlit_lottie import st_lottie
import altair as alt
# Page heading rendered at the top of the Streamlit app.
st.title("Multiperspective Fraud Detection ")

def load_lottieurl(url: str, timeout: float = 10):
    """Download a Lottie animation and return it as parsed JSON.

    Args:
        url: Address of the Lottie JSON file.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the app indefinitely.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        server answers with a status other than 200, or the body is not
        valid JSON.
    """
    try:
        r = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Network problems are treated like a non-200 answer: no animation.
        return None
    if r.status_code != 200:
        return None
    try:
        return r.json()
    except ValueError:
        # Raised by the JSON decoder when the body is not valid JSON.
        return None


lottie_url = "https://assets8.lottiefiles.com/packages/lf20_yhTqG2.json"

lottie_hello = load_lottieurl(lottie_url)

# load_lottieurl returns None when the animation could not be fetched; in that
# case the sidebar is rendered without it instead of handing None to st_lottie.
if lottie_hello is not None:
    with st.sidebar:
        st_lottie(lottie_hello,quality='high')

# Short glossary of the input fields, shown in the sidebar.
st.sidebar.title('Users Features Explanation')
feature_descriptions = (
    "**step**: represents a unit of time where 1 step equals 1 hour",
    "**type**: type of online transaction",
    '**amount**: the amount of the transaction',
    '**oldbalanceOrg**: balance before the transaction',
    '**newbalanceOrig**: balance after the transaction',
    '**oldbalanceDest**: initial balance of recipient before the transaction',
    '**newbalanceDest**: the new balance of recipient after the transaction',
)
for description in feature_descriptions:
    st.sidebar.markdown(description)



st.header('User Input Features')

def user_input_features():
    """Collect one transaction from Streamlit widgets.

    The transaction type is returned in its underscore form (``CASH_IN``,
    ``CASH_OUT``, ...) so that it matches the type labels used elsewhere in
    this notebook; the select box still displays the labels with spaces.

    Returns:
        A one-row ``pandas.DataFrame`` (index 0) with the columns ``step``,
        ``type``, ``amount``, ``oldbalanceOrg``, ``newbalanceOrig``,
        ``oldbalanceDest`` and ``newbalanceDest``.
    """
    step = st.number_input('Step', 0, 3)
    # Option values keep the underscore; format_func only changes what the
    # user sees, so a "CASH IN" selection does not create a dummy column of
    # its own next to "type_CASH_IN" during one-hot encoding.
    transaction_type = st.selectbox(
        'Online Transaction Type',
        ("CASH_IN", "CASH_OUT", "DEBIT", "PAYMENT", "TRANSFER"),
        format_func=lambda option: option.replace('_', ' '),
    )
    amount = st.number_input("Amount of the transaction")
    oldbalanceOrg = st.number_input("Old balance Origin")
    newbalanceOrig = st.number_input("New balance Origin")
    oldbalanceDest = st.number_input("Old Balance Destination")
    newbalanceDest = st.number_input("New Balance Destination")
    data = {'step': step,
            'type': transaction_type,
            'amount': amount,
            'oldbalanceOrg': oldbalanceOrg,
            'newbalanceOrig': newbalanceOrig,
            'oldbalanceDest': oldbalanceDest,
            'newbalanceDest': newbalanceDest}
    features = pd.DataFrame(data, index=[0])
    return features
input_df = user_input_features()

# Stack the user's row on top of the sample dataset so that one-hot encoding
# sees every transaction type present in the sample, not only the one the
# user picked.
fraud_raw = pd.read_csv('samp_online.csv')
fraud = fraud_raw.drop(columns=['isFraud','nameOrig','nameDest','isFlaggedFraud'])
df = pd.concat([input_df,fraud],axis=0)

# One-hot encoding of the categorical features.
# dtype=float keeps the dummy columns numeric: pandas 2.x defaults to bool
# dummies, and a frame mixing bool and float columns converts to an object
# array that the model cannot consume.
encode = ['type']
for col in encode:
    dummy = pd.get_dummies(df[col], prefix=col, dtype=float)
    df = pd.concat([df,dummy], axis=1)
    del df[col]
df = df[:1] # Selects only the first row (the user input data)

# Reads in saved classification model
if st.button("Predict"):
    # The model is loaded without its training configuration and re-compiled
    # here with the loss/optimizer/metric given below.
    load_clf = tf.keras.models.load_model('fraud.h5', compile=False)
    load_clf.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Apply model to make predictions
    y_probs = load_clf.predict(df)
    pred = tf.round(y_probs)
    pred = tf.cast(pred, tf.int32)

    # Shrink the font of the metric values so the verdict text fits.
    st.markdown(
        """
    <style>
    [data-testid="stMetricValue"] {
        font-size: 25px;
    }
    </style>
    """,
        unsafe_allow_html=True,
    )

    # y_probs is rounded to obtain the class, i.e. it is treated as the
    # probability of class 1 ("fraudulent"). The confidence in a class-0
    # verdict is therefore the complement of that probability.
    fraud_probability = float(np.max(y_probs))

    if pred == 0:
        confidence = 1.0 - fraud_probability
        col1, col2 = st.columns(2)
        col1.metric("Prediction", value="Transaction is not fraudulent ")
        col2.metric("Confidence Level", value=f"{np.round(confidence * 100)}%")
    else:
        confidence = fraud_probability
        col1, col2 = st.columns(2)
        col1.metric("prediction", value="Transaction is fraudulent")
        col2.metric("Confidence Level", value=f"{np.round(confidence * 100)}%")










In [None]:
import streamlit as st
import numpy as np
import tensorflow as tf
import pandas as pd
import requests
from streamlit_lottie import st_lottie

# Page heading rendered at the top of the Streamlit app.
st.title("Multiperspective Fraud Detection ")

def load_lottieurl(url: str, timeout: float = 10):
    """Fetch a Lottie animation over HTTP and decode it from JSON.

    Args:
        url: Address of the Lottie JSON file.
        timeout: Maximum number of seconds to wait for the server; prevents
            the app from hanging on an unresponsive host.

    Returns:
        The decoded JSON payload, or ``None`` if the request raises, the
        status code is not 200, or the response body is not valid JSON.
    """
    try:
        r = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts: behave like a failed download.
        return None
    if r.status_code != 200:
        return None
    try:
        return r.json()
    except ValueError:
        # The JSON decoder raises a ValueError subclass on malformed bodies.
        return None

lottie_url = "https://assets8.lottiefiles.com/packages/lf20_yhTqG2.json"
lottie_hello = load_lottieurl(lottie_url)

# load_lottieurl yields None when the download fails; only render the
# animation when there is something to render.
if lottie_hello is not None:
    with st.sidebar:
        st_lottie(lottie_hello,quality='high')

# Sidebar glossary describing each input field.
st.sidebar.title('Users Features Explanation')
for description in (
    "**step**: represents a unit of time where 1 step equals 1 hour",
    "**type**: type of online transaction",
    '**amount**: the amount of the transaction',
    '**oldbalanceOrg**: balance before the transaction',
    '**newbalanceOrig**: balance after the transaction',
    '**oldbalanceDest**: initial balance of recipient before the transaction',
    '**newbalanceDest**: the new balance of recipient after the transaction',
):
    st.sidebar.markdown(description)

st.header('User Input Features')

def user_input_features():
    """Read one transaction from the Streamlit input widgets.

    The select box shows the transaction types with spaces ("CASH IN") but
    returns the underscore spelling ("CASH_IN"), which is the spelling the
    rule-based fallback in this cell and the synthetic generator use.

    Returns:
        A one-row ``pandas.DataFrame`` (index 0) with the columns ``step``,
        ``type``, ``amount``, ``oldbalanceOrg``, ``newbalanceOrig``,
        ``oldbalanceDest`` and ``newbalanceDest``.
    """
    type_options = ("CASH_IN", "CASH_OUT", "DEBIT", "PAYMENT", "TRANSFER")

    step = st.number_input('Step', 0, 3)
    # format_func changes only the displayed label, not the returned value.
    transaction_type = st.selectbox(
        'Online Transaction Type',
        type_options,
        format_func=lambda option: option.replace('_', ' '),
    )
    amount = st.number_input("Amount of the transaction")
    oldbalanceOrg = st.number_input("Old balance Origin")
    newbalanceOrig = st.number_input("New balance Origin")
    oldbalanceDest = st.number_input("Old Balance Destination")
    newbalanceDest = st.number_input("New Balance Destination")

    data = {'step': step,
            'type': transaction_type,
            'amount': amount,
            'oldbalanceOrg': oldbalanceOrg,
            'newbalanceOrig': newbalanceOrig,
            'oldbalanceDest': oldbalanceDest,
            'newbalanceDest': newbalanceDest}
    features = pd.DataFrame(data, index=[0])
    return features

input_df = user_input_features()

# Put the user's row in front of the sample dataset so the one-hot encoder
# sees every transaction type that occurs in the sample, not just the
# selected one.
fraud_raw = pd.read_csv('samp_online.csv')
fraud = fraud_raw.drop(columns=['isFraud','nameOrig','nameDest','isFlaggedFraud'])
df = pd.concat([input_df,fraud],axis=0)

# One-hot encoding of the categorical features.
# dtype=float avoids the bool dummy columns pandas 2.x produces by default;
# mixing bool with the float balance columns would turn the frame into an
# object array when it is handed to the model.
encode = ['type']
for col in encode:
    dummy = pd.get_dummies(df[col], prefix=col, dtype=float)
    df = pd.concat([df,dummy], axis=1)
    del df[col]
df = df[:1] # Selects only the first row (the user input data)

# Reads in saved classification model
if st.button("Predict"):
    try:
        # Load without the stored training configuration and re-compile.
        load_clf = tf.keras.models.load_model('fraud.h5', compile=False)
        load_clf.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

        # Apply model to make predictions
        y_probs = load_clf.predict(df)
        pred = tf.round(y_probs)
        pred = tf.cast(pred, tf.int32)

        # Shrink the font of the metric values so the verdict text fits.
        st.markdown(
            """
            <style>
            [data-testid="stMetricValue"] {
                font-size: 25px;
            }
            </style>
            """,
            unsafe_allow_html=True,
        )

        # y_probs is rounded to obtain the class, i.e. it is treated as the
        # probability of class 1 ("fraudulent"). The confidence in a class-0
        # verdict is therefore the complement of that probability.
        fraud_probability = float(np.max(y_probs))

        if pred == 0:
            confidence = 1.0 - fraud_probability
            col1, col2 = st.columns(2)
            col1.metric("Prediction", value="Transaction is not fraudulent ")
            col2.metric("Confidence Level", value=f"{np.round(confidence * 100)}%")
        else:
            confidence = fraud_probability
            col1, col2 = st.columns(2)
            col1.metric("prediction", value="Transaction is fraudulent")
            col2.metric("Confidence Level", value=f"{np.round(confidence * 100)}%")

    except ValueError as e:
        # The model could not score the input. Say so explicitly instead of
        # presenting the heuristic below as if it were the model's verdict.
        st.warning(f"Model prediction failed ({e}); showing a rule-based estimate instead.")

        # Normalise the label so the rules match whether the widget returned
        # "CASH IN" or "CASH_IN".
        transaction_type = str(input_df['type'].iloc[0]).replace(' ', '_')
        old_balance = float(input_df['oldbalanceOrg'].iloc[0])
        new_balance = float(input_df['newbalanceOrig'].iloc[0])
        old_balance_dest = float(input_df['oldbalanceDest'].iloc[0])
        new_balance_dest = float(input_df['newbalanceDest'].iloc[0])

        # Rule-based fallback on balance movements only.
        # NOTE(review): the PAYMENT/TRANSFER rules flag a balance change as
        # fraud while CASH_OUT/DEBIT flag the absence of one - confirm intent.
        if transaction_type == "PAYMENT":
            is_fraudulent = old_balance != new_balance
        elif transaction_type == "TRANSFER":
            is_fraudulent = old_balance != new_balance and old_balance_dest != new_balance_dest
        elif transaction_type == "CASH_IN":
            is_fraudulent = not new_balance > old_balance
        elif transaction_type in ("CASH_OUT", "DEBIT"):
            is_fraudulent = not new_balance < old_balance
        else:
            # Unknown type: no rule applies, so no verdict is shown.
            is_fraudulent = None

        if is_fraudulent is not None:
            if is_fraudulent:
                st.metric("Prediction", value="Transaction is fraudulent")
            else:
                st.metric("Prediction", value="Transaction is not fraudulent")

   

In [1]:
import pandas as pd

# Read the full fraud dataset from disk
data = pd.read_csv('fraud.csv')

# Row count of the full dataset and the maximum number of rows to keep
num_rows = len(data)
subset_size = 100000

# Draw a random sample only when the dataset has more rows than requested;
# otherwise keep every row. random_state is fixed for reproducibility.
subset = (
    data.sample(n=subset_size, random_state=42)
    if num_rows > subset_size
    else data
)

# Write the (possibly sampled) rows to a new CSV file without the index
subset.to_csv('s1.csv', index=False)
