# Data loading

In [1]:
import pandas as pd
import numpy as np
import mysql.connector

# Connect to MySQL database.
# Connection settings come from the environment so that credentials are not
# stored in the notebook. The non-secret settings fall back to the values the
# notebook was written with.
import getpass
import os

conn = mysql.connector.connect(
    host=os.environ.get("MYSQL_HOST", "localhost"),
    user=os.environ.get("MYSQL_USER", "root"),
    # The password is never kept in the file: use MYSQL_PASSWORD or prompt for it.
    password=os.environ.get("MYSQL_PASSWORD") or getpass.getpass("MySQL password: "),
    # NOTE(review): the fallback keeps the original literal, including its
    # leading space - confirm that this is the intended schema name.
    database=os.environ.get("MYSQL_DATABASE", " users_data_new_10"),
)
cursor = conn.cursor()

# Function to fetch data from MySQL and create DataFrame
def fetch_data_to_dataframe(table_name):
    """Load every row of one MySQL table into a pandas DataFrame.

    Parameters
    ----------
    table_name : str
        Table to read, optionally qualified as ``schema.table``. The name is
        interpolated into the SQL text (identifiers cannot be bound as query
        parameters), so only letters, digits, ``_`` and ``$`` are accepted.

    Returns
    -------
    pandas.DataFrame
        Result of ``SELECT *`` on the table, read through the module-level
        ``conn`` connection.

    Raises
    ------
    ValueError
        If ``table_name`` is not a plain (optionally schema-qualified) identifier.
    """
    import re

    # Reject anything that could change the meaning of the statement
    # (whitespace, quotes, semicolons, comment markers, ...).
    if not isinstance(table_name, str) or re.fullmatch(
        r"[0-9A-Za-z_$]+(\.[0-9A-Za-z_$]+)?", table_name
    ) is None:
        raise ValueError(f"Invalid table name: {table_name!r}")

    query = f"SELECT * FROM {table_name}"
    return pd.read_sql(query, conn)

# Tables to load; each one becomes an entry of `dataframes` keyed by its name
table_names = [
    "BlockchainTransactions",
    "UserProfile",
    "BehavioralPatterns",
    "CreditAndFinancialHistory",
    "NetworkAnalysis",
    "SentimentAnalysis",
    "CommunityBehavior",
    "SystemAndPlatformScores",
    "DeviceAndIPInformation",
    "MachineLearningFeatures",
    "HistoricalFraudData",
    "ExternalDataSources"
]

# Dictionary to store DataFrames
dataframes = {}

# Fetch every table. The connection is closed in `finally` so that a failed
# read does not leave the MySQL session open.
try:
    for table_name in table_names:
        dataframes[table_name] = fetch_data_to_dataframe(table_name)
finally:
    conn.close()


  df = pd.read_sql(query, conn)


# Preprocessing

In [2]:
def droper(data_frames, table_name, col_arr):
    """Remove columns from one table of a DataFrame dictionary.

    ``data_frames[table_name]`` is replaced by a new DataFrame without the
    columns listed in ``col_arr``. The dictionary is updated in place and
    nothing is returned. A column that does not exist raises ``KeyError``.
    """
    trimmed = data_frames[table_name].drop(columns=col_arr)
    data_frames[table_name] = trimmed

In [3]:
# Table to update and the columns that are removed from it
table_update = 'UserProfile'
columns_to_drop = ['Name', 'Address','Email','UserID','PhoneNumber','AccountCreationDate']

# Replace the table inside `dataframes`, then preview ten random rows
droper(data_frames=dataframes, table_name=table_update, col_arr=columns_to_drop)
dataframes["UserProfile"].sample(10)

Unnamed: 0,KYCStatus
17,Pending
9,Verified
0,Verified
11,Pending
13,Verified
8,Pending
10,Verified
12,Verified
2,Pending
5,Pending


In [4]:
from sklearn.preprocessing import LabelEncoder
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
import nltk


def apply_nlp_label_encoding(df, column_names):
    """Stem the text in the given columns, then label-encode them.

    For every name in ``column_names`` that exists in ``df`` and has ``object``
    dtype, each string is lower-cased, tokenized, Porter-stemmed and re-joined
    with single spaces. The column is then replaced by the integer codes
    produced by ``LabelEncoder``. Requested names that are missing from ``df``
    or are not of ``object`` dtype are skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to encode. It is modified in place.
    column_names : list of str
        Candidate columns to process (iterated twice, so pass a list).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    stemmer = PorterStemmer()

    def _stem_text(value):
        # Missing entries (None/NaN) and other non-strings have no `.lower()`;
        # pass them through unchanged instead of raising AttributeError.
        if not isinstance(value, str):
            return value
        return ' '.join(stemmer.stem(word) for word in word_tokenize(value.lower()))

    # Tokenization and Stemming
    for column in column_names:
        if column in df.columns and df[column].dtype == 'object':
            df[column] = df[column].apply(_stem_text)

    # Label Encoding (the encoder is refitted for every column)
    label_encoder = LabelEncoder()
    for column in column_names:
        if column in df.columns and df[column].dtype == 'object':
            df[column] = label_encoder.fit_transform(df[column])
    return df

# Stem and label-encode the KYC status of the 'UserProfile' table,
# storing the result back in the `dataframes` dictionary
dataframes["UserProfile"] = apply_nlp_label_encoding(df=dataframes["UserProfile"], column_names=['KYCStatus'])

In [5]:
import pandas as pd


def handle_dates(data, column_names):
    """Return a copy of ``data`` with the named columns parsed as datetimes.

    Each column in ``column_names`` is converted with ``pd.to_datetime``.
    The input frame itself is left unchanged.
    """
    converted = data.copy()
    for name in column_names:
        parsed = pd.to_datetime(converted[name])
        converted[name] = parsed
    return converted
# Parse the transaction timestamps and store the converted copy back in `dataframes`
dataframes['BlockchainTransactions'] = handle_dates(
    data=dataframes['BlockchainTransactions'],
    column_names=['Timestamp'],
)

In [6]:
# Display the blockchain transactions table
dataframes['BlockchainTransactions']

Unnamed: 0,TransactionID,Timestamp,SenderAddress,ReceiverAddress,AmountTransferred,TransactionFee,BlockHeight
0,1,2024-03-13 09:25:00,sender_address_1,receiver_address_1,100.25,0.5,11234
1,2,2024-03-12 18:45:00,sender_address_2,receiver_address_2,250.75,1.2,11235
2,3,2024-03-11 14:30:00,sender_address_3,receiver_address_3,50.1,0.25,11236
3,4,2024-03-10 11:15:00,sender_address_4,receiver_address_4,75.6,0.4,11237
4,5,2024-03-09 08:10:00,sender_address_5,receiver_address_5,150.3,0.8,11238
5,6,2024-03-08 16:20:00,sender_address_6,receiver_address_6,300.9,1.5,11239
6,7,2024-03-07 12:45:00,sender_address_7,receiver_address_7,200.75,1.0,11240
7,8,2024-03-06 10:30:00,sender_address_8,receiver_address_8,80.4,0.3,11241
8,9,2024-03-05 14:55:00,sender_address_9,receiver_address_9,120.2,0.6,11242
9,10,2024-03-04 17:20:00,sender_address_10,receiver_address_10,180.65,0.9,11243


In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

def apply_feature_scaling(dataframe, column_names, target=None, random_state=42):
    """Compare feature scalers by the test MSE of a linear regression.

    The selected columns are split 80/20. Each candidate scaler
    (``StandardScaler``, ``MinMaxScaler``, ``RobustScaler``) is fitted on the
    training part only, and a ``LinearRegression`` trained on the scaled
    features is scored on the held-out part.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table holding the feature columns.
    column_names : list of str
        Numeric columns to scale.
    target : array-like, optional
        Regression target with one value per row of ``dataframe``. When it is
        omitted a seeded random placeholder is used; the ranking then only
        demonstrates the mechanics and carries no information about the data.
    random_state : int, default 42
        Seed for the train/test split and for the placeholder target, so that
        repeated calls return the same answer.

    Returns
    -------
    str
        ``str()`` of the scaler with the lowest test MSE, e.g. ``"RobustScaler()"``.
        Ties go to the first scaler in the order listed above.

    Raises
    ------
    ValueError
        If ``target`` does not have one value per row.
    """
    # Extract the specified columns from the DataFrame
    X = dataframe[column_names].values

    if target is None:
        X_train, X_test = train_test_split(X, test_size=0.2, random_state=random_state)
        # Placeholder target, seeded so the function is deterministic.
        rng = np.random.default_rng(random_state)
        y_train = rng.random(X_train.shape[0])
        y_test = rng.random(X_test.shape[0])
    else:
        y = np.asarray(target)
        if y.shape[0] != X.shape[0]:
            raise ValueError("target must have one value per row of dataframe")
        # Split features and target together so rows stay aligned.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=random_state
        )

    # NOTE(review): a least-squares fit with an intercept is expected to give
    # the same predictions under any per-feature affine rescaling, so the MSEs
    # below presumably differ only by floating-point noise - consider a
    # scale-sensitive model if this comparison is meant to drive a decision.
    scaling_results = {}
    for scaler in (StandardScaler(), MinMaxScaler(), RobustScaler()):
        # Fit the scaler on the training rows only, then reuse it for the test rows.
        scaled_X_train = scaler.fit_transform(X_train)
        scaled_X_test = scaler.transform(X_test)

        model = LinearRegression()
        model.fit(scaled_X_train, y_train)

        y_pred = model.predict(scaled_X_test)
        scaling_results[str(scaler)] = mean_squared_error(y_test, y_pred)

    # Scaler name with the lowest MSE
    return min(scaling_results, key=scaling_results.get)

# Rank the candidate scalers on the numeric blockchain columns and report the winner
best_scaling = apply_feature_scaling(
    dataframe=dataframes['BlockchainTransactions'],
    column_names=['AmountTransferred', 'TransactionFee', 'BlockHeight'],
)

print("Best scaling method:", best_scaling)


Best scaling method: RobustScaler()


# applying scaling

In [8]:
from sklearn.preprocessing import MinMaxScaler



# Numeric blockchain columns that are rescaled to the [0, 1] range
columns_to_scale = ['AmountTransferred', 'TransactionFee', 'BlockHeight']

# Learn each column's minimum and maximum first ...
scaler = MinMaxScaler().fit(dataframes['BlockchainTransactions'][columns_to_scale])

# ... then overwrite the columns with their rescaled values
dataframes['BlockchainTransactions'][columns_to_scale] = scaler.transform(dataframes['BlockchainTransactions'][columns_to_scale])


In [9]:
from sklearn.preprocessing import StandardScaler

def numerical_preprocessing(data_frames, table_name, columns):
    """Standardize columns of one table inside a DataFrame dictionary.

    A fresh ``StandardScaler`` is fitted on ``data_frames[table_name][columns]``
    and the columns are overwritten with the scaled values. If the table is
    not in the dictionary a message is printed and nothing else happens.
    Nothing is returned.
    """
    if table_name not in data_frames:
        print(f"Table '{table_name}' not found in the data_frames dictionary.")
        return

    standardizer = StandardScaler()
    standardized = standardizer.fit_transform(data_frames[table_name][columns])
    data_frames[table_name][columns] = standardized

# Standardize the transaction frequency of the behavioural table, then display the table
numerical_preprocessing(
    data_frames=dataframes,
    table_name="BehavioralPatterns",
    columns=['TransactionFrequency'],
)
dataframes['BehavioralPatterns']

Unnamed: 0,UserID,TransactionFrequency,TransactionSizeDistribution,GeographicInconsistencies,TimeOfDayPatterns,RegularVsIrregularBehavior,ChangesInBehaviorOverTime
0,1,-0.575469,Normal,No,Morning,Regular,Stable
1,2,-1.583419,Skewed towards large transactions,Yes,Evening,Irregular,Increasing
2,3,1.206024,Normal,No,Afternoon,Regular,Stable
3,4,0.08087,Normal,Yes,Night,Irregular,Stable
4,5,0.877854,Normal,No,Morning,Regular,Stable
5,6,-0.95052,Skewed towards small transactions,No,Evening,Regular,Decreasing
6,7,1.651397,Normal,Yes,Afternoon,Irregular,Increasing
7,8,-1.255249,Normal,No,Night,Regular,Stable
8,9,-0.176977,Normal,No,Morning,Regular,Stable
9,10,0.432481,Normal,Yes,Evening,Irregular,Stable


In [10]:
from sklearn.preprocessing import RobustScaler


# Credit columns that are scaled with RobustScaler
columns_to_scale = ['CreditScore', 'CreditCardUtilization', 'IncomeLevel', 'DebtToIncomeRatio']

# Fit the scaler on the selected columns ...
scaler = RobustScaler().fit(dataframes['CreditAndFinancialHistory'][columns_to_scale])

# ... and overwrite them with the scaled values
dataframes['CreditAndFinancialHistory'][columns_to_scale] = scaler.transform(dataframes['CreditAndFinancialHistory'][columns_to_scale])

# Display the table with the scaled columns
dataframes['CreditAndFinancialHistory']

Unnamed: 0,UserID,CreditScore,LoanRepaymentHistory,CreditCardUtilization,IncomeLevel,DebtToIncomeRatio,PastFraudulentActivity
0,1,0.285714,Good,0.0,-0.238095,-0.060606,No
1,2,-1.047619,Fair,1.333333,-1.190476,1.151515,No
2,3,1.428571,Excellent,-0.666667,0.714286,-1.272727,yes
3,4,-0.095238,Good,0.666667,0.238095,0.30303,No
4,5,-0.47619,Good,0.0,-0.428571,0.545455,No
5,6,-0.857143,Fair,2.0,-1.0,0.787879,No
6,7,0.857143,Excellent,-0.666667,1.190476,-0.666667,yes
7,8,-0.095238,Good,0.666667,0.238095,-0.060606,No
8,9,0.47619,Good,0.0,-0.047619,-0.424242,No
9,10,-0.47619,Fair,1.333333,-0.714286,0.545455,No


In [11]:
from sklearn.preprocessing import StandardScaler

# Work on the network table through a short alias
df = dataframes['NetworkAnalysis']

# Graph metrics to standardize
columns_to_scale = ['DegreeCentrality', 'ClusteringCoefficients']

# Fit a StandardScaler on those columns and write the standardized values back
scaler = StandardScaler().fit(df[columns_to_scale])
df[columns_to_scale] = scaler.transform(df[columns_to_scale])

# Store the table under its key again and display it
dataframes['NetworkAnalysis'] = df
dataframes['NetworkAnalysis']

Unnamed: 0,SenderUserID,ReceiverUserID,DegreeCentrality,ClusteringCoefficients,AnomaliesInNetwork
0,1,2,0.03035,-0.914966,
1,2,3,1.44666,-1.613413,
2,3,1,-1.183631,0.900997,
3,4,5,0.23268,-0.076829,
4,5,6,1.64899,-1.334034,
5,6,7,-0.778971,0.481929,
6,7,8,0.83967,-0.356208,
7,8,9,-1.385961,1.459755,
8,9,10,0.03035,-0.495898,
9,10,11,1.24433,-1.054655,


In [12]:
from sklearn.preprocessing import MinMaxScaler

# Pull out the two score columns that are rescaled to the [0, 1] range
df_system_platform_scores = dataframes['SystemAndPlatformScores'][['SystemTrustScore', 'PlatformReliabilityScore']]

# Initialize MinMaxScaler
scaler = MinMaxScaler()

# Fit and transform the data
scaled_data = scaler.fit_transform(df_system_platform_scores)

# Build the scaled frame on the SAME index as the source table. Without it the
# new frame gets a fresh 0..n-1 index, and the axis=1 concat below (which aligns
# on index labels) would produce NaN-padded rows whenever the table's index is
# not the default range.
scaled_df_system_platform_scores = pd.DataFrame(
    scaled_data,
    columns=['SystemTrustScore_scaled', 'PlatformReliabilityScore_scaled'],
    index=df_system_platform_scores.index,
)

# Replace the raw score columns by their scaled counterparts
dataframes['SystemAndPlatformScores'] = pd.concat([dataframes['SystemAndPlatformScores'].drop(['SystemTrustScore', 'PlatformReliabilityScore'], axis=1), scaled_df_system_platform_scores], axis=1)



# encoding

In [13]:
def OnehotcodeEncoding(df,categorical_columns):
    """Return ``df`` with the given columns replaced by one-hot indicator columns.

    This is a thin wrapper around ``pd.get_dummies``. The input frame is not
    modified.
    """
    return pd.get_dummies(data=df, columns=categorical_columns)


In [14]:
def lableEncoding(df,categorical_columns):
    """Label-encode the given columns of ``df`` in place and return ``df``.

    ``LabelEncoder.fit_transform`` is applied to each listed column
    separately, and the columns are overwritten with the resulting codes.
    """
    from sklearn.preprocessing import LabelEncoder

    encoder = LabelEncoder()
    encoded_columns = df[categorical_columns].apply(encoder.fit_transform)
    df[categorical_columns] = encoded_columns
    return df

In [15]:
 # Label-encode the proxy/VPN flag of the device table
 dataframes['DeviceAndIPInformation'] = lableEncoding(df=dataframes['DeviceAndIPInformation'], categorical_columns=['ProxyOrVPNUsage'])

In [16]:
# Stem and label-encode the textual credit-history columns, then display the table
dataframes['CreditAndFinancialHistory'] = apply_nlp_label_encoding(
    df=dataframes['CreditAndFinancialHistory'],
    column_names=['LoanRepaymentHistory', 'PastFraudulentActivity'],
)
dataframes['CreditAndFinancialHistory']

Unnamed: 0,UserID,CreditScore,LoanRepaymentHistory,CreditCardUtilization,IncomeLevel,DebtToIncomeRatio,PastFraudulentActivity
0,1,0.285714,2,0.0,-0.238095,-0.060606,0
1,2,-1.047619,1,1.333333,-1.190476,1.151515,0
2,3,1.428571,0,-0.666667,0.714286,-1.272727,1
3,4,-0.095238,2,0.666667,0.238095,0.30303,0
4,5,-0.47619,2,0.0,-0.428571,0.545455,0
5,6,-0.857143,1,2.0,-1.0,0.787879,0
6,7,0.857143,0,-0.666667,1.190476,-0.666667,1
7,8,-0.095238,2,0.666667,0.238095,-0.060606,0
8,9,0.47619,2,0.0,-0.047619,-0.424242,0
9,10,-0.47619,1,1.333333,-0.714286,0.545455,0


In [17]:

# Stem and label-encode the descriptive behaviour columns, then display the table
dataframes['BehavioralPatterns'] = apply_nlp_label_encoding(
    df=dataframes['BehavioralPatterns'],
    column_names=[
        'TransactionSizeDistribution',
        'GeographicInconsistencies',
        'TimeOfDayPatterns',
        'RegularVsIrregularBehavior',
        'ChangesInBehaviorOverTime',
    ],
)
dataframes['BehavioralPatterns']

Unnamed: 0,UserID,TransactionFrequency,TransactionSizeDistribution,GeographicInconsistencies,TimeOfDayPatterns,RegularVsIrregularBehavior,ChangesInBehaviorOverTime
0,1,-0.575469,0,0,2,1,2
1,2,-1.583419,1,1,1,0,1
2,3,1.206024,0,0,0,1,2
3,4,0.08087,0,1,3,0,2
4,5,0.877854,0,0,2,1,2
5,6,-0.95052,2,0,1,1,0
6,7,1.651397,0,1,0,0,1
7,8,-1.255249,0,0,3,1,2
8,9,-0.176977,0,0,2,1,2
9,10,0.432481,0,1,1,0,2


In [18]:
 # Label-encode the two fraud indicator columns of the historical fraud table
 dataframes['HistoricalFraudData'] = lableEncoding(df=dataframes['HistoricalFraudData'], categorical_columns=['FraudulentTransactions','PreviousFraudulentActivity'])

In [19]:
# Label-encode every feature column of the ML features table, then display it
dataframes['MachineLearningFeatures'] = lableEncoding(
    df=dataframes['MachineLearningFeatures'],
    categorical_columns=['HistoricalFraudData','AnomalyDetectionFeatures','ClusteringFeatures','DeepLearningModelFeatures'],
)
dataframes['MachineLearningFeatures']

Unnamed: 0,UserID,HistoricalFraudData,AnomalyDetectionFeatures,ClusteringFeatures,DeepLearningModelFeatures
0,1,0,0,0,1
1,2,0,0,1,0
2,3,0,0,0,1
3,4,0,0,0,1
4,5,0,0,1,0
5,6,0,0,0,1
6,7,0,0,0,1
7,8,0,0,1,0
8,9,0,0,0,1
9,10,0,0,0,1


In [20]:
# Stem and label-encode the three sentiment columns, then display the table
dataframes['SentimentAnalysis'] = apply_nlp_label_encoding(
    df=dataframes['SentimentAnalysis'],
    column_names=['SocialMediaSentiment','TransactionSentiment', 'FinancialTransactionSentiment'],
)
dataframes['SentimentAnalysis']

Unnamed: 0,UserID,SocialMediaSentiment,TransactionSentiment,FinancialTransactionSentiment
0,1,1,1,1
1,2,0,0,0
2,3,1,2,1
3,4,1,1,1
4,5,0,0,0
5,6,1,2,1
6,7,1,1,1
7,8,0,0,0
8,9,1,2,1
9,10,1,1,1


In [21]:

# Label-encode the network anomaly column
dataframes['NetworkAnalysis'] = lableEncoding(
    df=dataframes['NetworkAnalysis'],
    categorical_columns=['AnomaliesInNetwork'],
)

In [22]:
# Label-encode the past security incidents column
dataframes['SystemAndPlatformScores'] = lableEncoding(
    df=dataframes['SystemAndPlatformScores'],
    categorical_columns=['PastSecurityIncidents'],
)

In [23]:
# Stem and label-encode the community behaviour text columns
dataframes['CommunityBehavior'] = apply_nlp_label_encoding(
    df=dataframes['CommunityBehavior'],
    column_names=['ParticipationInForums','FeedbackFromPeers','CommunityEndorsementsWarnings'],
)


# dropping

In [24]:
# Table to update and the identifier columns that are removed from it
table_update = 'BlockchainTransactions'
columns_to_drop = ['TransactionID', 'SenderAddress','ReceiverAddress']

# Replace the table in `dataframes` with a version lacking those columns
droper(data_frames=dataframes, table_name=table_update, col_arr=columns_to_drop)


In [25]:
# Table to update and the columns that are removed from it
table_update = 'DeviceAndIPInformation'
columns_to_drop = ['UserID','IPAddress','DeviceLocation','DeviceID','DeviceTypeAndVersion']

# Replace the table in `dataframes`, then display what is left
droper(data_frames=dataframes, table_name=table_update, col_arr=columns_to_drop)
dataframes['DeviceAndIPInformation']

Unnamed: 0,ProxyOrVPNUsage
0,0
1,1
2,0
3,0
4,1
5,0
6,1
7,0
8,1
9,0


In [26]:
# Table to update and the column that is removed from it
table_update = 'MachineLearningFeatures'
columns_to_drop = ['UserID']

# Replace the table in `dataframes` with a version lacking that column
droper(data_frames=dataframes, table_name=table_update, col_arr=columns_to_drop)

In [27]:
# Table to update and the columns that are removed from it
table_update = 'HistoricalFraudData'
columns_to_drop = ['TransactionID','BlacklistedEntities']

# Replace the table in `dataframes` with a version lacking those columns
droper(data_frames=dataframes, table_name=table_update, col_arr=columns_to_drop)

In [28]:
# Table to update and the column that is removed from it
table_update = 'BehavioralPatterns'
columns_to_drop = ['UserID']

# Replace the table in `dataframes` with a version lacking that column
droper(data_frames=dataframes, table_name=table_update, col_arr=columns_to_drop)

In [29]:
# Table to update and the column that is removed from it
table_update = 'CreditAndFinancialHistory'
columns_to_drop = ['UserID']

# Replace the table in `dataframes`, then display what is left
droper(data_frames=dataframes, table_name=table_update, col_arr=columns_to_drop)
dataframes['CreditAndFinancialHistory']

Unnamed: 0,CreditScore,LoanRepaymentHistory,CreditCardUtilization,IncomeLevel,DebtToIncomeRatio,PastFraudulentActivity
0,0.285714,2,0.0,-0.238095,-0.060606,0
1,-1.047619,1,1.333333,-1.190476,1.151515,0
2,1.428571,0,-0.666667,0.714286,-1.272727,1
3,-0.095238,2,0.666667,0.238095,0.30303,0
4,-0.47619,2,0.0,-0.428571,0.545455,0
5,-0.857143,1,2.0,-1.0,0.787879,0
6,0.857143,0,-0.666667,1.190476,-0.666667,1
7,-0.095238,2,0.666667,0.238095,-0.060606,0
8,0.47619,2,0.0,-0.047619,-0.424242,0
9,-0.47619,1,1.333333,-0.714286,0.545455,0


In [30]:
# Table to update and the column that is removed from it
table_update = 'SystemAndPlatformScores'
columns_to_drop = ['UserID']

# Replace the table in `dataframes` with a version lacking that column
droper(data_frames=dataframes, table_name=table_update, col_arr=columns_to_drop)

In [31]:
# Table to update and the column that is removed from it
table_update = 'CommunityBehavior'
columns_to_drop = ['UserID']

# Replace the table in `dataframes` with a version lacking that column
droper(data_frames=dataframes, table_name=table_update, col_arr=columns_to_drop)

# combining data

In [32]:
import pandas as pd

def combiner(dataframes, table_names):
    """Join the selected tables side by side into one DataFrame.

    The tables named in ``table_names`` are looked up in ``dataframes`` and
    concatenated column-wise (``axis=1``), in the order in which each name
    first appears. A repeated name is used once. An unknown name raises
    ``KeyError``.
    """
    selected = {name: dataframes[name] for name in table_names}
    return pd.concat(selected.values(), axis=1)

# Concatenate every table currently listed in `table_names` column-wise
combine_df = combiner(dataframes=dataframes, table_names=table_names)


In [33]:
# Tables that make up the modelling dataset
# ("ExternalDataSources" from the loading list is not included here)
table_names = [
    "BlockchainTransactions",
    "UserProfile",
    "BehavioralPatterns",
    "CreditAndFinancialHistory",
    "NetworkAnalysis",
    "SentimentAnalysis",
    "CommunityBehavior",
    "SystemAndPlatformScores",
    "DeviceAndIPInformation",
    "MachineLearningFeatures",
    "HistoricalFraudData",
]

# Rebuild the combined table from this reduced list
combine_df = combiner(dataframes=dataframes, table_names=table_names)

In [34]:
# (rows, columns) of the combined table
combine_df.shape

(20, 40)

In [35]:
# Display the combined table
combine_df

Unnamed: 0,Timestamp,AmountTransferred,TransactionFee,BlockHeight,KYCStatus,TransactionFrequency,TransactionSizeDistribution,GeographicInconsistencies,TimeOfDayPatterns,RegularVsIrregularBehavior,...,PastSecurityIncidents,SystemTrustScore_scaled,PlatformReliabilityScore_scaled,ProxyOrVPNUsage,HistoricalFraudData,AnomalyDetectionFeatures,ClusteringFeatures,DeepLearningModelFeatures,FraudulentTransactions,PreviousFraudulentActivity
0,2024-03-13 09:25:00,0.19996,0.2,0.0,1,-0.575469,0,0,2,1,...,0,0.769231,0.769231,0,0,0,0,1,0,0
1,2024-03-12 18:45:00,0.80004,0.76,0.052632,1,-1.583419,1,1,1,0,...,0,0.384615,0.384615,1,0,0,1,0,0,0
2,2024-03-11 14:30:00,0.0,0.0,0.105263,0,1.206024,0,0,0,1,...,0,0.961538,0.961538,0,0,0,0,1,1,0
3,2024-03-10 11:15:00,0.101675,0.12,0.157895,1,0.08087,0,1,3,0,...,0,0.576923,0.576923,0,0,0,0,1,0,1
4,2024-03-09 08:10:00,0.399522,0.44,0.210526,1,0.877854,0,0,2,1,...,0,0.192308,0.192308,1,0,0,1,0,0,0
5,2024-03-08 16:20:00,1.0,1.0,0.263158,0,-0.95052,2,0,1,1,...,0,0.846154,0.846154,0,0,0,0,1,1,1
6,2024-03-07 12:45:00,0.600678,0.6,0.315789,1,1.651397,0,1,0,0,...,0,0.692308,0.692308,1,0,0,0,1,0,1
7,2024-03-06 10:30:00,0.120813,0.04,0.368421,1,-1.255249,0,0,3,1,...,0,0.0,0.0,0,0,0,1,0,0,0
8,2024-03-05 14:55:00,0.279506,0.28,0.421053,0,-0.176977,0,0,2,1,...,0,0.923077,0.923077,1,0,0,0,1,0,1
9,2024-03-04 17:20:00,0.520534,0.52,0.473684,1,0.432481,0,1,1,0,...,0,0.807692,0.807692,0,0,0,0,1,1,1


In [36]:
# Remove the raw timestamp column before modelling.
# The result is rebound instead of mutated in place, and a missing column is
# ignored, so re-running this cell does not raise KeyError.
combine_df = combine_df.drop(columns=['Timestamp'], errors='ignore')


In [37]:
# Column names, non-null counts and dtypes of the combined table
combine_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 39 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   AmountTransferred                20 non-null     float64
 1   TransactionFee                   20 non-null     float64
 2   BlockHeight                      20 non-null     float64
 3   KYCStatus                        20 non-null     int32  
 4   TransactionFrequency             20 non-null     float64
 5   TransactionSizeDistribution      20 non-null     int32  
 6   GeographicInconsistencies        20 non-null     int32  
 7   TimeOfDayPatterns                20 non-null     int32  
 8   RegularVsIrregularBehavior       20 non-null     int32  
 9   ChangesInBehaviorOverTime        20 non-null     int32  
 10  CreditScore                      20 non-null     float64
 11  LoanRepaymentHistory             20 non-null     int32  
 12  CreditCardUtilization   

# model training

In [38]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from tensorflow import keras
from tensorflow.keras import layers

# The combined table is the modelling dataset
data = combine_df

# 'FraudulentTransactions' is the label; every other column is a feature
y = data['FraudulentTransactions']
X = data.drop(columns=['FraudulentTransactions'])

# No further encoding is applied; the alias keeps the name used by the split
X_encoded = X

# Hold out 20% of the rows for testing, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.2, random_state=42
)


In [39]:
# Preview ten randomly chosen values of the label column
data['FraudulentTransactions'].sample(10)

1     0
17    0
7     0
4     0
5     1
3     0
0     0
16    0
15    1
11    0
Name: FraudulentTransactions, dtype: int32

In [40]:
# Column names, non-null counts and dtypes of the combined table
combine_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 39 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   AmountTransferred                20 non-null     float64
 1   TransactionFee                   20 non-null     float64
 2   BlockHeight                      20 non-null     float64
 3   KYCStatus                        20 non-null     int32  
 4   TransactionFrequency             20 non-null     float64
 5   TransactionSizeDistribution      20 non-null     int32  
 6   GeographicInconsistencies        20 non-null     int32  
 7   TimeOfDayPatterns                20 non-null     int32  
 8   RegularVsIrregularBehavior       20 non-null     int32  
 9   ChangesInBehaviorOverTime        20 non-null     int32  
 10  CreditScore                      20 non-null     float64
 11  LoanRepaymentHistory             20 non-null     int32  
 12  CreditCardUtilization   

In [41]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from tensorflow import keras
from tensorflow.keras import layers

In [42]:
# Modelling dataset: the combined table
data = combine_df

# Label column first, then the feature matrix without it
y = data['FraudulentTransactions']
X = data.drop(columns=['FraudulentTransactions'])

# Features are used as they are
X_encoded = X

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.2, random_state=42
)

In [43]:
# Seed Python, NumPy and TensorFlow so that weight initialisation and batch
# shuffling - and therefore the reported accuracy - are reproducible.
keras.utils.set_random_seed(42)

# Build the neural network model. The input width is taken from the training
# features, which are what the network is fitted on.
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=[X_train.shape[1]]),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model.
# NOTE(review): the test split is also passed as validation data. Here it is
# only monitored, but if it is later used for early stopping or tuning, a
# separate validation split is needed to keep the test score independent.
history = model.fit(X_train, y_train, epochs=15, batch_size=20, validation_data=(X_test, y_test))

# Evaluate the model on the held-out rows
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", test_accuracy)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Test Accuracy: 0.75


In [44]:
# Persist the trained Keras model to an HDF5 file
model.save(filepath='model.h5')


  saving_api.save_model(


In [45]:
import pickle
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, RobustScaler



In [46]:


# Fit one preprocessing object per transformation so that they can be saved.
#
# NOTE(review): by this point the columns used below have already been
# transformed in place by earlier cells (scaled via fit_transform, or
# label-encoded to integers and written back into `dataframes`). Every object
# here is therefore fitted on transformed values, not on the raw database
# values, so the saved objects presumably will not reproduce the training-time
# preprocessing on raw inputs. Confirm, and if so fit or keep these objects
# before the raw columns are overwritten.

# MinMaxScaler for 'AmountTransferred', 'TransactionFee', 'BlockHeight'
minmax_scaler_blockchain = MinMaxScaler()
minmax_scaler_blockchain.fit(dataframes['BlockchainTransactions'][['AmountTransferred', 'TransactionFee', 'BlockHeight']])

# StandardScaler for 'TransactionFrequency'
standard_scaler_behavioral = StandardScaler()
standard_scaler_behavioral.fit(dataframes['BehavioralPatterns'][['TransactionFrequency']])

# RobustScaler for 'CreditScore', 'CreditCardUtilization', 'IncomeLevel', 'DebtToIncomeRatio'
robust_scaler_credit = RobustScaler()
robust_scaler_credit.fit(dataframes['CreditAndFinancialHistory'][['CreditScore', 'CreditCardUtilization', 'IncomeLevel', 'DebtToIncomeRatio']])

# LabelEncoders for various categorical columns
# (each is fitted on a single column of its table)
label_encoder_user_profile = LabelEncoder()
label_encoder_user_profile.fit(dataframes['UserProfile']['KYCStatus'])

label_encoder_device_ip = LabelEncoder()
label_encoder_device_ip.fit(dataframes['DeviceAndIPInformation']['ProxyOrVPNUsage'])

label_encoder_fraud_data = LabelEncoder()
label_encoder_fraud_data.fit(dataframes['HistoricalFraudData']['FraudulentTransactions'])

label_encoder_network_analysis = LabelEncoder()
label_encoder_network_analysis.fit(dataframes['NetworkAnalysis']['AnomaliesInNetwork'])

# Example: Add other scalers and encoders based on your preprocessing steps


# Save the Preprocessing Objects

In [48]:
# Bundle the fitted preprocessing objects under descriptive keys
preprocessing = dict(
    minmax_scaler_blockchain=minmax_scaler_blockchain,
    standard_scaler_behavioral=standard_scaler_behavioral,
    robust_scaler_credit=robust_scaler_credit,
    label_encoder_user_profile=label_encoder_user_profile,
    label_encoder_device_ip=label_encoder_device_ip,
    label_encoder_fraud_data=label_encoder_fraud_data,
    label_encoder_network_analysis=label_encoder_network_analysis,
    # Add other preprocessing objects here
)

# Serialize the bundle to preprocess_final_file.pkl
with open('preprocess_final_file.pkl', 'wb') as file:
    pickle.dump(preprocessing, file)


In [49]:
# Read the preprocessing bundle back from disk.
# Only unpickle files you produced yourself: pickle can execute code on load.
with open('preprocess_final_file.pkl', 'rb') as file:
    preprocessing = pickle.load(file)

# Unpack the bundle into one variable per preprocessing object
(
    minmax_scaler_blockchain,
    standard_scaler_behavioral,
    robust_scaler_credit,
    label_encoder_user_profile,
    label_encoder_device_ip,
    label_encoder_fraud_data,
    label_encoder_network_analysis,
) = (
    preprocessing[key]
    for key in (
        'minmax_scaler_blockchain',
        'standard_scaler_behavioral',
        'robust_scaler_credit',
        'label_encoder_user_profile',
        'label_encoder_device_ip',
        'label_encoder_fraud_data',
        'label_encoder_network_analysis',
    )
)

# Use these objects in your FastAPI application as needed for preprocessing
