In [9]:
import pandas as pd
import numpy as np

# Set the number of fraud samples and total samples
Total_samples = 20000
Fraud_rate = 0.02
Fraud_samples = int(Fraud_rate * Total_samples)

# Generate fraud samples
fraud_data = pd.DataFrame({'amount': np.random.randint(10, 1001, size=Fraud_samples),
                           'account_number': np.random.randint(10000, 20000, size=Fraud_samples),
                           'address_change': np.random.randint(0, 20, size=Fraud_samples),
                           'email_id_change': np.random.randint(0, 40, size=Fraud_samples),
                           'phone_change': np.random.randint(0, 30, size=Fraud_samples),
                           'pin_change': np.random.randint(0, 3, size=Fraud_samples),
                           'fraud': np.ones(Fraud_samples)})

# Generate non-fraud samples
non_fraud_samples = Total_samples - Fraud_samples
non_fraud_data = pd.DataFrame({'amount': np.random.randint(10, 1001, size=non_fraud_samples),
                               'account_number': np.random.randint(1000, 10000, size=non_fraud_samples),
                               'address_change': np.random.randint(0, 2, size=non_fraud_samples),
                               'email_id_change': np.random.randint(0, 2, size=non_fraud_samples),
                               'phone_change': np.random.randint(0, 2, size=non_fraud_samples),
                               'pin_change': np.random.randint(0, 2, size=non_fraud_samples),
                               'fraud': np.zeros(non_fraud_samples)})

# Concatenate fraud and non-fraud samples
dataset = pd.concat([fraud_data, non_fraud_data], ignore_index=True)

# Shuffle the dataset
df = dataset.sample(frac=1).reset_index(drop=True)

# Preview the dataset
print(df.head())


   amount  account_number  address_change  email_id_change  phone_change  \
0     131            7726               0                1             1   
1     179           11795              14               19            23   
2      82            3340               0                1             0   
3     891            4970               0                0             1   
4     556            4943               0                1             1   

   pin_change  fraud  
0           0    0.0  
1           1    1.0  
2           0    0.0  
3           0    0.0  
4           1    0.0  


In [4]:
dataset['fraud'].value_counts()

0.0    19600
1.0      400
Name: fraud, dtype: int64

In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

# Load the dataset
dataset = df  # Replace 'fraud_dataset.csv' with your dataset file name

# Separate the features (X) and target variable (y)
X = dataset.drop('fraud', axis=1)
y = dataset['fraud']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the numerical features
scaler = MinMaxScaler()
X_train[['amount', 'account_number']] = scaler.fit_transform(X_train[['amount', 'account_number']])
X_test[['amount', 'account_number']] = scaler.transform(X_test[['amount', 'account_number']])

# Reshape the data for LSTM
X_train = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Define the model
model = Sequential()
model.add(LSTM(64, input_shape=(1, X_train.shape[2])))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate the model on the testing set
loss, accuracy = model.evaluate(X_test, y_test)
print("Loss:", loss)
print("Accuracy:", accuracy)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.00012831912317778915
Accuracy: 1.0


In [13]:
X_train

array([[[5.16161616e-01, 1.24432957e-01, 0.00000000e+00, 0.00000000e+00,
         1.00000000e+00, 1.00000000e+00]],

       [[4.38383838e-01, 7.86528115e-01, 1.30000000e+01, 2.00000000e+00,
         3.00000000e+00, 1.00000000e+00]],

       [[2.72727273e-02, 7.64848613e-03, 0.00000000e+00, 1.00000000e+00,
         0.00000000e+00, 0.00000000e+00]],

       ...,

       [[2.17171717e-01, 2.72813588e-01, 1.00000000e+00, 1.00000000e+00,
         0.00000000e+00, 1.00000000e+00]],

       [[6.70707071e-01, 2.88268805e-01, 1.00000000e+00, 1.00000000e+00,
         0.00000000e+00, 0.00000000e+00]],

       [[3.31313131e-01, 4.43612195e-02, 0.00000000e+00, 1.00000000e+00,
         1.00000000e+00, 1.00000000e+00]]])

# Certainly! If you want to discover sequential patterns in the data to predict fraud, you can use a different approach called sequential pattern mining. This technique aims to extract frequent sequential patterns from the dataset.

Here's a high-level overview of how you can apply sequential pattern mining for fraud detection:

Preprocess the data: Convert categorical variables into numerical representations and normalize numerical features if necessary.

Define the sequence representation: Map each record into a sequence of events. In your case, you can consider the "fraud" variable as the event of interest.

Discover frequent sequential patterns: Use a sequential pattern mining algorithm, such as the AprioriAll algorithm or the PrefixSpan algorithm, to extract frequent sequential patterns from the dataset.

Feature extraction: Extract features from the frequent sequential patterns to represent each record. For example, you can count the occurrences of specific patterns within a record or calculate statistics such as the average length of fraudulent sequences.

Split the dataset: Divide the dataset into training and testing sets.

Build and train a classification model: Use the extracted features as input to a classification model, such as a decision tree, random forest, or neural network, to predict fraud/non-fraud.

Evaluate the model: Assess the performance of the model on the testing set using appropriate evaluation metrics, such as accuracy, precision, recall, and F1 score.

It's worth noting that sequential pattern mining can be computationally intensive, especially for large datasets. You may need to adjust the parameters and algorithms based on your dataset size and specific requirements.

Implementing sequential pattern mining for fraud detection requires more complex code and algorithms. If you'd like a more detailed example or assistance with implementing sequential pattern mining in Python, please let me know!

# If you don't have access to the prefixspan library for sequential pattern mining, an alternative approach is to use the FP-growth algorithm, which is another popular algorithm for mining frequent itemsets and sequential patterns. The mlxtend library in Python provides an implementation of the FP-growth algorithm that you can use.

Here's an example of how to use the FP-growth algorithm from the mlxtend library to discover frequent sequential patterns:

# In the code above, we first convert the dataset into a list of transactions, where each transaction represents a sequence of events. Then, we one-hot encode the transactions using the TransactionEncoder from the mlxtend.preprocessing module. Finally, we apply the FP-growth algorithm on the one-hot encoded data using the fpgrowth function from the mlxtend.frequent_patterns module.

Please note that you may need to install the mlxtend library by running pip install mlxtend in your Python environment.

After discovering the frequent sequential patterns, you can proceed with the remaining steps, such as feature extraction, splitting the dataset, building and training a classification model, and evaluating the model as mentioned in the previous response.

Let me know if you have any further questions!

In [19]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth

# Load the dataset
dataset = df  # Replace 'fraud_dataset.csv' with your dataset file name

# Separate the features (X) and target variable (y)
X = dataset.drop('fraud', axis=1)
y = dataset['fraud']

# Normalize the numerical features
scaler = MinMaxScaler()
X[['amount', 'account_number']] = scaler.fit_transform(X[['amount', 'account_number']])

# Convert the dataset into a list of transactions
transactions = X.apply(lambda row: [column for column, value in row.items() if value == 1], axis=1).tolist()

# One-hot encode the transactions
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

# Discover frequent sequential patterns using FP-growth
min_support = 0.1  # Adjust the minimum support threshold as per your dataset
frequent_patterns = fpgrowth(df_encoded, min_support=min_support, use_colnames=True)
frequent_patterns

Unnamed: 0,support,itemsets
0,0.49625,(email_id_change)
1,0.4856,(phone_change)
2,0.49685,(pin_change)
3,0.49125,(address_change)
4,0.248,"(pin_change, email_id_change)"
5,0.24755,"(email_id_change, phone_change)"
6,0.24695,"(pin_change, phone_change)"
7,0.2386,"(address_change, phone_change)"
8,0.1262,"(pin_change, email_id_change, phone_change)"
9,0.12335,"(address_change, email_id_change, phone_change)"


In [30]:
import pandas as pd
import numpy as np

# Set the number of fraud samples and total samples
Total_samples = 200000
Fraud_rate = 0.02
Fraud_samples = int(Fraud_rate * Total_samples)

# Generate fraud samples
fraud_data = pd.DataFrame({'amount': np.random.randint(10, 1001, size=Fraud_samples),
                           'account_number': np.random.randint(10000, 20000, size=Fraud_samples),
                           'address_change': np.random.randint(0, 1, size=Fraud_samples),
                           'email_id_change': np.random.randint(0, 1, size=Fraud_samples),
                           'phone_change': np.random.randint(0, 1, size=Fraud_samples),
                           'pin_change': np.random.randint(0, 1, size=Fraud_samples),
                           'fraud': np.ones(Fraud_samples)})

# Generate non-fraud samples
non_fraud_samples = Total_samples - Fraud_samples
non_fraud_data = pd.DataFrame({'amount': np.random.randint(10, 1001, size=non_fraud_samples),
                               'account_number': np.random.randint(1000, 10000, size=non_fraud_samples),
                               'address_change': np.random.randint(0, 1, size=non_fraud_samples),
                               'email_id_change': np.random.randint(0, 1, size=non_fraud_samples),
                               'phone_change': np.random.randint(0, 1, size=non_fraud_samples),
                               'pin_change': np.random.randint(0, 1, size=non_fraud_samples),
                               'fraud': np.zeros(non_fraud_samples)})

# Concatenate fraud and non-fraud samples
dataset = pd.concat([fraud_data, non_fraud_data], ignore_index=True)

# Shuffle the dataset
df = dataset.sample(frac=1).reset_index(drop=True)

# Preview the dataset
print(df.head(2))


   amount  account_number  address_change  email_id_change  phone_change  \
0     574            9924               0                0             0   
1     691            9335               0                0             0   

   pin_change  fraud  
0           0    0.0  
1           0    0.0  


In [46]:
import pandas as pd
import numpy as np

# Set the random seed for reproducibility
np.random.seed(42)

# Number of records in the dataset
num_records = 2000

# Calculate the number of records for each variable
num_address_change = int(num_records * 0.20)
num_email_id_change = int(num_records * 0.10)
num_phone_change = int(num_records * 0.05)
num_pin_change = int(num_records * 0.07)

# Calculate the number of fraud cases
num_fraud = int(num_records * 0.0002)

# Generate the dataset
data = {
    'amount': np.random.randint(10, 1001, num_records),
    'account_number': np.random.randint(1000, 10000, num_records),
    'address_change': np.concatenate((np.zeros(num_records - num_address_change), np.ones(num_address_change))),
    'email_id_change': np.concatenate((np.zeros(num_records - num_email_id_change), np.ones(num_email_id_change))),
    'phone_change': np.concatenate((np.zeros(num_records - num_phone_change), np.ones(num_phone_change))),
    'pin_change': np.concatenate((np.zeros(num_records - num_pin_change), np.ones(num_pin_change))),
    'fraud': np.concatenate((np.zeros(num_records - num_fraud), np.ones(num_fraud)))
}

# Shuffle the dataset
df = pd.DataFrame(data).sample(frac=1, random_state=42).reset_index(drop=True)

# Display the dataset
print(dataset.head())


   amount  account_number  address_change  email_id_change  phone_change  \
0     305            8186             1.0              1.0           0.0   
1     160            3113             0.0              0.0           0.0   
2     654            4629             0.0              0.0           0.0   
3     694            2778             0.0              0.0           0.0   
4     512            3261             0.0              0.0           0.0   

   pin_change  fraud  
0         1.0    0.0  
1         0.0    0.0  
2         0.0    0.0  
3         0.0    0.0  
4         0.0    0.0  


In [47]:
df['email_id_change'].value_counts()

0.0    1800
1.0     200
Name: email_id_change, dtype: int64

In [39]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

# Load the dataset
dataset = df  # Replace 'fraud_dataset.csv' with your dataset file name

# Separate the features (X) and target variable (y)
X = dataset.drop('fraud', axis=1)
y = dataset['fraud']

# Normalize the numerical features
scaler = MinMaxScaler()
X[['amount']] = scaler.fit_transform(X[['amount']])

# Convert the dataset into a list of transactions
transactions = X.apply(lambda row: [column for column, value in row.items() if value == 1], axis=1).tolist()

# One-hot encode the transactions
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

# Discover frequent sequential patterns using FP-growth
min_support = 0.1  # Adjust the minimum support threshold as per your dataset
frequent_patterns = fpgrowth(df_encoded, min_support=min_support, use_colnames=True)

# Feature extraction
def extract_features(row):
    features = []
    for pattern in frequent_patterns['itemsets']:
        count = 1 if pattern.issubset(row) else 0
        features.append(count)
    return features

# Apply feature extraction to the dataset
X_features = np.array(X.apply(extract_features, axis=1).tolist())

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_features, y, test_size=0.2, random_state=42)

# Build and train the classification model
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


Accuracy: 1.0
Precision: 0.0
Recall: 0.0
F1 Score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


In [40]:
frequent_patterns

Unnamed: 0,support,itemsets
0,0.2,(address_change)
1,0.1,(email_id_change)
2,0.1,"(address_change, email_id_change)"


In [44]:
df.head()

Unnamed: 0,amount,account_number,address_change,email_id_change,phone_change,pin_change,fraud
0,305,8186,1.0,1.0,0.0,1.0,0.0
1,160,3113,0.0,0.0,0.0,0.0,0.0
2,654,4629,0.0,0.0,0.0,0.0,0.0
3,694,2778,0.0,0.0,0.0,0.0,0.0
4,512,3261,0.0,0.0,0.0,0.0,0.0


In [48]:
df['fraud'].value_counts()

0.0    2000
Name: fraud, dtype: int64

In [57]:
import pandas as pd
import numpy as np

# Set the random seed for reproducibility
np.random.seed(42)

# Number of records in the dataset
num_records = 200000

# Calculate the number of records for each variable
num_address_change = int(num_records * 0.40)
num_email_id_change = int(num_records * 0.30)
num_phone_change = int(num_records * 0.2)
num_pin_change = int(num_records * 0.15)

# Calculate the number of fraud cases
num_fraud = int(num_records * 0.03)

# Generate the dataset
data = {
    'amount': np.random.randint(10, 1001, num_records),
    'account_number': np.random.randint(1000, 10000, num_records),
    'address_change': np.concatenate((np.zeros(num_records - num_address_change), np.ones(num_address_change))),
    'email_id_change': np.concatenate((np.zeros(num_records - num_email_id_change), np.ones(num_email_id_change))),
    'phone_change': np.concatenate((np.zeros(num_records - num_phone_change), np.ones(num_phone_change))),
    'pin_change': np.concatenate((np.zeros(num_records - num_pin_change), np.ones(num_pin_change))),
    'fraud': np.concatenate((np.zeros(num_records - num_fraud), np.ones(num_fraud)))
}

# Shuffle the dataset
df = pd.DataFrame(data).sample(frac=1, random_state=42).reset_index(drop=True)

# Display the dataset
print(dataset.head())


   amount  account_number  address_change  email_id_change  phone_change  \
0     294            2567             0.0              0.0           0.0   
1     730            3166             0.0              0.0           0.0   
2     903            8976             0.0              0.0           0.0   
3     742            1862             0.0              0.0           0.0   
4     754            6767             0.0              0.0           0.0   

   pin_change  fraud  
0         0.0    0.0  
1         0.0    0.0  
2         0.0    0.0  
3         0.0    0.0  
4         0.0    0.0  


In [58]:
df['fraud'].value_counts()

0.0    194000
1.0      6000
Name: fraud, dtype: int64

In [59]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

# Load the dataset
dataset = df # Replace 'fraud_dataset.csv' with your dataset file name

# Separate the features (X) and target variable (y)
X = dataset.drop('fraud', axis=1)
y = dataset['fraud']

# Normalize the numerical features
scaler = MinMaxScaler()
X[['amount', 'account_number']] = scaler.fit_transform(X[['amount', 'account_number']])

# Convert the dataset into a list of transactions
transactions = X.apply(lambda row: [column for column, value in row.items() if value == 1], axis=1).tolist()

# One-hot encode the transactions
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

# Discover frequent sequential patterns using FP-growth
min_support = 0.1  # Adjust the minimum support threshold as per your dataset
frequent_patterns = fpgrowth(df_encoded, min_support=min_support, use_colnames=True)

# Feature extraction
def extract_features(row):
    features = []
    for pattern in frequent_patterns['itemsets']:
        count = 1 if pattern.issubset(row) else 0
        features.append(count)
    return features

# Apply feature extraction to the dataset
X_features = np.array(X.apply(extract_features, axis=1).tolist())

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_features, y, test_size=0.2, random_state=42)

# Reshape the input variables for LSTM
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Build the LSTM model
model = Sequential()
model.add(LSTM(64, input_shape=(X_train.shape[1], 1)))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the LSTM model
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Make predictions on the testing set
y_pred_prob = model.predict(X_test)
y_pred = np.round(y_pred_prob).flatten()

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


  return self.randrange(a, b+1)
  return self.randrange(a, b+1)


Epoch 1/10


  return self.randrange(a, b+1)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.97065
Precision: 0.0
Recall: 0.0
F1 Score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))


In [60]:
frequent_patterns

Unnamed: 0,support,itemsets
0,0.4,(address_change)
1,0.3,(email_id_change)
2,0.2,(phone_change)
3,0.15,(pin_change)
4,0.3,"(address_change, email_id_change)"
5,0.2,"(address_change, phone_change)"
6,0.15,"(address_change, pin_change)"
7,0.2,"(email_id_change, phone_change)"
8,0.15,"(pin_change, email_id_change)"
9,0.15,"(pin_change, phone_change)"


In [55]:
X_features.shape

(200000, 3)

In [None]:
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

In [61]:
y_test

119737    0.0
72272     0.0
158154    0.0
65426     0.0
30074     0.0
         ... 
4174      0.0
91537     0.0
156449    0.0
184376    0.0
6584      0.0
Name: fraud, Length: 40000, dtype: float64

In [64]:
y_test.value_counts()

0.0    38826
1.0     1174
Name: fraud, dtype: int64

In [66]:
y_pred

array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)

In [67]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the dataset
dataset = df  # Replace 'fraud_dataset.csv' with your dataset file name

# Separate the features (X) and target variable (y)
X = dataset.drop('fraud', axis=1)
y = dataset['fraud']

# Normalize the numerical features
scaler = MinMaxScaler()
X[['amount', 'account_number']] = scaler.fit_transform(X[['amount', 'account_number']])

# Define the sliding window size
window_size = 5  # Adjust the window size as per your requirement

# Extract sequential pattern features using the sliding window approach
def extract_features(data):
    features = []
    for i in range(len(data) - window_size + 1):
        window = data[i:i + window_size]
        pattern = tuple(window)
        features.append(pattern)
    return features

# Apply feature extraction to the dataset
X['sequential_pattern'] = X.apply(extract_features, axis=1)
X_features = X['sequential_pattern'].tolist()

# Convert sequential patterns into binary features
unique_patterns = set(pattern for patterns in X_features for pattern in patterns)

def pattern_to_binary(row):
    features = []
    for pattern in unique_patterns:
        features.append(1 if pattern in row else 0)
    return features

X_features = np.array([pattern_to_binary(row) for row in X_features])

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_features, y, test_size=0.2, random_state=42)

# Reshape the input variables for LSTM
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Build the LSTM model
model = Sequential()
model.add(LSTM(64, input_shape=(X_train.shape[1], 1)))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the LSTM model
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Make predictions on the testing set
y_pred_prob = model.predict(X_test)
y_pred = np.round(y_pred_prob).flatten()

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)



KeyboardInterrupt



In [69]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the dataset
dataset = df  # Replace 'fraud_dataset.csv' with your dataset file name

# Separate the features (X) and target variable (y)
X = dataset.drop('fraud', axis=1)
y = dataset['fraud']

# Normalize the numerical features
scaler = MinMaxScaler()
X[['amount', 'account_number']] = scaler.fit_transform(X[['amount', 'account_number']])

# Define the sliding window size
window_size = 5  # Adjust the window size as per your requirement

# Extract sequential pattern features using the sliding window approach
def extract_features(data):
    features = []
    for i in range(len(data) - window_size + 1):
        window = data[i:i + window_size]
        pattern = tuple(window)
        features.append(pattern)
    return features

# Apply feature extraction to the dataset
X['sequential_pattern'] = X.apply(extract_features, axis=1)
X_features = X['sequential_pattern'].tolist()

# Convert sequential patterns into binary features
unique_patterns = set(pattern for patterns in X_features for pattern in patterns)

def pattern_to_binary(row):
    features = []
    for pattern in unique_patterns:
        features.append(1 if pattern in row else 0)
    return features

X_features = np.array([pattern_to_binary(row) for row in X_features])
