In [13]:
import pandas as pd
import numpy as np
from faker import Faker
from collections.abc import Sequence

# Initialize Faker for fake data generation
fake = Faker()

# Generate synthetic transaction data
def generate_transaction_data(num_records=10000):
    data = {
        'transaction_id': range(1, num_records + 1),
        'user_id': np.random.randint(1000, 1100, num_records),  # 100 unique users
        'timestamp': pd.date_range(start='2023-01-01', periods=num_records, freq='T'),
        'amount': np.random.exponential(scale=1000, size=num_records),  # Exponential distribution for amounts
        'currency': np.random.choice(['USD', 'EUR', 'BTC', 'ETH'], num_records),
        'transaction_type': np.random.choice(['purchase', 'transfer', 'withdrawal'], num_records),
        'location': [fake.country() for _ in range(num_records)],
        'is_fraud': np.random.choice([0, 1], num_records, p=[0.98, 0.02])  # 2% fraud rate
    }
    return pd.DataFrame(data)

# Generate data
transaction_df = generate_transaction_data()
print(transaction_df.head())

   transaction_id  user_id           timestamp       amount currency  \
0               1     1003 2023-01-01 00:00:00  1875.331000      ETH   
1               2     1091 2023-01-01 00:01:00   175.585357      ETH   
2               3     1021 2023-01-01 00:02:00   874.848685      ETH   
3               4     1039 2023-01-01 00:03:00   923.666164      ETH   
4               5     1070 2023-01-01 00:04:00   584.692428      USD   

  transaction_type                           location  is_fraud  
0         purchase                         Mauritania         0  
1         purchase                          Nicaragua         0  
2         transfer                               Chad         0  
3         purchase  Heard Island and McDonald Islands         0  
4         purchase                            Denmark         0  


In [14]:
# Add features
transaction_df['hour_of_day'] = transaction_df['timestamp'].dt.hour
transaction_df['day_of_week'] = transaction_df['timestamp'].dt.dayofweek

# Calculate user-level features
user_stats = transaction_df.groupby('user_id').agg({
    'amount': ['mean', 'std'],
    'transaction_id': 'count'
}).reset_index()
user_stats.columns = ['user_id', 'mean_amount', 'std_amount', 'transaction_count']

# Merge user stats with transaction data
transaction_df = transaction_df.merge(user_stats, on='user_id', how='left')

# Calculate deviation from mean
transaction_df['amount_deviation'] = (transaction_df['amount'] - transaction_df['mean_amount']) / transaction_df['std_amount']
transaction_df['amount_deviation'].fillna(0, inplace=True)  # Handle NaN for users with only one transaction

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Features and target
features = ['amount', 'hour_of_day', 'day_of_week', 'amount_deviation', 'transaction_count']
X = transaction_df[features]
y = transaction_df['is_fraud']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Confusion Matrix:
[[1966    0]
 [  34    0]]

Classification Report:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99      1966
           1       0.00      0.00      0.00        34

    accuracy                           0.98      2000
   macro avg       0.49      0.50      0.50      2000
weighted avg       0.97      0.98      0.97      2000



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [16]:
from sklearn.ensemble import RandomForestClassifier
import pickle

# Example: Train a Random Forest model
# Replace X_train and y_train with your actual training data
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)  # Train the model

# Save the trained model to a .pkl file
with open('fraud_detection_model.pkl', 'wb') as f:
    pickle.dump(model, f)

print("Model saved as 'fraud_detection_model.pkl'")

Model saved as 'fraud_detection_model.pkl'


In [20]:
import pickle

# Load the trained model
with open('fraud_detection_model.pkl', 'rb') as f:
    model = pickle.load(f)

print("Model loaded successfully!")

Model loaded successfully!


In [22]:
pip install --upgrade jupyter jupyter_client jupyter_core


Collecting jupyter
  Obtaining dependency information for jupyter from https://files.pythonhosted.org/packages/38/64/285f20a31679bf547b75602702f7800e74dbabae36ef324f716c02804753/jupyter-1.1.1-py2.py3-none-any.whl.metadata
  Downloading jupyter-1.1.1-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting jupyter_client
  Obtaining dependency information for jupyter_client from https://files.pythonhosted.org/packages/11/85/b0394e0b6fcccd2c1eeefc230978a6f8cb0c5df1e4cd3e7625735a0d7d1e/jupyter_client-8.6.3-py3-none-any.whl.metadata
  Using cached jupyter_client-8.6.3-py3-none-any.whl.metadata (8.3 kB)
Collecting jupyter_core
  Obtaining dependency information for jupyter_core from https://files.pythonhosted.org/packages/c9/fb/108ecd1fe961941959ad0ee4e12ee7b8b1477247f30b1fdfd83ceaf017f0/jupyter_core-5.7.2-py3-none-any.whl.metadata
  Using cached jupyter_core-5.7.2-py3-none-any.whl.metadata (3.4 kB)


Collecting fqdn (from jsonschema>=4.17.3->jupyterlab-server~=2.19->jupyterlab->jupyter)
  Obtaining dependency information for fqdn from https://files.pythonhosted.org/packages/cf/58/8acf1b3e91c58313ce5cb67df61001fc9dcd21be4fadb76c1a2d540e09ed/fqdn-1.5.1-py3-none-any.whl.metadata
  Downloading fqdn-1.5.1-py3-none-any.whl.metadata (1.4 kB)
Collecting isoduration (from jsonschema>=4.17.3->jupyterlab-server~=2.19->jupyterlab->jupyter)
  Obtaining dependency information for isoduration from https://files.pythonhosted.org/packages/7b/55/e5326141505c5d5e34c5e0935d2908a74e4561eca44108fbfb9c13d2911a/isoduration-20.11.0-py3-none-any.whl.metadata
  Downloading isoduration-20.11.0-py3-none-any.whl.metadata (5.7 kB)
Collecting uri-template (from jsonschema>=4.17.3->jupyterlab-server~=2.19->jupyterlab->jupyter)
  Obtaining dependency information for uri-template from https://files.pythonhosted.org/packages/e7/00/3fca040d7cf8a32776d3d81a00c8ee7457e00f80c649f1e4a863c8321ae9/uri_template-1.3.0-py3-non

      Successfully uninstalled jupyter_core-5.3.0
  Attempting uninstall: jupyter
    Found existing installation: jupyter 1.0.0
    Uninstalling jupyter-1.0.0:
      Successfully uninstalled jupyter-1.0.0
Successfully installed fqdn-1.5.1 isoduration-20.11.0 jupyter-1.1.1 jupyter_core-5.7.2 uri-template-1.3.0 webcolors-24.11.1
Note: you may need to restart the kernel to use updated packages.


In [23]:
from flask import Flask, request, jsonify
import pickle

# Load the trained model
with open('fraud_detection_model.pkl', 'rb') as f:
    model = pickle.load(f)

app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    data = request.json
    features = [data['amount'], data['hour_of_day'], data['day_of_week'], data['amount_deviation'], data['transaction_count']]
    prediction = model.predict([features])
    return jsonify({'is_fraud': int(prediction[0])})

if __name__ == '__main__':
    app.run(debug=True)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with watchdog (fsevents)
Traceback (most recent call last):
  File "/Users/taylormcwilliam/anaconda3/lib/python3.11/site-packages/ipykernel_launcher.py", line 15, in <module>
    from ipykernel import kernelapp as app
  File "/Users/taylormcwilliam/anaconda3/lib/python3.11/site-packages/ipykernel/__init__.py", line 5, in <module>
    from .connect import *  # noqa
    ^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/taylormcwilliam/anaconda3/lib/python3.11/site-packages/ipykernel/connect.py", line 11, in <module>
    import jupyter_client
  File "/Users/taylormcwilliam/anaconda3/lib/python3.11/site-packages/jupyter_client/__init__.py", line 8, in <module>
    from .asynchronous import AsyncKernelClient  # noqa
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/taylormcwilliam/anaconda3/lib/python3.11/site-packages/jupyter_client/asynchronous/__init__.py", line 1, in <module>
    from .client import AsyncKernelClien

SystemExit: 1