In [2]:
import pandas as pd
import evidently
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset, RegressionPreset
from fraud_detection.data import load_data
from fraud_detection.models import train_model
from fraud_detection.utils import setup_logger
import joblib

In [3]:
# Setting up the logger
logger = setup_logger()

In [5]:
# Load the data
db_path = 'interview_database.db'
query_file = 'data.sql'
logger.info("Loading data...")
data = load_data(db_path, query_file, winsorize_data=True)


2024-10-15 23:18:55,109 - fraud_detection - INFO - Loading data...
2024-10-15 23:18:55,109 - fraud_detection - INFO - Loading data...
2024-10-15 23:18:55,116 - fraud_detection - ERROR - Error loading data: IO Error: Cannot open database "/Users/robertamanfu/Documents/misc/fraud_detection/interview_database.db" in read-only mode: database does not exist
2024-10-15 23:18:55,116 - fraud_detection - ERROR - Error loading data: IO Error: Cannot open database "/Users/robertamanfu/Documents/misc/fraud_detection/interview_database.db" in read-only mode: database does not exist


IOException: IO Error: Cannot open database "/Users/robertamanfu/Documents/misc/fraud_detection/interview_database.db" in read-only mode: database does not exist

In [None]:
# Splitting the data into training and testing sets
from sklearn.model_selection import train_test_split
X = data.drop(columns=['is_fraud'])
y = data['is_fraud']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
logger.info("Training the model...")
model = train_model('logistic')
model.fit(X_train, y_train)

# Save the model
joblib.dump(model, 'fraud_detection_model.pkl')
logger.info("Model saved as fraud_detection_model.pkl")

# Generate predictions
y_pred = model.predict(X_test)


In [None]:
# Create an Evidently profile
profile = Profile(sections=[DataDriftProfileSection(), RegressionPerformanceProfileSection()])
profile.calculate(reference_data=X_train, current_data=X_test, reference_target=y_train, current_target=y_test)
profile.to_file("model_profile.json")

# Create an Evidently dashboard
dashboard = Dashboard(tabs=[DataDriftTab(), RegressionPerformanceTab()])
dashboard.calculate(reference_data=X_train, current_data=X_test, reference_target=y_train, current_target=y_test)
dashboard.save("model_dashboard.html")

# Create a model card using Evidently
from evidently.report import Report
from evidently.report.sections import RegressionPerformanceMetricsSection, ModelOverviewSection

model_card = Report(sections=[ModelOverviewSection(), RegressionPerformanceMetricsSection()])
model_card.calculate(reference_data=X_train, current_data=X_test, reference_target=y_train, current_target=y_test)
model_card.save("model_card.html")

# Output information
logger.info("Model profile saved as model_profile.json")
logger.info("Model dashboard saved as model_dashboard.html")
logger.info("Model card saved as model_card.html")