In [1]:
from azureml.core import Workspace, Dataset
ws = Workspace.from_config()
dataset = Dataset.get_by_name(ws, name='frauds_data')
df = dataset.to_pandas_dataframe()
print(df.head())

{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset'}
   transaction_id  user_id    amount transaction_type     status device_id  \
0               1      165   1607.95       withdrawal  completed    dev992   
1               2      723   3811.21         transfer    pending    dev965   
2               3      598    743.71       withdrawal    pending    dev515   
3               4      497   1903.64       withdrawal  completed    dev489   
4               5      205  11227.20       withdrawal    pending  devF1997   

           location  is_foreign_transaction  num_chargebacks predicted_fraud  \
0    Chennai, India                   False                0            None   
1     Mumbai, India                   False                2            None   
2  Bangalore, India                    True                2            None   
3  Bangalore, India                    True 

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix


In [3]:
# 3. Preprocessing
df = df.copy()

# Fill nulls if any
df = df.fillna({'predicted_fraud': 'None'})


In [4]:
# Encode categorical features
cat_features = ['transaction_type', 'status', 'device_id', 'location']
le_dict = {}

for col in cat_features:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    le_dict[col] = le  # For inverse_transform later if needed

# Convert booleans to integers
df['is_foreign_transaction'] = df['is_foreign_transaction'].astype(int)

# 4. Prepare features and target
features = ['user_id', 'amount', 'transaction_type', 'status', 'device_id', 'location',
            'is_foreign_transaction', 'num_chargebacks']
target = 'potential_fraud'

In [5]:
X = df[features]
y = df[target]

# 5. Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [6]:
# 6. Train Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# 7. Evaluate the model
y_pred = clf.predict(X_test)
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         4
           1       1.00      1.00      1.00         6

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00        10

Confusion Matrix:
 [[4 0]
 [0 6]]


In [7]:
# 8. Predict for entire data and store in predicted_fraud
df['predicted_fraud'] = clf.predict(X)

# Optional: convert back to 'Yes'/'No' format or leave as 0/1
df['predicted_fraud'] = df['predicted_fraud'].apply(lambda x: 'Yes' if x == 1 else 'No')

# Show final dataframe
df.head()


Unnamed: 0,transaction_id,user_id,amount,transaction_type,status,device_id,location,is_foreign_transaction,num_chargebacks,predicted_fraud,potential_fraud
0,1,165,1607.95,3,0,27,2,0,0,No,0
1,2,723,3811.21,2,2,25,5,0,2,No,0
2,3,598,743.71,3,2,14,0,1,2,Yes,1
3,4,497,1903.64,3,0,13,0,1,2,Yes,1
4,5,205,11227.2,3,2,29,1,1,3,Yes,1


In [8]:
import joblib

# Save the model
joblib.dump(clf, 'fraud_detection_rf_model.pkl')

print("Model saved as 'fraud_detection_rf_model.pkl'")


✅ Model saved as 'fraud_detection_rf_model.pkl'


In [9]:
from azureml.core import Workspace, Model

# If not already connected to your workspace, load it
# ws = Workspace.from_config()  # Uncomment if needed

# Register the Random Forest model
model = Model.register(
    workspace=ws,
    model_path='fraud_detection_rf_model.pkl',  # Local path to the saved model
    model_name='fraud_detection_rf_model'       # Name to register in Azure
)

print(f"Model registered: {model.name}, ID: {model.id}, Version: {model.version}")


Registering model fraud_detection_rf_model
✅ Model registered: fraud_detection_rf_model, ID: fraud_detection_rf_model:1, Version: 1
