# Challenge AI: Detección de Fraudes en Transacciones Bancarias

In [23]:
import pandas as pd
import boto3
import json
import time

# Bedrock runtime client, used further down to generate one risk description per row.
bedrock = boto3.client(service_name='bedrock-runtime', region_name='us-east-1')

# Raw credit dataset; the file is read as an Excel workbook despite the ".csv" in its name.
df = pd.read_excel(io='credir_risk_reto.csv.xlsx')

# Preview the first rows to check that the columns were loaded.
df.head()

Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose
0,67,male,2,own,,little,1169,6,radio/TV
1,22,female,2,own,little,moderate,5951,48,radio/TV
2,49,male,1,own,little,,2096,12,education
3,45,male,2,free,little,little,7882,42,furniture/equipment
4,53,male,2,free,little,little,4870,24,car


Generación de descripciones con Bedrock

In [None]:
# Job codes mapped to the Spanish wording used in the prompt.
# Defined once, outside the loop, because it is the same for every row.
mapa_trabajo = {0: 'no cualificado no residente', 1: 'no cualificado residente', 2: 'cualificado', 3: 'altamente cualificado'}


def _texto_o_desconocido(valor):
    """Return ``valor``, or 'desconocido' when the value is missing or empty.

    Empty spreadsheet cells are loaded by pandas as NaN. NaN is truthy in
    Python, so a plain ``valor if valor else ...`` check lets the literal text
    "nan" through; the explicit ``pd.isna`` test catches it.
    """
    if pd.isna(valor) or not valor:
        return 'desconocido'
    return valor


# One generated description (or a placeholder on failure) per row, in row order.
descriptions = []
for i in range(len(df)):
    fila = df.iloc[i]

    ahorros = _texto_o_desconocido(fila['Saving accounts'])
    cuenta_corriente = _texto_o_desconocido(fila['Checking account'])
    desc_trabajo = mapa_trabajo.get(fila['Job'], 'desconocido')

    # Build the prompt for this row.
    prompt = f"""
    Genera UNA descripción concisa del perfil de riesgo crediticio para una persona con estas características en UNA linea, DETENIÉNDOTE después de aproximadamente 50 palabras:
    - Edad: {fila['Age']}
    - Sexo: {fila['Sex']}
    - Trabajo: {desc_trabajo}
    - Vivienda: {fila['Housing']}
    - Cuentas de ahorro: {ahorros}
    - Cuenta corriente: {cuenta_corriente}
    - Monto de crédito: {fila['Credit amount']} EUR
    - Duración: {fila['Duration']} meses
    - Propósito: {fila['Purpose']}

    Evalúa el riesgo crediticio en solo un párrafo corto, sin repeticiones ni preguntas, ni contenido adicional
    Al final indica si es “bad risk” o “good risk”,asegurando que la evaluación sea coherente con la clasificación, solo una de esas dos opciones, sin explicaciones adicionales.
    Si el riesgo crediticio es "moderado a alto" se tiene que clasificar como "bad risk" a menos que haya factores compensatorios fuertes (e.g., ahorros significativos).

    """

    # No output-length cap is sent. If one is added, the Meta Llama request
    # schema on Bedrock names that field `max_gen_len` (not `max_tokens`).
    body = json.dumps({
        "prompt": prompt,
        "temperature": 0.6,
    })
    try:
        response = bedrock.invoke_model(
            modelId='arn:aws:bedrock:us-east-1:004082821794:inference-profile/us.meta.llama3-2-90b-instruct-v1:0',
            contentType='application/json',
            accept='application/json',
            body=body
        )
        response_body = json.loads(response['body'].read())

        # The generated text is read from the 'generation' key of the response.
        if 'generation' in response_body:
            description = response_body['generation'].strip()
        else:
            description = "No se pudo extraer la descripción."

        descriptions.append(description)
    except Exception as e:
        # Best effort: report the failing row and keep a placeholder so that
        # `descriptions` stays aligned with the rows of `df`.
        print(f"Error en fila {i}: {e}")
        descriptions.append("No se pudo generar la descripción.")
    # Pause between calls to space out the requests.
    time.sleep(1)

# Attach the descriptions to the DataFrame and persist the result.
df.loc[:, 'description'] = descriptions
df.to_excel('credit_risk_with_descriptions.xlsx', index=False)

Clasificación y etiquetado

In [104]:
# Reload the file written by the description-generation step, so labelling
# works from the saved file rather than from the in-memory frame.
ruta_descripciones = 'credit_risk_with_descriptions.xlsx'
df = pd.read_excel(ruta_descripciones)

def extraer_target(descripcion):
    """Extract the risk label from a generated description.

    Args:
        descripcion: Text of the description. Non-string values (for example
            the NaN pandas uses for an empty spreadsheet cell) are accepted.

    Returns:
        'good risk' or 'bad risk', or None when the input is not a string or
        contains none of the known phrases.
    """
    # Empty cells arrive as float NaN, which has no .lower(); treat as unlabelled.
    if not isinstance(descripcion, str):
        return None

    texto = descripcion.lower()
    # Phrases are checked in this order, so the "good" phrases take precedence
    # when a text happens to contain more than one of them.
    frases = (
        ('good risk', 'good risk'),
        ('buen riesgo', 'good risk'),
        ('bad risk', 'bad risk'),
        ('mal riesgo', 'bad risk'),
    )
    for frase, etiqueta in frases:
        if frase in texto:
            return etiqueta
    return None

# Derive a label for every row from its generated text, then save the labelled dataset.
target_por_fila = df['description'].apply(extraer_target)
df['target'] = target_por_fila

df.to_excel('credit_risk_labeled.xlsx', index=False)

In [106]:
# Class distribution: number of rows per label, sorted by label.
conteo_por_clase = df['target'].value_counts()
target_counts = conteo_por_clase.sort_index()
target_counts

target
bad risk     657
good risk    343
Name: count, dtype: int64

Preprocesamiento

In [None]:
from sklearn.preprocessing import OneHotEncoder
import numpy as np

df = pd.read_excel('credit_risk_labeled.xlsx')

# Binary target: 1 marks the "bad risk" class.
df['target'] = df['target'].map({'good risk': 0, 'bad risk': 1})

# Rows whose description produced no recognisable label have no target here.
# They are dropped so that missing labels never reach the training/test files.
# When every row is labelled this is a no-op.
sin_etiqueta = df['target'].isna()
if sin_etiqueta.any():
    print(f"Se descartan {int(sin_etiqueta.sum())} filas sin etiqueta reconocible.")
    df = df.loc[~sin_etiqueta].reset_index(drop=True)
    df['target'] = df['target'].astype(int)

# Only encode Sex while it still holds text.
if df['Sex'].dtype == 'object':
    df['Sex'] = df['Sex'].map({'female': 0, 'male': 1})

# Integer codes for the remaining text columns; values missing from a map
# (including empty cells) become NaN.
df['Housing'] = df['Housing'].map({'free': 0, 'rent': 1, 'own': 2})

df['Saving accounts'] = df['Saving accounts'].map({
    'little': 0, 'moderate': 1, 'quite rich': 2, 'rich': 3
})

df['Checking account'] = df['Checking account'].map({
    'little': 0, 'moderate': 1, 'rich': 2
})

numeric_columns = ['Age', 'Sex', 'Job', 'Housing', 'Saving accounts', 'Checking account', 'Credit amount', 'Duration']
categorical_columns = ['Purpose']

X_numeric = df[numeric_columns].values

# Purpose has no order, so it is one-hot encoded instead of integer-coded.
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
X_categorical = encoder.fit_transform(df[categorical_columns])

X = np.hstack((X_numeric, X_categorical))
y = df['target'].values

# Column names: the numeric columns followed by one "<column>_<category>" per one-hot column.
numeric_names = numeric_columns
categorical_names = [f"{col}_{cat}" for col, cats in zip(categorical_columns, encoder.categories_) for cat in cats]
all_feature_names = numeric_names + categorical_names

# Target goes first and the files are written without a header row:
# the SageMaker XGBoost CSV format expects the label in the first column and no header.
X_df = pd.DataFrame(X, columns=all_feature_names)
final_df = pd.concat([pd.Series(y, name='target'), X_df], axis=1)

# Reproducible 80/20 random split; the test set is whatever the sample left out.
train_df = final_df.sample(frac=0.8, random_state=42)
test_df = final_df.drop(train_df.index)

# Check that both splits keep a class balance similar to the full dataset.
print("Proporcion de clases en la data de entrenamiento:", train_df["target"].value_counts())
print("Proporcion de clases en la data de prueba:", test_df["target"].value_counts())

train_df.to_csv('credit_risk_processed_train.csv', index=False, header=False)
test_df.to_csv('credit_risk_processed_test.csv', index=False, header=False)

Proporcion de clases en la data de entrenamiento: target
1    528
0    272
Name: count, dtype: int64
Proporcion de clases en la data de prueba: target
1    129
0     71
Name: count, dtype: int64


Entrenamiento

In [None]:
import sagemaker
from sagemaker.inputs import TrainingInput

role = "arn:aws:iam::004082821794:role/SageMakerExecutionRole"
# A single session is shared by the region lookup and the estimator, so both
# are guaranteed to use the same configuration.
sesion = sagemaker.Session()
region = sesion.boto_region_name
bucket = 'fraud-detection-challenge-2025'  # S3 bucket
output_path = f's3://{bucket}/output'

# Location of the training CSV in S3.
# NOTE(review): nothing in this notebook uploads the local
# credit_risk_processed_train.csv to this key; presumably it is uploaded
# manually. Confirm before re-running.
train_data_path = f's3://{bucket}/credit_risk_processed_train.csv'

# Configure the built-in XGBoost container and the estimator that runs it.
xgboost_container = sagemaker.image_uris.retrieve("xgboost", region, version="1.5-1")
xgb = sagemaker.estimator.Estimator(
    image_uri=xgboost_container,
    role=role,
    instance_count=1,
    instance_type='ml.m4.xlarge',
    volume_size=5,  # 5 GB of storage for the training instance
    output_path=output_path,
    sagemaker_session=sesion,
    max_run=3600  # at most 1 hour
)

# Hyperparameters: binary classifier that outputs a probability.
xgb.set_hyperparameters(
    objective='binary:logistic',
    num_round=100,
    max_depth=6,
    eta=0.2,
    subsample=0.8
)

# Training input channel.
train_input = TrainingInput(train_data_path, content_type='csv')

# Launch the training job.
# NOTE(review): fit() is called with its default arguments; by default the SDK
# appears to wait for the job to finish, so the message below would print
# after training has completed. Confirm.
xgb.fit({'train': train_input})

print(f"Training job {xgb.latest_training_job.name} iniciado. Revisa el estado en la consola de SageMaker.")

INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2025-07-24-02-32-10-685


2025-07-24 02:32:12 Starting - Starting the training job...
2025-07-24 02:32:48 Downloading - Downloading input data...
2025-07-24 02:33:18 Downloading - Downloading the training image......
  from pandas import MultiIndex, Int64Index
[2025-07-24 02:34:32.099 ip-10-2-172-41.ec2.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None
[2025-07-24 02:34:32.122 ip-10-2-172-41.ec2.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.
[2025-07-24:02:34:32:INFO] Imported framework sagemaker_xgboost_container.training
[2025-07-24:02:34:32:INFO] Failed to parse hyperparameter objective value binary:logistic to Json.
Returning the value itself
[2025-07-24:02:34:32:INFO] No GPUs detected (normal if no gpus installed)
[2025-07-24:02:34:32:INFO] Running XGBoost Sagemaker in algorithm mode
[2025-07-24:02:34:32:INFO] Determined 0 GPU(s) available on the instance.
[2025-07-24:02:34:32:INFO] Determined delimiter of CSV input is ','
[2025-07-24:02:34:32:INFO] files path: /o

Testing

In [None]:
import xgboost as xgb
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Load the trained booster from a local file (adjust the path as needed).
# NOTE(review): presumably the model.tar.gz from the training output was
# downloaded and extracted to this path beforehand. Confirm.
model = xgb.Booster()
model.load_model('model/xgboost-model')

# Held-out split: no header row, target in the first column, features in the rest.
data = pd.read_csv('credit_risk_processed_test.csv', header=None)
y = data.iloc[:, 0].values
X = data.iloc[:, 1:].values

# Score the features, then turn the scores into 0/1 labels at a 0.5 threshold.
dmatrix = xgb.DMatrix(X)
predictions = model.predict(dmatrix)
supera_umbral = predictions >= 0.5
binary_predictions = supera_umbral.astype(int)

# Threshold-based metrics use the 0/1 labels; AUC uses the raw scores.
accuracy = accuracy_score(y, binary_predictions)
precision = precision_score(y, binary_predictions)
recall = recall_score(y, binary_predictions)
f1 = f1_score(y, binary_predictions)
auc = roc_auc_score(y, predictions)
conf_matrix = confusion_matrix(y, binary_predictions)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")
print(f"AUC-ROC: {auc:.2f}")
print("Confusion Matrix:\n", conf_matrix)

Accuracy: 0.72
Precision: 0.77
Recall: 0.81
F1-Score: 0.79
AUC-ROC: 0.74
Confusion Matrix:
 [[ 40  31]
 [ 25 104]]


Despliegue del modelo

In [None]:
from sagemaker.xgboost.model import XGBoostModel

role = "arn:aws:iam::004082821794:role/SageMakerExecutionRole"

# S3 location of the trained model artifact (adjust to the exact path of your training job).
model_data = 's3://fraud-detection-challenge-2025/output/sagemaker-xgboost-2025-07-24-02-32-10-685/output/model.tar.gz'

# framework_version matches the container version used for training.
model = XGBoostModel(model_data=model_data, role=role, framework_version='1.5-1')

print("Modelo configurado con éxito.")

# Create an endpoint backed by a single instance.
# NOTE(review): no cell in this notebook deletes the endpoint afterwards.
predictor = model.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')

print(f"Endpoint desplegado. Nombre: {predictor.endpoint_name}")

Modelo configurado con éxito.


INFO:sagemaker.image_uris:Ignoring unnecessary instance type: ml.m4.xlarge.
INFO:sagemaker:Created S3 bucket: sagemaker-us-east-1-004082821794
INFO:sagemaker:Creating model with name: sagemaker-xgboost-2025-07-24-03-20-17-086
INFO:sagemaker:Creating endpoint-config with name sagemaker-xgboost-2025-07-24-03-20-18-825
INFO:sagemaker:Creating endpoint with name sagemaker-xgboost-2025-07-24-03-20-18-825


--------!Endpoint desplegado. Nombre: sagemaker-xgboost-2025-07-24-03-20-18-825


Pruebas de inferencia

In [120]:
from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer
import numpy as np

endpoint_name = 'sagemaker-xgboost-2025-07-24-03-20-18-825'

# Predictor bound to the deployed endpoint; request payloads are serialized as CSV.
predictor = Predictor(endpoint_name=endpoint_name, serializer=CSVSerializer())

# Same held-out file as the local evaluation: target in the first column, features after it.
data = pd.read_csv('credit_risk_processed_test.csv', header=None)
y = data.iloc[:, 0].values
X = data.iloc[:, 1:].values

# Send all rows in one request and parse the newline-separated scores from the response.
response = predictor.predict(X)
respuesta_texto = response.decode('utf-8')
predictions = np.fromstring(respuesta_texto, sep='\n')
supera_umbral = predictions >= 0.5
binary_predictions = supera_umbral.astype(int)

# The metric functions are not imported in this cell; they come from the
# sklearn.metrics import in the Testing cell.
accuracy = accuracy_score(y, binary_predictions)
precision = precision_score(y, binary_predictions)
recall = recall_score(y, binary_predictions)
f1 = f1_score(y, binary_predictions)
auc = roc_auc_score(y, predictions)
conf_matrix = confusion_matrix(y, binary_predictions)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")
print(f"AUC-ROC: {auc:.2f}")
print("Confusion Matrix:\n", conf_matrix)

Accuracy: 0.72
Precision: 0.77
Recall: 0.81
F1-Score: 0.79
AUC-ROC: 0.74
Confusion Matrix:
 [[ 40  31]
 [ 25 104]]
