In [50]:
import awswrangler as wr
import joblib
import tempfile
import boto3

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


In [56]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.sklearn.estimator import SKLearn

# Execution role is given by name as a hard-coded literal; the author left
# get_execution_role() commented out as the alternative.
role = 'AmazonSageMaker-ExecutionRole-20230224T200599'  # get_execution_role()

# Session object plus the region of its underlying boto session.
sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_session.region_name

In [43]:
# Fixed random seed; passed as `random_state` to train_test_split so the
# train/test partition is reproducible across runs.
seed = 42

## Carga de datos

In [29]:
# Cleaned recipes dataset stored as a single CSV object in S3.
cleaned_recipes_path = "s3://recipes-data-models-sagemaker-bucket/data/cleaned_recipes.csv"
df_recipe_data = wr.s3.read_csv(path=cleaned_recipes_path)

# Preview the first rows to sanity-check the columns that were loaded.
df_recipe_data.head(5)

Unnamed: 0,titulo,categoria,ingredientes,elaboracion,link,total_ingredientes,titulo_link,ingredientes_limpios
0,"Buñuelos de viento fáciles, la receta tradicio...",postres,['125 gr de harina' '30 gr de mantequilla' '1/...,Otoño no es solo época de calabazas y castañas...,https://www.hogarmania.com//cocina/recetas/pos...,9.0,bunuelos,harina mantequilla agua azúcar huevos limón sa...
1,Corona de hojaldre de Navidad,postres,['2 láminas de hojaldre rectangular ' '150 g d...,"Los polvorones, los turrones, los mazapanes o ...",https://www.hogarmania.com//cocina/recetas/pos...,7.0,corona navidad,láminas hojaldre chocolate negro mantequilla g...
2,Cafés de Navidad: Gingerbread Latte y Pumpkin ...,postres,['2 cucharadas de azúcar moreno suave'\n '1/2 ...,"La temporada de invierno, junto a la época nav...",https://www.hogarmania.com//cocina/recetas/pos...,8.0,cafes navidad gingerbread pumpkin spice latte,azúcar moreno suave jengibre molido nuez mosca...
3,Mazapanes de Navidad,postres,['300 gr. de almendra molida ' '370 gr. de lec...,Mezcla en un bol la almendra molida con la lec...,https://www.hogarmania.com//cocina/recetas/pos...,7.0,mazapanes navidad,almendra molida leche condensada limón agua ac...
4,"Churros en freidora de aire, ¡más fácil imposi...",postres,['Churros congelados' 'Aceite de oliva o giras...,Comienza por precalentar la freidora de aire. ...,https://www.hogarmania.com//cocina/recetas/pos...,3.0,churros freidora aire,churros congelados aceite oliva girasol azúcar


## Preprocesamiento de datos

In [39]:
# Fit the encoder on the recipe category column and keep the integer codes
# alongside the original data as `encoded_categoria`.
label_encoder = LabelEncoder()
encoded_categories = label_encoder.fit_transform(df_recipe_data['categoria'])
df_recipe_data['encoded_categoria'] = encoded_categories

In [40]:
# Show which integer code the fitted encoder assigned to each category name.
category_names = label_encoder.classes_
category_codes = label_encoder.transform(category_names)
dict(zip(category_names, category_codes))

{'arroces': 0,
 'carnes': 1,
 'ensaladas': 2,
 'entrantes': 3,
 'huevos': 4,
 'pastas-pizzas': 5,
 'pescados-mariscos': 6,
 'postres': 7,
 'segundos-platos': 8,
 'sopas-cremas': 9}

In [41]:
# Destination of the serialized label encoder in S3.
s3_client = boto3.client('s3')
bucket_name = "recipes-data-models-sagemaker-bucket"
key = "models/label_encoder.joblib"

# Serialize the fitted encoder into a temporary file, rewind it, and upload
# its bytes as a single S3 object.
with tempfile.TemporaryFile() as encoder_file:
    joblib.dump(label_encoder, encoder_file)
    encoder_file.seek(0)
    serialized_encoder = encoder_file.read()
    s3_client.put_object(Bucket=bucket_name, Key=key, Body=serialized_encoder)

In [44]:
# Keep only the feature column and the encoded class label.
df = df_recipe_data[['ingredientes_limpios', 'encoded_categoria']]

# Shuffled 70/30 split, stratified on the encoded label and seeded with `seed`.
train_data, test_data = train_test_split(
    df,
    test_size=0.3,
    shuffle=True,
    stratify=df['encoded_categoria'],
    random_state=seed,
)

In [45]:
# Persist both splits to S3 as CSV files without the DataFrame index.
train_csv_path = "s3://recipes-data-models-sagemaker-bucket/data/train_data.csv"
test_csv_path = "s3://recipes-data-models-sagemaker-bucket/data/test_data.csv"

wr.s3.to_csv(df=train_data, path=train_csv_path, index=False)
# Left as the last bare expression so the cell still displays its result.
wr.s3.to_csv(df=test_data, path=test_csv_path, index=False)

{'paths': ['s3://recipes-data-models-sagemaker-bucket/data/test_data.csv'],
 'partitions_values': {}}

## Estimador

In [59]:
# Metric definition: a name plus the regex used to extract its value.
balanced_accuracy_metric = {
    "Name": "balanced_accuracy",
    "Regex": "test_balanced_accuracy: ([0-9.]+).*$",
}

# Estimator that runs train.py as a scikit-learn training job on spot capacity.
# NOTE(review): the recorded run of the fit cell failed with
# ResourceLimitExceeded because the account quota for ml.t3.medium training
# usage is 0 -- request a quota increase or pick another instance type.
sklearn_estimator = SKLearn(
    entry_point="train.py",
    role=role,
    instance_type="ml.t3.medium",
    framework_version="1.2-1",
    base_job_name="sgd-recipe-clf",
    metric_definitions=[balanced_accuracy_metric],
    use_spot_instances=True,
    max_run=900,
    max_wait=1800,
)
# TODO: review the model_dir

In [60]:
# Launch the training job with three input channels.
# The train/test channels must reference the objects this notebook actually
# writes (data/train_data.csv and data/test_data.csv); the previous keys
# (data/train.csv, data/test.csv) are never produced by this notebook.
sklearn_estimator.fit(
    {
        "train": f"s3://{bucket_name}/data/train_data.csv",
        "test": f"s3://{bucket_name}/data/test_data.csv",
        # Serialized LabelEncoder uploaded earlier in this notebook.
        "encoder": f"s3://{bucket_name}/models/label_encoder.joblib",
    }
)

INFO:sagemaker:Creating training-job with name: sgd-recipe-clf-2024-06-28-20-26-35-569


ResourceLimitExceeded: An error occurred (ResourceLimitExceeded) when calling the CreateTrainingJob operation: The account-level service limit 'ml.t3.medium for training job usage' is 0 Instances, with current utilization of 0 Instances and a request delta of 1 Instances. Please use AWS Service Quotas to request an increase for this quota. If AWS Service Quotas is not available, contact AWS support to request an increase for this quota.