In [40]:
%%writefile entry_point.py
import argparse
import os
import datetime as dt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import pandas as pd
import joblib
from io import StringIO

def model_fn(model_dir):
    model = joblib.load(os.path.join(model_dir, 'model.joblib'))
    return model

def predict_fn(input_object, model):
    y_pred = model.predict_proba(input_object)[0][1]
    return y_pred

def input_fn(request_body, request_content_type):
    print(request_body)
   
    df = pd.read_csv(StringIO(request_body), header=None)
   
    print(df)
   
    return df.to_numpy()

if __name__ =='__main__':

    parser = argparse.ArgumentParser()

    # Data, model, and output directories
    parser.add_argument('--output-data-dir', type=str, default=os.environ.get('SM_OUTPUT_DATA_DIR'))
    parser.add_argument('--model-dir', type=str, default=os.environ.get('SM_MODEL_DIR'))
    parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN'))
    parser.add_argument('--test', type=str, default=os.environ.get('SM_CHANNEL_TEST'))
    
    args, _ = parser.parse_known_args()
    train = pd.read_csv('{}/train.csv'.format(args.train))
    test = pd.read_csv('{}/test.csv'.format(args.test))
    
    
    
    train['fecha'] = pd.to_datetime(train['fecha'])
    train['fecha'] = train['fecha'].map(dt.datetime.toordinal)

    test['fecha'] = pd.to_datetime(test['fecha'])
    test['fecha'] = test['fecha'].map(dt.datetime.toordinal)
    
    
    X_train = np.array(train['fecha']).reshape(-1, 1)
    y_train = np.array(train['value']).reshape(-1, 1)

    X_test = np.array(test['fecha']).reshape(-1, 1)
    y_test = np.array(test['value']).reshape(-1, 1)
    
    model = LinearRegression()

    model.fit(X_train, y_train)

    print(model.score(X_test, y_test))
    
    
    y_pred = model.predict(X_test)

    print(f"Coeficientes del modelo: {model.coef_}")
    print(f"Intresección del modelo: {model.intercept_}")
    print(f"Número de coeficientes del modelo: {len(model.coef_)}")
    
    puntuation = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)

    print(f"Score r2: {puntuation}")
    print(f"Score mae: {mae}")
    print(f"Score mse: {mse}")

    
    joblib.dump(model, os.path.join(args.model_dir, 'model.joblib') )
    
    print(train)

Overwriting entry_point.py


In [41]:
!python entry_point.py --train s3://final-dollar-semestre3-sa/dollar/train --test s3://final-dollar-semestre3-sa/dollar/test --model-dir ''

-106.45143917751096
Coeficientes del modelo: [[0.]]
Intresección del modelo: [4568.84564725]
Número de coeficientes del modelo: 1
Score r2: -106.45143917751096
Score mae: 2.1821889525608276
Score mse: 4.80668215445953
      fecha      value
0    738615  4572.7800
1    738615  4572.9031
2    738615  4572.8265
3    738615  4573.0595
4    738615  4573.1477
..      ...        ...
687  738615  4571.1079
688  738615  4571.1100
689  738615  4571.1185
690  738615  4571.1247
691  738615  4571.1310

[692 rows x 2 columns]


In [42]:
import sagemaker
from sagemaker.sklearn.estimator import SKLearn

In [43]:
estimator = SKLearn(role='Labrole', instance_type='ml.m4.xlarge',
                   entry_point='entry_point.py',
                   framework_version='1.0-1')

In [None]:
estimator.fit({'train':'s3://final-dollar-semestre3-sa/dollar/train',
               'test':'s3://final-dollar-semestre3-sa/dollar/test'})

INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2023-04-08-01-55-03-087


2023-04-08 01:55:05 Starting - Starting the training job...
2023-04-08 01:55:29 Starting - Preparing the instances for training.......