In [1]:
%%writefile entry_point.py
import argparse
import os
import datetime as dt

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import pandas as pd
import joblib
from io import StringIO

def model_fn(model_dir):
    model = joblib.load(os.path.join(model_dir, 'model.joblib'))
    return model

def predict_fn(input_object, model):
    y_pred = model.predict_proba(input_object)[0][1]
    return y_pred

def input_fn(request_body, request_content_type):
    print(request_body)
   
    df = pd.read_csv(StringIO(request_body), header=None)
   
    print(df)
   
    return df.to_numpy()

if __name__ =='__main__':

    parser = argparse.ArgumentParser()

    # Data, model, and output directories
    parser.add_argument('--output-data-dir', type=str, default=os.environ.get('SM_OUTPUT_DATA_DIR'))
    parser.add_argument('--model-dir', type=str, default=os.environ.get('SM_MODEL_DIR'))
    parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN'))
    parser.add_argument('--test', type=str, default=os.environ.get('SM_CHANNEL_TEST'))
    
    args, _ = parser.parse_known_args()
    train = pd.read_csv('{}/train.csv'.format(args.train), header=None)
    test = pd.read_csv('{}/test.csv'.format(args.test), header=None)
    
    
    
    X_train = train.iloc[:, 1:]
    y_train = train.iloc[:, 0]
    
    X_test = test.iloc[:, 1:]
    y_test = test.iloc[:, 0]
    
    
    
    X_train = pd.to_datetime(X_train)
    X_train = X_train.map(dt.datetime.toordinal)
    
    test['fecha'] = pd.to_datetime(test['fecha'])
    test['fecha'] = test['fecha'].map(dt.datetime.toordinal)
    
    model = LinearRegression()
    
    model.fit(X_train, y_train)
    
    
    y_test_predict = model.predict(X_test)
    
    puntuation = r2_score(y_test, y_test_predict)
    mae = mean_absolute_error(y_test, y_test_predict)
    mse = mean_squared_error(y_test, y_test_predict)
    
    print(f"Coeficientes del modelo: {model.coef_}")
    print(f"Intresección del modelo: {model.intercept_}")
    print(f"Número de coeficientes del modelo: {len(model.coef_)}")
    
    print(f"Score r2: {puntuation}")
    print(f"Score mae: {mae}")
    print(f"Score mse: {mse}")
    
    joblib.dump(model, os.path.join(args.model_dir, 'model.joblib') )
    
    print(train)

Writing entry_point.py


In [2]:
!python entry_point.py --train s3://final-dollar-semestre3-sa/dollar/train --test s3://final-dollar-semestre3-sa/dollar/test --model-dir ''

Traceback (most recent call last):
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages/pandas/core/indexes/base.py", line 3803, in get_loc
    return self._engine.get_loc(casted_key)
  File "pandas/_libs/index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 146, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index_class_helper.pxi", line 49, in pandas._libs.index.Int64Engine._check_type
KeyError: 'fecha'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/ec2-user/SageMaker/final-project-dollar/entry_point.py", line 43, in <module>
    train['fecha'] = pd.to_datetime(train['fecha'])
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages/pandas/core/frame.py", line 3805, in __getitem__
    indexer = self.columns.get_loc(key)
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages/pandas/core/inde

In [2]:
import sagemaker
from sagemaker import image_uris
from sagemaker.estimator import Estimator

In [None]:
container = image_uris.retrieve(region=boto3.Session().region_name, framework="linear-learner")