In [5]:
%%writefile entry_point.py
import argparse
import os
import datetime as dt

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import pandas as pd
import joblib
from io import StringIO

def model_fn(model_dir):
    model = joblib.load(os.path.join(model_dir, 'model.joblib'))
    return model

def predict_fn(input_object, model):
    y_pred = model.predict_proba(input_object)[0][1]
    return y_pred

def input_fn(request_body, request_content_type):
    print(request_body)
   
    df = pd.read_csv(StringIO(request_body), header=None)
   
    print(df)
   
    return df.to_numpy()

if __name__ =='__main__':

    parser = argparse.ArgumentParser()

    # Data, model, and output directories
    parser.add_argument('--output-data-dir', type=str, default=os.environ.get('SM_OUTPUT_DATA_DIR'))
    parser.add_argument('--model-dir', type=str, default=os.environ.get('SM_MODEL_DIR'))
    parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN'))
    parser.add_argument('--test', type=str, default=os.environ.get('SM_CHANNEL_TEST'))
    
    args, _ = parser.parse_known_args()
    train = pd.read_csv('{}/train.csv'.format(args.train), header=None)
    test = pd.read_csv('{}/test.csv'.format(args.test), header=None)
    
    
    
    X_train = train.iloc[:, 1:]
    y_train = train.iloc[:, 0]
    
    X_test = test.iloc[:, 1:]
    y_test = test.iloc[:, 0]
    
    
    
    X_train = pd.to_datetime(X_train)
    X_train = X_train.map(dt.datetime.toordinal)
    
    X_test = pd.to_datetime(X_test)
    X_test = X_test.map(dt.datetime.toordinal)
    
    model = LinearRegression()
    
    model.fit(X_train, y_train)
    
    
    y_test_predict = model.predict(X_test)
    
    puntuation = r2_score(y_test, y_test_predict)
    mae = mean_absolute_error(y_test, y_test_predict)
    mse = mean_squared_error(y_test, y_test_predict)
    
    print(f"Coeficientes del modelo: {model.coef_}")
    print(f"Intresección del modelo: {model.intercept_}")
    print(f"Número de coeficientes del modelo: {len(model.coef_)}")
    
    print(f"Score r2: {puntuation}")
    print(f"Score mae: {mae}")
    print(f"Score mse: {mse}")
    
    joblib.dump(model, os.path.join(args.model_dir, 'model.joblib') )
    
    print(train)

Overwriting entry_point.py


In [6]:
!python entry_point.py --train s3://final-dollar-semestre3-sa/dollar/train --test s3://final-dollar-semestre3-sa/dollar/test --model-dir ''

Traceback (most recent call last):
  File "/home/ec2-user/SageMaker/final-project-dollar/entry_point.py", line 53, in <module>
    X_train = pd.to_datetime(X_train)
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages/pandas/core/tools/datetimes.py", line 1071, in to_datetime
    result = _assemble_from_unit_mappings(arg, errors, tz)
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages/pandas/core/tools/datetimes.py", line 1178, in _assemble_from_unit_mappings
    unit = {k: f(k) for k in arg.keys()}
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages/pandas/core/tools/datetimes.py", line 1178, in <dictcomp>
    unit = {k: f(k) for k in arg.keys()}
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages/pandas/core/tools/datetimes.py", line 1173, in f
    if value.lower() in _unit_map:
AttributeError: 'int' object has no attribute 'lower'


In [4]:
import sagemaker
from sagemaker import image_uris
from sagemaker.estimator import Estimator

In [5]:
#container = image_uris.retrieve(region=boto3.Session().region_name, framework="linear-learner")

In [27]:
import datetime as dt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import pandas as pd
train = pd.read_csv('data/train.csv', header=0)
test = pd.read_csv('data/test.csv', header=0)

In [24]:
X_train = train.iloc[:, 0]
y_train = train.iloc[:, 1]

X_test = test.iloc[:, 0]
y_test = test.iloc[:, 1]

In [25]:
X_train = pd.to_datetime(X_train)
X_train = X_train.map(dt.datetime.toordinal)

X_test = pd.to_datetime(X_test)
X_test = X_test.map(dt.datetime.toordinal)

print(f" len X_train:{type(X_train)}, len X_test:{type(X_test)} ")
print(f" len y_train:{type(y_train)}, len y_test:{type(y_test)} ")

 len X_train:<class 'pandas.core.series.Series'>, len X_test:<class 'pandas.core.series.Series'> 
 len y_train:<class 'pandas.core.series.Series'>, len y_test:<class 'pandas.core.series.Series'> 


In [28]:
X_train =  np.array(X_train).reshape(1, -1)
X_test =  np.array(X_test).reshape(1, -1)

y_train =  np.array(y_train).reshape(1, -1)
y_test =  np.array(y_test).reshape(1, -1)

In [29]:
print(f" len X_train:{len(X_train[0])}, len X_test:{len(X_test[0])} ")
print(f" len y_train:{len(y_train[0])}, len y_test:{len(y_test[0])} ")

 len X_train:692, len X_test:174 
 len y_train:692, len y_test:174 


In [30]:
model = LinearRegression()

model.fit(X_train, y_train)

print(model.score(X_test, y_test))

ValueError: X has 174 features, but LinearRegression is expecting 692 features as input.

In [20]:
y_train_predict = model.predict(X_train)

print(f"Coeficientes del modelo: {model.coef_}")
print(f"Intresección del modelo: {model.intercept_}")
print(f"Número de coeficientes del modelo: {len(model.coef_)}")



Coeficientes del modelo: [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Intresección del modelo: [4572.78   4572.9031 4572.8265 4573.0595 4573.1477 4573.2283 4573.37
 4573.4327 4572.678  4572.6423 4572.2873 4572.2703 4572.2498 4571.7827
 4571.7749 4571.7598 4571.745  4571.7114 4571.6523 4571.5208 4571.4096
 4571.3696 4571.3303 4571.0371 4570.9884 4570.8047 4570.614  4570.5705
 4570.5122 4570.4702 4570.4287 4570.3193 4570.2659 4569.3341 4569.2541
 4569.2208 4569.1556 4569.1204 4568.9361 4568.8565 4568.7325 4568.6991
 4568.3014 4568.1801 4568.0638 4567.9523 4567.8453 4567.8211 4567.7193
 4567.6213 4567.5195 4567.4918 4567.4368 4567.15   4567.1287 4567.0854
 4567.0634 4567.0481 4567.035  4567.0085 4567.006  4567.0046 4567.0045
 4567.0045 4567.0043 4567.0043 4567.0002 4566.9978 4566.9938 4566.9917
 4566.9838 4566.972  4566.9642 4566.9603 4566.9209 4566.8848 4566.8719
 4566.8575 4566

In [21]:
model.fit(X_test, y_test)
y_pred = model.predict(X_test)

puntuation = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"Score r2: {puntuation}")
print(f"Score mae: {mae}")
print(f"Score mse: {mse}")



Score r2: nan
Score mae: 0.0
Score mse: 0.0


