In [1]:
## this is train.py

import argparse
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import joblib
import pandas as pd

# Define a simple model (for example, a linear regression model)
class SimpleModel(nn.Module):
    def __init__(self, input_dim):
        super(SimpleModel, self).__init__()
        self.fc = nn.Linear(input_dim, 1)
    
    def forward(self, x):
        return self.fc(x)

def main():
    # Parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR"))
    parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN"))
    args = parser.parse_args()

    # Load data from S3
    train_data = pd.read_csv(os.path.join(args.train, "data.csv"))
    X = train_data.drop("target", axis=1).values
    y = train_data["target"].values
    
    # Convert to PyTorch tensors
    X_tensor = torch.tensor(X, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.float32).view(-1, 1)

    # Create DataLoader
    dataset = TensorDataset(X_tensor, y_tensor)
    train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

    # Initialize the model, loss, and optimizer
    input_dim = X.shape[1]
    model = SimpleModel(input_dim)
    criterion = nn.MSELoss()  # Mean Squared Error Loss for regression
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # Train the model
    num_epochs = 10
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}")

    # Save the model
    model_path = os.path.join(args.model_dir, "model.pth")
    torch.save(model.state_dict(), model_path)
    print(f"Model saved to {model_path}")

if __name__ == "__main__":
    main()


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [None]:
import sagemaker
from sagemaker.pytorch import PyTorch

# Define S3 paths
bucket = "your-s3-bucket-name"
prefix = "pytorch-linear-regression"
train_input = f"s3://{bucket}/{prefix}/train/"

# Create PyTorch Estimator
pytorch_estimator = PyTorch(
    entry_point="train.py",
    framework_version="1.6.0",  # Change based on available versions
    instance_type="ml.p2.xlarge",
    instance_count=1,
    role="your-sagemaker-execution-role",
    hyperparameters={"epochs": 10},
    sagemaker_session=sagemaker.Session(),
)

# Launch training job
pytorch_estimator.fit({"train": train_input})

In [None]:
# Deploy the trained model
predictor = pytorch_estimator.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
)

# Use the endpoint to make predictions
data = [[1200, 3, 2, 1]]  # Example input
prediction = predictor.predict(data)
print(f"Prediction: {prediction}")

In [None]:
predictor.delete_endpoint()