In [None]:
import sagemaker
from sagemaker.pytorch import PyTorch
from sagemaker.inputs import TrainingInput
from sagemaker import get_execution_role
import time

# S3 bucket that holds the training data and receives the job output.
bucket = "say1-5team-bucket"
# IAM role the training job runs under (resolved from the notebook environment).
role = get_execution_role()

# Hyperparameter keys must match the argparse options declared in train.py.
train_hyperparameters = {
    "epochs": 30,
    "batch-size": 64,
    "lr": 0.001,
}
job_tags = [{"Key": "project", "Value": "pre-5team"}]

est = PyTorch(
    entry_point="train.py",
    source_dir=".",
    role=role,
    # Pick the version that matches your environment (2.2 also works, as long
    # as train.py is compatible with it).
    framework_version="2.1",
    py_version="py310",
    instance_count=1,
    instance_type="ml.g4dn.2xlarge",
    hyperparameters=train_hyperparameters,
    output_path=f"s3://{bucket}/densenet-output/",
    base_job_name="densenet121-skin",
    tags=job_tags,
)

# One input channel per dataset split.
inputs = {
    "train": TrainingInput(f"s3://{bucket}/densenet-training-data/train"),
    "val": TrainingInput(f"s3://{bucket}/densenet-training-data/val"),
    "test": TrainingInput(f"s3://{bucket}/densenet-training-data/test"),
}

# A timestamp suffix gives every submission a distinct job name.
launch_stamp = time.strftime('%Y-%m-%d-%H-%M-%S')
job_name = f"densenet121-skin-job-{launch_stamp}"
est.fit(inputs, job_name=job_name)

# Report where the trained model artifact was written.
desc = est.latest_training_job.describe()
model_artifact_uri = desc["ModelArtifacts"]["S3ModelArtifacts"]
print("ModelArtifacts:", model_artifact_uri)


In [None]:
import sagemaker, boto3
from sagemaker.pytorch import PyTorch

# Name of an already-submitted training job to re-attach to (for example
# after a kernel restart). Change this to the job you want to inspect.
job_name = "densenet121-skin-job-2025-08-20-06-18-42"

# Quick status check only.
sm = boto3.client("sagemaker")
print(sm.describe_training_job(TrainingJobName=job_name)["TrainingJobStatus"])

# Stream the job's CloudWatch logs live. This is done BEFORE attach() because
# attach() blocks until an in-progress job completes without displaying any
# logs, so streaming afterwards would only replay them once training is over.
# wait=True keeps following the logs until the job reaches a terminal state.
sess = sagemaker.Session()
sess.logs_for_job(job_name, wait=True)

# Rebuild an estimator bound to the job so later cells can use
# est.latest_training_job, est.model_data, etc.
est = PyTorch.attach(training_job_name=job_name, sagemaker_session=sess)


In [None]:
# A. Inspect what this training job actually ran with.
# `est` is the estimator produced by the fit/attach cells earlier in this
# notebook. describe() issues a DescribeTrainingJob request, so call it once
# and reuse the response instead of re-querying for every field.
desc = est.latest_training_job.describe()

# DebugHookConfig is only present when a debugger hook was configured,
# so read it with .get() rather than indexing.
print(desc.get("DebugHookConfig"))

job_hyperparameters = desc.get("HyperParameters", {})
print(job_hyperparameters)

# The SageMaker SDK records the S3 location of the uploaded source bundle in
# the `sagemaker_submit_directory` hyperparameter (this is what the container
# sees as SM_MODULE_DIR). The training image URI is a separate field.
print("module_dir:", job_hyperparameters.get("sagemaker_submit_directory"))
print("training_image:", desc["AlgorithmSpecification"]["TrainingImage"])

input_channels = desc.get("InputDataConfig", [])

# 1) S3 location of each input channel
print("=== InputDataConfig ===")
for ch in input_channels:
    print(f"- channel: {ch['ChannelName']}, S3Uri: {ch['DataSource']['S3DataSource']['S3Uri']}")

# 2) Path where each channel is visible inside the container (mount point)
for ch in input_channels:
    print(f"- inside container: /opt/ml/input/data/{ch['ChannelName']}")


# B. Did you check the CloudWatch logs for a [SAVE] line?
# (Applies if train.py prints something like the following.)
# [SAVE] New best (val acc=0.1234) → /opt/ml/model/model.pth
