In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas.api.types import is_numeric_dtype

import boto3
import logging
import os
import argparse
import json
import sagemaker
from sagemaker.sklearn import SKLearn
from sagemaker import Session
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput
from sagemaker import get_execution_role
import yaml
from sagemaker.sklearn.estimator import SKLearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import RFE
from lightgbm import LGBMClassifier
import itertools
import joblib
from tabulate import tabulate
from pathlib import Path
from timeutils import Stopwatch
import tarfile
import warnings
import optuna
# Ignore all Python warnings from this point on (this also hides deprecation notices).
warnings.filterwarnings('ignore')
# Restrict Optuna's own logging to WARNING level and above.
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Initialize logger
# Configure root logging at INFO level and create a logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [9]:
# Function to create a Boto3 session
def create_boto_session(aws_access_key_id=None, aws_secret_access_key=None,
                        region_name='us-east-1'):
    """Create a boto3 session for the given credentials and region.

    Args:
        aws_access_key_id: AWS access key ID. When None, it is passed through
            as None so boto3 resolves credentials on its own.
        aws_secret_access_key: AWS secret access key. Same None handling as
            ``aws_access_key_id``.
        region_name: AWS region the session is bound to. Defaults to
            'us-east-1', the value that used to be hard-coded here.

    Returns:
        The ``boto3.Session`` built from the arguments.
    """
    print('Creating boto session...')
    return boto3.Session(
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        region_name=region_name,
    )

# AWS credentials: read from the environment rather than hardcoding secrets in
# the notebook (notebooks get shared and committed). When a variable is unset,
# None is passed on and boto3 resolves credentials through its default chain.
aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')
aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')

# Create boto session
boto_session = create_boto_session(aws_access_key_id, aws_secret_access_key)

# Initialize SageMaker session on top of the boto session so every SageMaker
# call uses the same credentials and region.
sagemaker_session = sagemaker.Session(boto_session=boto_session)

# IAM role ARN
role = 'arn:aws:iam::730335322557:role/SagemakerRoleNet1'  # Replace with your actual SageMaker role ARN

# S3 paths for input and output data
bucket = 'datasetscybersec'
s3_input_train = f"s3://{bucket}/NID/Train_data.csv"  # Full path to the training data
s3_input_test = f"s3://{bucket}/NID/Test_data.csv"    # Full path to the testing data
prefix = 'model'  # Prefix for model storage in S3

# Uploaded source code and model artifacts share the same S3 location.
model_s3_uri = f"s3://{bucket}/{prefix}"
code_path = model_s3_uri
output_path = model_s3_uri

# Define the SKLearn estimator
sklearn_estimator = SKLearn(
    entry_point='model.py',  # The script to be executed
    source_dir='.',  # The script lives in the current directory
    role=role,
    instance_type='ml.m5.large',  # Instance type
    instance_count=1,
    base_job_name='sk-network',  # Prefix used for training job names
    framework_version='1.2-1',  # Replace with your sklearn version
    py_version='py3',
    script_mode=True,
    sagemaker_session=sagemaker_session,
    code_location=code_path,
    output_path=output_path  # Where the model artifacts will be stored
)


Creating boto session...


In [7]:
# Define input channels for training and testing data
train_input = TrainingInput(s3_data=s3_input_train, content_type='text/csv')
test_input = TrainingInput(s3_data=s3_input_test, content_type='text/csv')

# Start the training job, block until it finishes and stream its logs.
# No explicit job_name is passed: SageMaker training job names must be unique,
# so a fixed name makes this cell fail on every re-run. The estimator derives
# a unique name from its base_job_name instead.
sklearn_estimator.fit(
    {'train': train_input, 'test': test_input},
    wait=True,
    logs=True,
)


KeyboardInterrupt: 

In [None]:
import sagemaker
from sagemaker import get_execution_role

# Describe the training job launched from this notebook.
# The SageMaker session configured earlier is reused on purpose: creating a
# fresh default Session here would rebind `sagemaker_session` and could query
# a different account/region than the one the job was started in.
latest_job = sklearn_estimator.latest_training_job

# Prefer the job this estimator actually started; fall back to the previously
# hard-coded name when no job has been launched from this estimator yet.
training_job_name = latest_job.name if latest_job is not None else 'sk-network'
job_description = sagemaker_session.sagemaker_client.describe_training_job(
    TrainingJobName=training_job_name
)

print(job_description)

In [None]:
# Deploy the model
# predictor = sklearn_estimator.deploy(initial_instance_count=1, instance_type='ml.t3.2xlarge')

In [None]:
# Make predictions (replace with your test data)
# result = predictor.predict('sample test data')
# print(result)

In [None]:
# sagemaker_client = boto3.client('sagemaker')
# training_jobs = sagemaker_client.list_training_jobs(MaxResults=5)  # Adjust as needed

# for job in training_jobs['TrainingJobSummaries']:
#     print(f"Training Job Name: {job['TrainingJobName']}")
#     print(f"Training Job Status: {job['TrainingJobStatus']}")

In [None]:
# import boto3
# import time

# sagemaker_client = boto3.client('sagemaker')

# # Replace 'your-training-job-name' with your actual job name
# job_name = 'sk-network-2024-08-20-09-00-47-671'

# response = sagemaker_client.describe_training_job(TrainingJobName=job_name)
# job_status = response['TrainingJobStatus']
# start_time = response['TrainingStartTime']
# end_time = response.get('TrainingEndTime', None)

# if end_time:
#     elapsed_time = end_time - start_time
# else:
#     elapsed_time = time.time() - start_time.timestamp()

# print(f"Training Job Status: {job_status}")
# print(f"Start Time: {start_time}")
# if end_time:
#     print(f"End Time: {end_time}")
# print(f"Elapsed Time (in seconds): {elapsed_time}")


In [None]:
# job_name = sklearn_estimator.latest_training_job.name  # Get the actual job name

# # Monitor the training job
# response = sagemaker_client.describe_training_job(TrainingJobName=job_name)
# job_status = response['TrainingJobStatus']
# start_time = response['TrainingStartTime']
# end_time = response.get('TrainingEndTime', None)

# if end_time:
#     elapsed_time = end_time - start_time
# else:
#     elapsed_time = time.time() - start_time.timestamp()

# print(f"Training Job Name: {job_name}")
# print(f"Training Job Status: {job_status}")
# print(f"Start Time: {start_time}")
# if end_time:
#     print(f"End Time: {end_time}")
# print(f"Elapsed Time (in seconds): {elapsed_time}")