In [1]:
# This command upgrades the 'numexpr' library using pip
!pip install --upgrade numexpr

Collecting numexpr
  Downloading numexpr-2.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.9 kB)
Downloading numexpr-2.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.2/375.2 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: numexpr
  Attempting uninstall: numexpr
    Found existing installation: numexpr 2.8.8
    Uninstalling numexpr-2.8.8:
      Successfully uninstalled numexpr-2.8.8
Successfully installed numexpr-2.9.0


In [3]:
# Import necessary packages
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.inputs import TrainingInput
import boto3
import os

In [4]:
# Session object through which the rest of the notebook talks to SageMaker
sagemaker_session = sagemaker.Session()

# IAM execution role for SageMaker, and the AWS region the session is bound to
role, region = get_execution_role(), sagemaker_session.boto_region_name

In [5]:
# Show the resolved execution role and region, one per line
print(role, region, sep='\n')

arn:aws:iam::661652319211:role/LabRole
us-east-1


In [7]:
# Import the pandas library as 'pd' for data manipulation and analysis
import pandas as pd

# Import the train_test_split function from scikit-learn for data splitting
from sklearn.model_selection import train_test_split

In [13]:
# Load the player statistics CSV (path is relative to the working directory)
NBA_STATS_CSV = '2023_nba_player_stats_with_usage_rate.csv'
nba_data = pd.read_csv(NBA_STATS_CSV)

In [14]:
# Remove columns that hold strings or stats directly correlated with others.
# All columns are dropped in a single call so that a misspelled or missing
# column name raises before `nba_data` is reassigned, instead of leaving the
# frame partially pruned.
columns_to_drop = [
    'PName', 'POS', 'Team',
    'FGM', 'FGA',
    '3PM', '3PA',
    'FTM', 'FTA',
    'FP', 'DD2', 'TD3',
]
nba_data = nba_data.drop(columns=columns_to_drop)

In [15]:
# Build an element-wise missing-value mask for the `nba_data` DataFrame
# (True marks a NaN cell); as the last expression in the cell, the resulting
# boolean frame is what the notebook displays.
nba_data.isna()

Unnamed: 0,Age,GP,W,L,Min,PTS,FG%,3P%,FT%,OREB,DREB,REB,AST,TOV,STL,BLK,PF,+/-,Usage Rate
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
534,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
535,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
536,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
537,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [16]:
# Discard every row of `nba_data` that contains at least one missing value (NaN)
nba_data = nba_data.dropna(axis=0, how='any')

In [17]:
# 'PTS' is the target column; every other remaining column is a feature.
target_column = 'PTS'

# Feature matrix: the frame without the target column
X = nba_data.drop(columns=target_column)

# Target vector: 'PTS' cast to integers
y = nba_data[target_column].astype(int)

# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [18]:
# Re-join labels and features column-wise, with the label placed first,
# for both the training and the validation split
nba_train_data = pd.concat([y_train, X_train], axis='columns')
nba_validation_data = pd.concat([y_val, X_val], axis='columns')

# Write each split to its own CSV file, omitting the header row and the index
for frame, csv_path in (
    (nba_train_data, 'NBAData_train.csv'),
    (nba_validation_data, 'NBAData_validation.csv'),
):
    frame.to_csv(csv_path, header=False, index=False)

In [19]:
# S3 destination: the session's default bucket plus a key prefix for this project
bucket = sagemaker_session.default_bucket()
prefix = 'sagemaker/nba/classification'

# Local CSV files written by the export step
local_train = 'NBAData_train.csv'
local_validation = 'NBAData_validation.csv'

# Upload training data first, then validation data, keeping the returned S3 URIs
train_uri, validation_uri = (
    sagemaker_session.upload_data(local_path, bucket=bucket, key_prefix=prefix)
    for local_path in (local_train, local_validation)
)

In [20]:
# Report where the training and validation files ended up in S3
for label, uri in (
    ("Training URI: ", train_uri),
    ("Validation URI: ", validation_uri),
):
    print(label, uri)

Training URI:  s3://sagemaker-us-east-1-661652319211/sagemaker/nba/classification/NBAData_train.csv
Validation URI:  s3://sagemaker-us-east-1-661652319211/sagemaker/nba/classification/NBAData_validation.csv


In [22]:
from sagemaker import image_uris

# Look up the Linear Learner container image URI for the session's region
container = image_uris.retrieve(
    region=region,
    framework='linear-learner',
)

In [21]:
# Dimensions of the cleaned `nba_data` DataFrame. The column count still
# includes the 'PTS' target column, so the number of model inputs is one less.
num_rows = len(nba_data)
num_features = len(nba_data.columns)

# Print both counts
print("Number of Rows:", num_rows)
print("Number of Features:", num_features)

Number of Rows: 539
Number of Features: 19


In [23]:
# Create a SageMaker estimator backed by the Linear Learner container image.
# Model artifacts are written under the project prefix in the session bucket.
linear_learner = sagemaker.estimator.Estimator(container,
                                               role,
                                               instance_count=1,
                                               instance_type='ml.m5.large',
                                               output_path=f's3://{bucket}/{prefix}/output',
                                               sagemaker_session=sagemaker_session)

# Set hyperparameters for the Linear Learner.
# feature_dim is derived from the training feature matrix instead of being
# hard-coded, so it stays correct if the set of dropped columns changes.
linear_learner.set_hyperparameters(feature_dim=X_train.shape[1],  # Number of input features (excluding target)
                                   mini_batch_size=32,  # Size of mini-batches for training
                                   predictor_type='regressor',  # Specify 'regressor' for regression
                                   normalize_data=True,  # Normalize input features
                                   normalize_label=True)  # Normalize target variable for regression

In [24]:
# Describe the two CSV input channels, then launch the training job on them
data_channels = {
    'train': TrainingInput(train_uri, content_type='text/csv'),
    'validation': TrainingInput(validation_uri, content_type='text/csv'),
}
linear_learner.fit(data_channels)

INFO:sagemaker:Creating training-job with name: linear-learner-2024-03-11-00-54-23-661


2024-03-11 00:54:23 Starting - Starting the training job...
2024-03-11 00:54:38 Starting - Preparing the instances for training...
2024-03-11 00:55:20 Downloading - Downloading input data......
2024-03-11 00:56:00 Downloading - Downloading the training image......
2024-03-11 00:57:20 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[03/11/2024 00:57:22 INFO 139650672301888] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bias': '0.0', 'opt

In [25]:
# The endpoint and its configuration share one name
EndpointConfig = Endpoint = "regression-linear-learner-endpoint"

In [26]:
import boto3

def delete_sagemaker_endpoint(endpoint_name):
    """Delete the SageMaker endpoint configuration and endpoint named ``endpoint_name``.

    The configuration and the endpoint are handled independently: a missing
    configuration (or a failure while deleting it) does not prevent the
    endpoint itself from being looked up and deleted.

    The SageMaker client is created with the module-level ``region``.

    Args:
        endpoint_name: Name used both as ``EndpointConfigName`` and as
            ``EndpointName``.

    Returns:
        bool: True when each of the two resources was either deleted or
        reported as not found; False when an unexpected error occurred for
        at least one of them.
    """
    # Named `client` (not `sagemaker`) so the imported module is not shadowed.
    client = boto3.client('sagemaker', region_name=region)

    # (label used in messages, not-found marker in the error text,
    #  describe call, delete call, keyword arguments for both calls)
    steps = (
        ("Endpoint configuration",
         "Could not find endpoint configuration",
         client.describe_endpoint_config,
         client.delete_endpoint_config,
         {"EndpointConfigName": endpoint_name}),
        ("Endpoint",
         "Could not find endpoint",
         client.describe_endpoint,
         client.delete_endpoint,
         {"EndpointName": endpoint_name}),
    )

    succeeded = True
    for label, not_found_marker, describe, delete, kwargs in steps:
        try:
            # describe raises when the resource does not exist
            describe(**kwargs)
            # NOTE(review): deletion presumably completes asynchronously on the
            # service side - confirm whether a wait is needed before re-creating
            # a resource under the same name.
            delete(**kwargs)
            print(f"{label} '{endpoint_name}' has been deleted.")
        except Exception as e:
            error_message = str(e)
            if not_found_marker in error_message:
                # Nothing to delete: treated as success
                print(f"{label} '{endpoint_name}' not found. No action taken.")
            else:
                print(f"Error deleting SageMaker endpoint and configuration: {error_message}")
                succeeded = False

    return succeeded

In [27]:
# Remove the endpoint and its configuration, then report the outcome
result = delete_sagemaker_endpoint(Endpoint)
print(
    f"Endpoint '{Endpoint}' and its configuration have been deleted."
    if result
    else f"Failed to delete endpoint '{Endpoint}' and its configuration."
)

Endpoint configuration 'regression-linear-learner-endpoint' has been deleted.
Endpoint 'regression-linear-learner-endpoint' has been deleted.
Endpoint 'regression-linear-learner-endpoint' and its configuration have been deleted.


In [28]:
import boto3

# Low-level boto3 client for the SageMaker service
sagemaker_client = boto3.client('sagemaker')

# Host the trained Linear Learner model behind a named real-time endpoint
linear_predictor = linear_learner.deploy(
    endpoint_name=Endpoint,  # Custom endpoint name
    instance_type='ml.m5.large',  # Type of instance for serving
    initial_instance_count=1,  # Number of initial instances
)

INFO:sagemaker:Creating model with name: linear-learner-2024-03-11-00-58-59-766
INFO:sagemaker:Creating endpoint-config with name regression-linear-learner-endpoint
INFO:sagemaker:Creating endpoint with name regression-linear-learner-endpoint


-------!

In [29]:
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer

# Requests are sent as CSV (Comma-Separated Values) and responses are parsed
# as JSON (JavaScript Object Notation)
linear_predictor.serializer, linear_predictor.deserializer = (
    CSVSerializer(),
    JSONDeserializer(),
)

In [32]:
# One hand-written data point: 18 values, one per model input feature
sample_data = [25, 74, 52, 22, 2732.2, 46.6, 35, 85.4, 78, 571, 649, 342, 213, 78, 51, 160, 470, 0.76920045]

# Serialise the values into a single comma-separated line
query_data_csv = ','.join(map(str, sample_data))

# Send the row to the deployed endpoint
response = linear_predictor.predict(query_data_csv)

# The score of the first (and only) prediction is the model's estimate
predicted_score = response['predictions'][0]['score']
print("Predicted value:", predicted_score)

Predicted value: 2016.018798828125


In [33]:
# Clean up: remove the endpoint and its configuration, then report the outcome
result = delete_sagemaker_endpoint(Endpoint)
print(
    f"Endpoint '{Endpoint}' and its configuration have been deleted."
    if result
    else f"Failed to delete endpoint '{Endpoint}' and its configuration."
)

Endpoint configuration 'regression-linear-learner-endpoint' has been deleted.
Endpoint 'regression-linear-learner-endpoint' has been deleted.
Endpoint 'regression-linear-learner-endpoint' and its configuration have been deleted.
