In [1]:
# Setup: imports for deploying and invoking the SageMaker endpoint

import os
import json

import boto3
import sagemaker
from sagemaker.pytorch import PyTorchModel
from sagemaker import get_execution_role, Session

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [2]:
# S3 key prefix for this project. Neither `prefix` nor `bucket` is referenced
# again in the visible cells; they are kept for any code that relies on them.
prefix = "sagemaker/pytorch-toneclone"

# SageMaker session and the default bucket associated with it.
sess = Session()
bucket = sess.default_bucket()

# IAM execution role; passed to PyTorchModel below.
role = get_execution_role()

In [3]:
# Wrap the packaged model artifact in a SageMaker PyTorchModel.
# Nothing is deployed here; the endpoint is created by model.deploy() later.
model = PyTorchModel(
    # Packaged model archive stored in S3.
    model_data="s3://toneclone-bucket/Models/multi_effects_model_alt7.tar.gz",
    # Script supplied as the model's entry point.
    entry_point="spectrogramCNN_inference.py",
    # Framework and Python versions requested for the model.
    framework_version="1.11.0",
    py_version="py38",
    # Execution role obtained from get_execution_role().
    role=role,
)

In [4]:
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import NumpySerializer

# Deploy the model to an endpoint named 'ToneClone' on a single ml.t2.large
# instance. The returned predictor serializes requests with NumpySerializer
# and parses responses with JSONDeserializer.
predictor = model.deploy(
    endpoint_name='ToneClone',
    instance_type='ml.t2.large',
    initial_instance_count=1,
    serializer=NumpySerializer(),
    deserializer=JSONDeserializer(),
)

--------------!

In [5]:
import numpy as np

# Local .npy fixtures used to exercise the endpoint.
# NOTE(review): judging by the responses, the first file yields one segment and
# the second yields several - confirm the expected array shapes.
single_path = 'single_spectrogram.npy'
multiple_path = 'spectrograms.npy'

test_single = np.load(single_path)
test_multiple = np.load(multiple_path)

In [6]:
# Send the single-spectrogram array to the endpoint through the SDK predictor;
# the predictor's JSONDeserializer returns the reply as a Python object.
response_single = predictor.predict(test_single)

In [7]:
# Display the parsed prediction for the single-spectrogram request.
response_single

{'Segment 1 (00:00 - 00:10)': [['fuzz', 'auto_filter'],
  {'overdrive': 7.736129919067025e-05,
   'distortion': 5.564602667540441e-10,
   'fuzz': 0.9999750852584839,
   'tremolo': 5.1055732086524586e-08,
   'phaser': 3.926947300669781e-08,
   'flanger': 1.1052916394638235e-11,
   'chorus': 0.0004993336624465883,
   'delay': 3.7162229205023323e-07,
   'hall_reverb': 5.9282434138800775e-15,
   'plate_reverb': 7.191102895376389e-07,
   'octaver': 2.9258508220664226e-06,
   'auto_filter': 0.9942409992218018}]}

In [8]:
# Send the multi-spectrogram array to the endpoint through the SDK predictor;
# the predictor's JSONDeserializer returns the reply as a Python object.
response_multiple = predictor.predict(test_multiple)

In [9]:
# Display the parsed predictions for the multi-spectrogram request.
response_multiple

{'Segment 1 (00:00 - 00:10)': [['fuzz', 'auto_filter'],
  {'overdrive': 7.736114639556035e-05,
   'distortion': 5.56457102618424e-10,
   'fuzz': 0.9999750852584839,
   'tremolo': 5.105602340904625e-08,
   'phaser': 3.926954761368506e-08,
   'flanger': 1.1052958895363396e-11,
   'chorus': 0.0004993343609385192,
   'delay': 3.7162195098972006e-07,
   'hall_reverb': 5.928198097617399e-15,
   'plate_reverb': 7.191068789325072e-07,
   'octaver': 2.9258394533826504e-06,
   'auto_filter': 0.9942409992218018}],
 'Segment 2 (00:10 - 00:20)': [['fuzz', 'auto_filter'],
  {'overdrive': 0.00035637745168060064,
   'distortion': 9.94045179503189e-10,
   'fuzz': 0.9999526739120483,
   'tremolo': 8.3157543073753e-09,
   'phaser': 2.534349796690094e-08,
   'flanger': 3.0488972891512534e-12,
   'chorus': 3.398962871870026e-05,
   'delay': 1.6342679431957663e-09,
   'hall_reverb': 5.417352764297556e-12,
   'plate_reverb': 0.019980454817414284,
   'octaver': 2.4755760819061834e-07,
   'auto_filter': 0.8720

In [10]:
import io
import json

import boto3

# Invoke the endpoint the way an external client would: through the low-level
# 'sagemaker-runtime' boto3 client rather than the SageMaker SDK predictor.
endpoint = 'ToneClone'

runtime = boto3.Session().client('sagemaker-runtime')

# Serialize the array to .npy bytes in memory. An in-memory buffer replaces the
# previous TemporaryFile, which was never closed and therefore leaked an OS
# file handle (and an on-disk temp file) for the lifetime of the kernel.
# BytesIO is still a seekable file-like object, so it can be passed as the
# request Body unchanged.
outfile = io.BytesIO()
np.save(outfile, test_multiple)

# Rewind so the request body is read from the first byte; the trailing
# semicolon suppresses the returned offset in the notebook output.
outfile.seek(0);

In [11]:
# Call the endpoint directly, sending the serialized array as an .npy payload.
response = runtime.invoke_endpoint(
    EndpointName=endpoint,
    ContentType='application/x-npy',
    Body=outfile,
)

In [12]:
# Display the raw invoke_endpoint response: request metadata plus the
# still-unread streaming 'Body'.
response

{'ResponseMetadata': {'RequestId': '34bd0584-6fd5-4aba-8c39-4977be70f86f',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '34bd0584-6fd5-4aba-8c39-4977be70f86f',
   'x-amzn-invoked-production-variant': 'AllTraffic',
   'date': 'Wed, 19 Mar 2025 22:15:33 GMT',
   'content-type': 'application/json',
   'content-length': '8878',
   'connection': 'keep-alive'},
  'RetryAttempts': 0},
 'ContentType': 'application/json',
 'InvokedProductionVariant': 'AllTraffic',
 'Body': <botocore.response.StreamingBody at 0x7fca9d94d4b0>}

In [13]:
# Read the response body, decode the bytes to text, and parse the JSON payload.
# NOTE(review): the Body stream is presumably consumable only once, so
# re-running this cell without re-invoking the endpoint may fail - confirm.
body_bytes = response['Body'].read()
result = json.loads(body_bytes.decode())

In [14]:
# Display the predictions parsed from the raw boto3 response.
result

{'Segment 1 (00:00 - 00:10)': [['fuzz', 'auto_filter'],
  {'overdrive': 7.736114639556035e-05,
   'distortion': 5.56457102618424e-10,
   'fuzz': 0.9999750852584839,
   'tremolo': 5.105602340904625e-08,
   'phaser': 3.926954761368506e-08,
   'flanger': 1.1052958895363396e-11,
   'chorus': 0.0004993343609385192,
   'delay': 3.7162195098972006e-07,
   'hall_reverb': 5.928198097617399e-15,
   'plate_reverb': 7.191068789325072e-07,
   'octaver': 2.9258394533826504e-06,
   'auto_filter': 0.9942409992218018}],
 'Segment 2 (00:10 - 00:20)': [['fuzz', 'auto_filter'],
  {'overdrive': 0.00035637745168060064,
   'distortion': 9.94045179503189e-10,
   'fuzz': 0.9999526739120483,
   'tremolo': 8.3157543073753e-09,
   'phaser': 2.534349796690094e-08,
   'flanger': 3.0488972891512534e-12,
   'chorus': 3.398962871870026e-05,
   'delay': 1.6342679431957663e-09,
   'hall_reverb': 5.417352764297556e-12,
   'plate_reverb': 0.019980454817414284,
   'octaver': 2.4755760819061834e-07,
   'auto_filter': 0.8720

In [22]:
# Clean up: delete the endpoint that model.deploy() created.
# NOTE(review): this cell's execution count (22) is out of sequence with the
# cells above (14); confirm the notebook survives Restart & Run All.
predictor.delete_endpoint()