* Notebook created by Nov05 on 2025-01-31  
* Reference blog post (Tensorfuse): [Increase GPU quota on AWS with a Python script](https://tensorfuse.io/blog/increase-gpu-quota-on-aws-with-python-script)  
* AWS document: [ServiceQuotas.Client.**get_service_quota**()](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/service-quotas/client/get_service_quota.html)

<img src="https://raw.githubusercontent.com/nov05/pictures/refs/heads/master/Udacity/20241119_aws-mle-nanodegree/2025-01-31%2003_23_20-AWS%20Service%20Quota_ml.g6.8xlarge.jpg" width=800>   

In [3]:
from pprint import pprint
import boto3
# Quota ARN layout: arn:<partition>:servicequotas:<region>:<account-id>:<service>/<quota-code>
quota_arn = "arn:aws:servicequotas:us-east-1:570668189909:sagemaker/L-B0F91871"
arn_parts = quota_arn.split(':')
arn_region = arn_parts[3]
service_code, quota_code = arn_parts[-1].split('/')

# Service quotas are regional: bind the client to the region named in the ARN
# instead of whatever default region the local AWS profile happens to use.
client = boto3.client('service-quotas', region_name=arn_region)
response = client.get_service_quota(  ## ServiceCode, QuotaCode, ContextId
    ServiceCode=service_code,
    QuotaCode=quota_code,
)
pprint(response)

{'Quota': {'Adjustable': True,
           'GlobalQuota': False,
           'Period': {'PeriodUnit': 'HOUR', 'PeriodValue': 1},
           'QuotaAppliedAtLevel': 'ACCOUNT',
           'QuotaArn': 'arn:aws:servicequotas:us-east-1:570668189909:sagemaker/L-B0F91871',
           'QuotaCode': 'L-B0F91871',
           'QuotaName': 'ml.g6.8xlarge for endpoint usage',
           'ServiceCode': 'sagemaker',
           'ServiceName': 'Amazon SageMaker',
           'Unit': 'None',
           'UsageMetric': {'MetricDimensions': {'Class': 'None',
                                                'Resource': 'endpoint/ml.g6.8xlarge',
                                                'Service': 'SageMaker',
                                                'Type': 'Resource'},
                           'MetricName': 'ResourceCount',
                           'MetricNamespace': 'AWS/Usage',
                           'MetricStatisticRecommendation': 'Maximum'},
           'Value': 0.0},
 'ResponseMetadata'

In [15]:
import boto3
'''
⚠️ Important Warning: Avoid applying the script to all regions and instance types at once 
as this could trigger security issues on your account. Begin by applying to the 1-2 most 
essential instance types in 1-2 regions. Once those are approved, proceed with more. 
Remember, there's a cap on open service quota requests in EC2. If you've hit that cap, 
wait for current tickets to close before rerunning the script once the limit is lifted.
'''
# Quota value to ask for on every quota code listed below.
desired_value = 1

# Service that owns the quotas, and the quota codes keyed by their display names.
service_code = 'sagemaker'
quota_codes = dict([
    ('ml.g6.8xlarge for endpoint usage', 'L-B0F91871'),
])
# Alternative (disabled) configuration for EC2 GPU spot-instance quotas:
# service_code = 'ec2'
# quota_codes = {
#     'All P4, P3 and P2 Spot Instance Requests': 'L-7212CCBC',
#     'All G and VT Spot Instance Requests': 'L-3819A6DF',
#     'All P5 Spot Instance Requests': 'L-C4BD4855',
#     'All Inf Spot Instance Requests': 'L-B5D1601B',
#     'All Trn Spot Instance Requests': 'L-6B0D517C',
# }

# Regions in which to file the requests. Further candidates, currently disabled:
# 'eu-west-1', 'us-west-2', 'us-east-2', 'ap-south-1', 'eu-west-2', 'eu-west-3',
# 'eu-north-1', 'eu-central-1', 'ca-central-1'
regions = ['us-east-1']

def request_quota_increase(region, service_code, quota_code, desired_value, client=None):
    """Request a Service Quotas increase unless one is already open or unnecessary.

    Steps:
      1. Scan the quota's change-request history (all pages) and stop if a
         request is still open.
      2. Read the currently applied quota value.
      3. File an increase request only when the current value is strictly
         below ``desired_value``.

    Args:
        region: AWS region name; used for messages and to build the client
            when none is supplied.
        service_code: Service Quotas service code, e.g. ``'sagemaker'``.
        quota_code: Quota code, e.g. ``'L-B0F91871'``.
        desired_value: Quota value to request.
        client: Optional Service Quotas client to use. When omitted, a client
            bound to ``region`` is created so the request can never be sent
            to a region other than the one reported in the messages.

    Returns:
        None. Progress and errors are printed; exceptions are caught and
        reported so that a failure for one quota does not abort a batch run.
    """
    # Only these statuses mean a request is still being processed.
    # INVALID_REQUEST is a terminal failure and must not block a new request.
    open_statuses = ('PENDING', 'CASE_OPENED')
    try:
        if client is None:
            client = boto3.client('service-quotas', region_name=region)

        # Check for open quota increase requests, following pagination so that
        # an open request on a later page is not missed.
        history_kwargs = {'ServiceCode': service_code, 'QuotaCode': quota_code}
        while True:
            history = client.list_requested_service_quota_change_history_by_quota(
                **history_kwargs
            )
            for quota_request in history.get('RequestedQuotas', []):
                if quota_request['Status'] in open_statuses:
                    print(f"Open quota increase request already exists for {quota_code} "
                          f"in {region} with status {quota_request['Status']}")
                    return
            next_token = history.get('NextToken')
            if not next_token:
                break
            history_kwargs['NextToken'] = next_token

        # Check current quota
        response = client.get_service_quota(
            ServiceCode=service_code,
            QuotaCode=quota_code
        )
        current_value = response['Quota']['Value']
        print(f"Current quota for {quota_code} in {region}: {current_value}")

        # Request an increase only if the current value is strictly less than
        # the desired value; asking for the value already in place is pointless.
        if current_value < desired_value:
            client.request_service_quota_increase(
                ServiceCode=service_code,
                QuotaCode=quota_code,
                DesiredValue=desired_value
            )
            print(f"Requested quota increase for {quota_code} in {region} to {desired_value}")
        else:
            print(f"No increase needed for {quota_code} in {region}")
    except Exception as e:
        # Deliberate best effort: report and continue with the next quota/region.
        print(f"Error requesting quota increase for {quota_code} in {region}: {e}")


# Iterate over all regions and quota codes.
for region in regions:
    # Service Quotas is regional; one client per region is enough, so build it
    # once here rather than once per quota code.
    client = boto3.client('service-quotas', region_name=region)
    for quota_code in quota_codes.values():
        request_quota_increase(region, service_code, quota_code, desired_value)

Open quota increase request already exists for L-7212CCBC in us-east-1 with status CASE_OPENED
Open quota increase request already exists for L-3819A6DF in us-east-1 with status CASE_OPENED
