# SNSDK Wrapper usage

In [1]:
import os
import sys

# Directories this notebook relies on, resolved relative to the working directory.
current_dir = os.getcwd()
utils_dir = os.path.abspath(os.path.join(current_dir, ".."))
repo_dir = os.path.abspath(os.path.join(utils_dir, ".."))

# Extend the import path before the project modules are imported, and only once,
# so re-running this cell does not keep appending duplicate entries.
for extra_path in (utils_dir, repo_dir):
    if extra_path not in sys.path:
        sys.path.append(extra_path)

from src import sambastudio_utils
from src.snsdk_wrapper import SnsdkWrapper

## Wrapper usage with config file

In [2]:
# Build the wrapper from the config.yaml located in the current working directory.
config_path = os.path.join(current_dir, "config.yaml")
sambastudio_wrapper = SnsdkWrapper(config_path=config_path)

2024-07-29 16:10:54,443 [INFO] Using config file located in /Users/jorgep/Documents/ask_public_own/ai-starter-kit/utils/fine_tuning/config.yaml
2024-07-29 16:10:54,445 [INFO] Using variables from Snapi config to set up Snsdk.


### Dataset preparation

In [3]:
# Run the data-preparation pipeline on the source jsonl.
# `input_files`: set with your jsonl path or list of jsonl file paths.
pretraining_jsonl = os.path.join(
    repo_dir, "fine_tuning_sql", "data", "pre-training", "pretrain-squad-smol-sql.jsonl"
)
prepared_dataset_dir = os.path.join(current_dir, "sambastudio_fine_tuning_dataset")

sambastudio_utils.gen_data_prep_pipeline(
    input_files=pretraining_jsonl,
    output_path=prepared_dataset_dir,
    tokenizer="meta-llama/Llama-2-7b-hf",
    max_seq_length=4096,
)

2024-07-29 16:11:00,425 [INFO] input jsonl files merged into /Users/jorgep/Documents/ask_public_own/ai-starter-kit/fine_tuning_sql/data/pre-training/pretrain-squad-smol-sql_merged.jsonl
2024-07-29 16:11:06,415 [INFO] Processing ./generative_data_prep
2024-07-29 16:11:06,416 [INFO] Installing build dependencies: started
2024-07-29 16:11:06,416 [INFO] Installing build dependencies: finished with status 'done'
2024-07-29 16:11:06,416 [INFO] Getting requirements to build wheel: started
2024-07-29 16:11:06,417 [INFO] Getting requirements to build wheel: finished with status 'done'
2024-07-29 16:11:06,417 [INFO] Preparing metadata (pyproject.toml): started
2024-07-29 16:11:06,417 [INFO] Preparing metadata (pyproject.toml): finished with status 'done'
2024-07-29 16:11:06,427 [INFO] Building wheels for collected packages: sambanova-generative-data-prep
2024-07-29 16:11:06,427 [INFO] Building wheel for sambanova-generative-data-prep (pyproject.toml): started
2024-07-29 16:11:06,428 [INFO] Build

'/Users/jorgep/Documents/ask_public_own/ai-starter-kit/utils/fine_tuning/sambastudio_fine-tuning_dataset'

### Dataset Upload

In [3]:
# Create the dataset with no explicit arguments (config-file flow) and show the returned value.
dataset_id = sambastudio_wrapper.create_dataset()
dataset_id

2024-07-24 17:38:39,286 [INFO] App with name 'Mistral' found with id ab46c162-ff57-4700-beae-3ded6065ee7a
2024-07-24 17:38:39,463 [INFO] App with name 'Llama 3' found with id ad39e323-9878-4914-8e29-82c9f2939475
2024-07-24 17:38:39,657 [INFO] App with name 'Llama 2 with dynamic batching' found with id 2a633d60-ed93-47e8-b2d4-3a5aa345e320
2024-07-24 17:38:39,858 [INFO] App with name 'Llama 2 7B' found with id ec012370-6ffa-4a3a-b230-2c62613f1d89
2024-07-24 17:38:40,038 [INFO] App with name 'Llama 2 70B with dynamic batching' found with id 0b5871de-f335-43c6-a718-1300c1ef02b8
2024-07-24 17:38:40,216 [INFO] App with name 'Llama 2 70B' found with id 82254d3b-7239-458b-9da8-da1aca9b7fba
2024-07-24 17:38:40,395 [INFO] App with name 'Llama 2 13B' found with id 1bf617cb-8afb-4bbd-b92f-c15ebfdca10b
2024-07-24 17:38:40,672 [INFO] Dataset with name 'smol_sql_dataset' not found
2024-07-24 17:41:22,226 [INFO] Dataset with name 'smol_sql_dataset' found with id 411c9192-509d-4044-8929-b0cedb4fcd43
20

'411c9192-509d-4044-8929-b0cedb4fcd43'

In [4]:
# Show the datasets reported by the wrapper.
available_datasets = sambastudio_wrapper.list_datasets()
available_datasets

[{'id': '1ea54b9c-f1f6-49d4-91f6-e230eec7c259',
  'dataset_name': 'Caltech_256_Clip'},
 {'id': '7198371d-0f70-4287-8645-8e3807782d30',
  'dataset_name': 'Coding_Generative_Inference_Dataset'},
 {'id': '0e277df5-0b7e-45a7-874e-c5b10364c0fe',
  'dataset_name': 'Coding_Generative_Train_4k_SS_Dataset'},
 {'id': '69d0d494-bc0a-4e1c-8dd3-84b1f55b0098', 'dataset_name': 'FiQA'},
 {'id': '17be1fa1-2ed2-4503-9ccc-99be2c905010',
  'dataset_name': 'Generative_Inference_Dataset'},
 {'id': '9bac6a44-92b2-40ac-9a4d-ba72038ac27b',
  'dataset_name': 'GPT_13B_8k_SS_Toy_Training_Dataset'},
 {'id': '6090aaa0-85ef-469f-ad2d-7bf1a5f47f1f',
  'dataset_name': 'GPT_13B_Inference_Dataset'},
 {'id': '894dd158-9552-11ed-a1eb-0242ac120002',
  'dataset_name': 'GPT_13B_Training_Dataset'},
 {'id': 'f66434c5-4944-48a8-81c9-6eb7fca7e4b9',
  'dataset_name': 'GPT_1.5B_Inference_Dataset'},
 {'id': 'd8e19905-bd36-4469-b13b-51da48b62e75',
  'dataset_name': 'GPT_1.5B_Training_Dataset'},
 {'id': 'f0971f8b-bcc3-401f-b107-47c67

### Project creation

In [10]:
# Create the project with no explicit arguments (config-file flow) and show the returned value.
project_id = sambastudio_wrapper.create_project()
project_id

2024-07-24 17:44:22,297 [INFO] Project with name 'example project' found with id 51b1fe13-dcdb-41e3-8a78-514da36937c8
2024-07-24 17:44:22,297 [INFO] Project with name 'example project' already exists with id '51b1fe13-dcdb-41e3-8a78-514da36937c8', using it


'51b1fe13-dcdb-41e3-8a78-514da36937c8'

In [11]:
# Show the projects reported by the wrapper.
available_projects = sambastudio_wrapper.list_projects()
available_projects

[{'project_name': 'Benchmarking_Llama7b',
  'project_id': '8d752994-bfd1-4eee-98a8-9ca0d50b8b37',
  'status': 'Available',
  'user_id': 'amit.kushwaha'},
 {'project_name': 'example project',
  'project_id': '51b1fe13-dcdb-41e3-8a78-514da36937c8',
  'status': 'Available',
  'user_id': 'jorge.piedrahita'},
 {'project_name': 'kit_testing',
  'project_id': '3c1dd70d-9976-4c58-9f82-873d01ebeb3e',
  'status': 'Available',
  'user_id': 'luis.salazar'},
 {'project_name': 'Shared',
  'project_id': 'b9896d2e-5054-4937-978f-67413104d6ba',
  'status': 'Available',
  'user_id': 'varun.krishna'},
 {'project_name': 'SNSDK-E2E-Finetuning_Embeddings-Project',
  'project_id': 'e2d0c5dc-894e-461a-9730-a814904db9f7',
  'status': 'Available',
  'user_id': 'rodrigo.maldonado'},
 {'project_name': 'Test_Finetune_Embeddings',
  'project_id': '6f1d50bc-cd1b-4574-b901-70fe929f5aa4',
  'status': 'Available',
  'user_id': 'francesca.raimondi'}]

### Training Job creation

In [3]:
# Launch the job with no explicit arguments (config-file flow) and show the returned value.
job_id = sambastudio_wrapper.run_job()
job_id

2024-07-24 17:45:25,313 [INFO] Project with name 'example project' found with id 51b1fe13-dcdb-41e3-8a78-514da36937c8
2024-07-24 17:45:26,286 [INFO] Model 'Llama-2-7b-chat-hf' with id '6090d4ac-a7bd-4c46-b417-7f8e42cf7bdb' available for training and deployment found
2024-07-24 17:45:26,535 [INFO] Dataset with name 'smol_sql_dataset' found with id 411c9192-509d-4044-8929-b0cedb4fcd43
2024-07-24 17:45:26,790 [INFO] Job with name 'snsdk_test_job2' created: '{'job_id': '7c1459bc-2f0c-44ef-bee1-5aa804109199', 'job_name': 'snsdk_test_job2', 'job_type': 'train', 'user_id': 'jorge.piedrahita', 'project_id': '51b1fe13-dcdb-41e3-8a78-514da36937c8', 'tenant_id': '41ceaded-9f08-47ae-aa02-15f39c899618', 'rdu_arch': 'sn20', 'result_path': '', 'parallel_instances': 1, 'app_id': 'ec012370-6ffa-4a3a-b230-2c62613f1d89', 'model_checkpoint': '6090d4ac-a7bd-4c46-b417-7f8e42cf7bdb', 'checkpoint_id': '', 'dataset_id': '411c9192-509d-4044-8929-b0cedb4fcd43', 'description': 'snsdk test training project', 'stat

'7c1459bc-2f0c-44ef-bee1-5aa804109199'

In [3]:
# Query the job's progress with no explicit arguments (config-file flow).
job_progress = sambastudio_wrapper.check_job_progress()
job_progress

2024-07-24 17:49:41,937 [INFO] Project with name 'example project' found with id 51b1fe13-dcdb-41e3-8a78-514da36937c8
2024-07-24 17:49:42,198 [INFO] Project with name 'example project' found with id 51b1fe13-dcdb-41e3-8a78-514da36937c8
2024-07-24 17:49:42,450 [INFO] Job with name 'snsdk_test_job' in project 'example project' found with id '385632fa-284c-42a2-892a-b1bfb6161a7e'
2024-07-24 17:49:42,730 [ERROR] Failed to check job progress. Details: {'detail': 'view job action not authorized for the user', 'status_code': 403}


Exception: Error message: {'detail': 'view job action not authorized for the user', 'status_code': 403}

### Model Checkpoint promotion

In [4]:
# List the job's checkpoints with no explicit arguments (config-file flow).
job_checkpoints = sambastudio_wrapper.list_checkpoints()
job_checkpoints

2024-07-24 17:49:51,596 [INFO] Project with name 'example project' found with id 51b1fe13-dcdb-41e3-8a78-514da36937c8
2024-07-24 17:49:51,844 [INFO] Project with name 'example project' found with id 51b1fe13-dcdb-41e3-8a78-514da36937c8
2024-07-24 17:49:52,093 [INFO] Job with name 'snsdk_test_job' in project 'example project' found with id '385632fa-284c-42a2-892a-b1bfb6161a7e'
2024-07-24 17:49:52,364 [ERROR] Failed to list checkpoints. Details: {'detail': 'view checkpoints action not authorized for the user', 'status_code': 403}


Exception: Error message: {'detail': 'view checkpoints action not authorized for the user', 'status_code': 403}

In [5]:
# Checkpoint to promote; replace with an id listed for your own job.
checkpoint_id = "385632fa-284c-42a2-892a-b1bfb6161a7e-50"
promoted_model = sambastudio_wrapper.promote_checkpoint(checkpoint_id=checkpoint_id)
promoted_model

2024-07-24 17:50:05,329 [INFO] Project with name 'example project' found with id 51b1fe13-dcdb-41e3-8a78-514da36937c8
2024-07-24 17:50:05,596 [INFO] Project with name 'example project' found with id 51b1fe13-dcdb-41e3-8a78-514da36937c8
2024-07-24 17:50:05,846 [INFO] Job with name 'snsdk_test_job' in project 'example project' found with id '385632fa-284c-42a2-892a-b1bfb6161a7e'
2024-07-24 17:50:06,122 [ERROR] Failed to promote checkpoint '385632fa-284c-42a2-892a-b1bfb6161a7e-50' to model. Details: {'detail': 'checkpoint promote action is not authorized for the user', 'status_code': 403}


Exception: Error message: {'detail': 'checkpoint promote action is not authorized for the user', 'status_code': 403}

In [6]:
# List the models matching the "deploy" job type.
# Uses the `filter_job_types` keyword, the same one the other `list_models`
# calls in this notebook pass, instead of the inconsistent `filter`.
sambastudio_wrapper.list_models(filter_job_types=["deploy"])

[{'model_id': '47ee0428-f9f8-6d8c-d02e-1ac778b83eb9',
  'model_checkpoint_name': 'CLIP ViT-B-32 Backbone (Deprecated)'},
 {'model_id': '35dfd99d-888b-4331-b78a-ea5c744224ad',
  'model_checkpoint_name': 'GPT_1.5B_Dialog_Act_Classification_Finetuned'},
 {'model_id': 'cb800e99-809b-42d2-a6e2-11b3707ef433',
  'model_checkpoint_name': 'GPT_13B_Human_Aligned_Instruction_Tuned_V2'},
 {'model_id': '0d3a9c87-99e8-4d22-af39-05ab91b84238',
  'model_checkpoint_name': 'GPT_13B_Generative_Inference'},
 {'model_id': 'c7be342b-208b-4393-b5c2-496aa54eb917',
  'model_checkpoint_name': 'GPT13B 2k SS HAv3'},
 {'model_id': '3cac564a-e822-46fb-a128-1ab34a9d93ed',
  'model_checkpoint_name': 'GPT13B 2k SS ITv3'},
 {'model_id': '673ea90a-3d25-43ed-9965-55c393e91ddc',
  'model_checkpoint_name': 'GPT_1.5B_GT_Finetuned'},
 {'model_id': '877fa680-b45f-4fff-a57a-0a3fcd2d0fd7',
  'model_checkpoint_name': 'GPT13B 8k SS HAv3'},
 {'model_id': 'c60a7f67-d9b2-46fd-9c9b-d3f02fb2b6e0',
  'model_checkpoint_name': 'GPT13B 8k

### Endpoint creation

In [8]:
# Create the endpoint with no explicit arguments (config-file flow) and show the returned value.
endpoint_id = sambastudio_wrapper.create_endpoint()
endpoint_id

2024-07-24 17:52:26,578 [INFO] Project with name 'example project' found with id 51b1fe13-dcdb-41e3-8a78-514da36937c8
2024-07-24 17:52:26,851 [INFO] Model with name 'llama2_7b_fine_tuned_nstext2sql' found with id c828d8d6-bbc6-4315-8573-014a183ffa58
2024-07-24 17:52:27,059 [INFO] Endpoint with name 'test-endpoint-sql' not found in project '51b1fe13-dcdb-41e3-8a78-514da36937c8'
2024-07-24 17:52:27,303 [INFO] Endpoint 'test-endpoint-sql' created


'2cc9e176-001d-407c-8170-010dcad06555'

In [10]:
# Fetch the endpoint details with no explicit arguments (config-file flow).
# NOTE: the displayed result includes the endpoint API key; clear the output before sharing.
endpoint_details = sambastudio_wrapper.get_endpoint_details()
endpoint_details

2024-07-24 17:53:02,674 [INFO] Project with name 'example project' found with id 51b1fe13-dcdb-41e3-8a78-514da36937c8


{'status': 'SettingUp',
 'url': '/api/predict/generic/51b1fe13-dcdb-41e3-8a78-514da36937c8/2cc9e176-001d-407c-8170-010dcad06555',
 'langchain wrapper env': {'SAMBASTUDIO_BASE_URL': 'https://sjc3-demo1.sambanova.net',
  'SAMBASTUDIO_BASE_URI': 'api/predict/generic',
  'SAMBASTUDIO_PROJECT_ID': '51b1fe13-dcdb-41e3-8a78-514da36937c8',
  'SAMBASTUDIO_ENDPOINT_ID': '2cc9e176-001d-407c-8170-010dcad06555',
  'SAMBASTUDIO_API_KEY': '<redacted>'}}

## Wrapper usage without config file

In [2]:
# Create the wrapper without a config file; the calls that follow pass their settings explicitly.
sambastudio_wrapper = SnsdkWrapper()

2024-07-24 18:01:08,584 [INFO] Using variables from Snapi config to set up Snsdk.


### Dataset preparation

In [None]:
# Run the data-preparation pipeline and keep its return value for the upload step.
# `input_files`: set with your jsonl path or list of jsonl file paths.
pretraining_jsonl = os.path.join(
    repo_dir, "fine_tuning_sql", "data", "pre-training", "pretrain-squad-smol-sql.jsonl"
)
prepared_dataset_dir = os.path.join(current_dir, "sambastudio_fine_tuning_dataset")

dataset_path = sambastudio_utils.gen_data_prep_pipeline(
    input_files=pretraining_jsonl,
    output_path=prepared_dataset_dir,
    tokenizer="meta-llama/Llama-2-7b-hf",
    max_seq_length=4096,
)

### Dataset Upload

In [13]:
# Apps the uploaded dataset should be available to.
target_apps = [
    'Mistral',
    'Llama 3',
    'Llama 2 with dynamic batching',
    'Llama 2 7B',
    'Llama 2 70B with dynamic batching',
    'Llama 2 70B',
    'Llama 2 13B',
]

# Optional file references; left empty here.
empty_metadata = {
    "labels_file": "",
    "train_filepath": "",
    "validation_filepath": "",
    "test_filepath": "",
}

# Upload the prepared dataset, spelling out every setting.
sambastudio_wrapper.create_dataset(
    dataset_name="smol_sql_pretraining_mm",
    dataset_description="test_dataset for sql finetuning",
    dataset_path=dataset_path,
    dataset_job_types=["evaluation", "train"],
    dataset_apps_availability=target_apps,
    dataset_source_type="localMachine",
    dataset_language="english",
    dataset_filetype="hdf5",
    dataset_url="",
    dataset_metadata=empty_metadata,
)

2024-07-24 17:56:56,566 [INFO] App with name 'Mistral' found with id ab46c162-ff57-4700-beae-3ded6065ee7a
2024-07-24 17:56:56,745 [INFO] App with name 'Llama 3' found with id ad39e323-9878-4914-8e29-82c9f2939475
2024-07-24 17:56:56,919 [INFO] App with name 'Llama 2 with dynamic batching' found with id 2a633d60-ed93-47e8-b2d4-3a5aa345e320
2024-07-24 17:56:57,104 [INFO] App with name 'Llama 2 7B' found with id ec012370-6ffa-4a3a-b230-2c62613f1d89
2024-07-24 17:56:57,286 [INFO] App with name 'Llama 2 70B with dynamic batching' found with id 0b5871de-f335-43c6-a718-1300c1ef02b8
2024-07-24 17:56:57,464 [INFO] App with name 'Llama 2 70B' found with id 82254d3b-7239-458b-9da8-da1aca9b7fba
2024-07-24 17:56:57,676 [INFO] App with name 'Llama 2 13B' found with id 1bf617cb-8afb-4bbd-b92f-c15ebfdca10b
2024-07-24 17:56:57,928 [INFO] Dataset with name 'smol_sql_pretraining_mm' found with id d80f6355-af9d-406b-95c5-c31854f36f2c
2024-07-24 17:56:57,929 [INFO] Dataset with name 'smol_sql_pretraining_mm

'd80f6355-af9d-406b-95c5-c31854f36f2c'

In [14]:
# Show the datasets reported by the wrapper after the upload.
available_datasets = sambastudio_wrapper.list_datasets()
available_datasets

[{'id': '1ea54b9c-f1f6-49d4-91f6-e230eec7c259',
  'dataset_name': 'Caltech_256_Clip'},
 {'id': '7198371d-0f70-4287-8645-8e3807782d30',
  'dataset_name': 'Coding_Generative_Inference_Dataset'},
 {'id': '0e277df5-0b7e-45a7-874e-c5b10364c0fe',
  'dataset_name': 'Coding_Generative_Train_4k_SS_Dataset'},
 {'id': '69d0d494-bc0a-4e1c-8dd3-84b1f55b0098', 'dataset_name': 'FiQA'},
 {'id': '17be1fa1-2ed2-4503-9ccc-99be2c905010',
  'dataset_name': 'Generative_Inference_Dataset'},
 {'id': '9bac6a44-92b2-40ac-9a4d-ba72038ac27b',
  'dataset_name': 'GPT_13B_8k_SS_Toy_Training_Dataset'},
 {'id': '6090aaa0-85ef-469f-ad2d-7bf1a5f47f1f',
  'dataset_name': 'GPT_13B_Inference_Dataset'},
 {'id': '894dd158-9552-11ed-a1eb-0242ac120002',
  'dataset_name': 'GPT_13B_Training_Dataset'},
 {'id': 'f66434c5-4944-48a8-81c9-6eb7fca7e4b9',
  'dataset_name': 'GPT_1.5B_Inference_Dataset'},
 {'id': 'd8e19905-bd36-4469-b13b-51da48b62e75',
  'dataset_name': 'GPT_1.5B_Training_Dataset'},
 {'id': 'f0971f8b-bcc3-401f-b107-47c67

### Project creation

In [15]:
# Create the project with an explicit name and description, and show the returned value.
project_id = sambastudio_wrapper.create_project(
    project_name="example project",
    project_description="this project will be used to test the Finetuning e2e pipeline implementation",
)
project_id

2024-07-24 17:57:24,625 [INFO] Project with name 'example project' found with id 51b1fe13-dcdb-41e3-8a78-514da36937c8
2024-07-24 17:57:24,626 [INFO] Project with name 'example project' already exists with id '51b1fe13-dcdb-41e3-8a78-514da36937c8', using it


'51b1fe13-dcdb-41e3-8a78-514da36937c8'

In [16]:
# Show the projects reported by the wrapper.
available_projects = sambastudio_wrapper.list_projects()
available_projects

[{'project_name': 'Benchmarking_Llama7b',
  'project_id': '8d752994-bfd1-4eee-98a8-9ca0d50b8b37',
  'status': 'Available',
  'user_id': 'amit.kushwaha'},
 {'project_name': 'example project',
  'project_id': '51b1fe13-dcdb-41e3-8a78-514da36937c8',
  'status': 'Available',
  'user_id': 'jorge.piedrahita'},
 {'project_name': 'kit_testing',
  'project_id': '3c1dd70d-9976-4c58-9f82-873d01ebeb3e',
  'status': 'Available',
  'user_id': 'luis.salazar'},
 {'project_name': 'Shared',
  'project_id': 'b9896d2e-5054-4937-978f-67413104d6ba',
  'status': 'Available',
  'user_id': 'varun.krishna'},
 {'project_name': 'SNSDK-E2E-Finetuning_Embeddings-Project',
  'project_id': 'e2d0c5dc-894e-461a-9730-a814904db9f7',
  'status': 'Available',
  'user_id': 'rodrigo.maldonado'},
 {'project_name': 'Test_Finetune_Embeddings',
  'project_id': '6f1d50bc-cd1b-4574-b901-70fe929f5aa4',
  'status': 'Available',
  'user_id': 'francesca.raimondi'}]

### Training Job creation

#### List trainable models 

In [3]:
# List models filtered by the "train" and "deploy" job types.
wanted_job_types = ["train", "deploy"]
trainable_models = sambastudio_wrapper.list_models(filter_job_types=wanted_job_types)
trainable_models

[{'model_id': '47ee0428-f9f8-6d8c-d02e-1ac778b83eb9',
  'model_checkpoint_name': 'CLIP ViT-B-32 Backbone (Deprecated)'},
 {'model_id': '35dfd99d-888b-4331-b78a-ea5c744224ad',
  'model_checkpoint_name': 'GPT_1.5B_Dialog_Act_Classification_Finetuned'},
 {'model_id': 'cb800e99-809b-42d2-a6e2-11b3707ef433',
  'model_checkpoint_name': 'GPT_13B_Human_Aligned_Instruction_Tuned_V2'},
 {'model_id': '0d3a9c87-99e8-4d22-af39-05ab91b84238',
  'model_checkpoint_name': 'GPT_13B_Generative_Inference'},
 {'model_id': 'c7be342b-208b-4393-b5c2-496aa54eb917',
  'model_checkpoint_name': 'GPT13B 2k SS HAv3'},
 {'model_id': '3cac564a-e822-46fb-a128-1ab34a9d93ed',
  'model_checkpoint_name': 'GPT13B 2k SS ITv3'},
 {'model_id': '673ea90a-3d25-43ed-9965-55c393e91ddc',
  'model_checkpoint_name': 'GPT_1.5B_GT_Finetuned'},
 {'model_id': '877fa680-b45f-4fff-a57a-0a3fcd2d0fd7',
  'model_checkpoint_name': 'GPT13B 8k SS HAv3'},
 {'model_id': 'c60a7f67-d9b2-46fd-9c9b-d3f02fb2b6e0',
  'model_checkpoint_name': 'GPT13B 8k

#### List available datasets

In [5]:
# Show the datasets that can be referenced when creating the training job.
available_datasets = sambastudio_wrapper.list_datasets()
available_datasets

[{'id': '1ea54b9c-f1f6-49d4-91f6-e230eec7c259',
  'dataset_name': 'Caltech_256_Clip'},
 {'id': '7198371d-0f70-4287-8645-8e3807782d30',
  'dataset_name': 'Coding_Generative_Inference_Dataset'},
 {'id': '0e277df5-0b7e-45a7-874e-c5b10364c0fe',
  'dataset_name': 'Coding_Generative_Train_4k_SS_Dataset'},
 {'id': '69d0d494-bc0a-4e1c-8dd3-84b1f55b0098', 'dataset_name': 'FiQA'},
 {'id': '17be1fa1-2ed2-4503-9ccc-99be2c905010',
  'dataset_name': 'Generative_Inference_Dataset'},
 {'id': '9bac6a44-92b2-40ac-9a4d-ba72038ac27b',
  'dataset_name': 'GPT_13B_8k_SS_Toy_Training_Dataset'},
 {'id': '6090aaa0-85ef-469f-ad2d-7bf1a5f47f1f',
  'dataset_name': 'GPT_13B_Inference_Dataset'},
 {'id': '894dd158-9552-11ed-a1eb-0242ac120002',
  'dataset_name': 'GPT_13B_Training_Dataset'},
 {'id': 'f66434c5-4944-48a8-81c9-6eb7fca7e4b9',
  'dataset_name': 'GPT_1.5B_Inference_Dataset'},
 {'id': 'd8e19905-bd36-4469-b13b-51da48b62e75',
  'dataset_name': 'GPT_1.5B_Training_Dataset'},
 {'id': 'f0971f8b-bcc3-401f-b107-47c67

#### Create training job

In [None]:
# Hyperparameters handed to the training job.
training_hyperparams = {
    "batch_size": 256,
    "do_eval": False,
    "eval_steps": 50,
    "evaluation_strategy": "no",
    "learning_rate": 1e-5,
    "logging_steps": 1,
    "lr_schedule": "fixed_lr",
    "max_sequence_length": 4096,
    "num_iterations": 100,
    "prompt_loss_weight": 0.0,
    "save_optimizer_state": True,
    "save_steps": 50,
    "skip_checkpoint": False,
    "subsample_eval": 0.01,
    "subsample_eval_seed": 123,
    "use_token_type_ids": True,
    "vocab_size": 32000,
    "warmup_steps": 0,
    "weight_decay": 0.1,
}

# Launch the training job with every setting passed explicitly.
sambastudio_wrapper.run_job(
    project_name="example project",
    job_name="snsdk_test_job",
    job_description="snsdk test training project",
    job_type="train",
    model="Llama-2-7b-chat-hf",
    dataset_name="smol_sql_pretraining_mm",
    parallel_instances=1,
    load_state=False,
    sub_path="",
    rdu_arch="sn20",
    hyperparams=training_hyperparams,
)

In [None]:
# Query the progress of the named job in the named project.
job_progress = sambastudio_wrapper.check_job_progress(
    project_name="example project",
    job_name="snsdk_test_job",
)
job_progress

### Checkpoint promotion

In [None]:
# List the checkpoints of the named job in the named project.
job_checkpoints = sambastudio_wrapper.list_checkpoints(
    project_name="example project",
    job_name="snsdk_test_job",
)
job_checkpoints

In [None]:
# Checkpoint to promote; replace with an id listed for your own job.
checkpoint_id = "385632fa-284c-42a2-892a-b1bfb6161a7e-50"

# Promote the checkpoint to a named model, passing every setting explicitly.
promoted_model = sambastudio_wrapper.promote_checkpoint(
    checkpoint_id=checkpoint_id,
    project_name="example project",
    job_name="snsdk_test_job",
    model_name="llama2_7b_fine_tuned_nstext2sql",
    model_description="finetuned llama2_7b model for nstext2sql",
    model_type="finetuned",
)
promoted_model

In [12]:
# List models filtered by the "deploy" job type.
deployable_models = sambastudio_wrapper.list_models(filter_job_types=["deploy"])
deployable_models

[{'model_id': '47ee0428-f9f8-6d8c-d02e-1ac778b83eb9',
  'model_checkpoint_name': 'CLIP ViT-B-32 Backbone (Deprecated)'},
 {'model_id': '35dfd99d-888b-4331-b78a-ea5c744224ad',
  'model_checkpoint_name': 'GPT_1.5B_Dialog_Act_Classification_Finetuned'},
 {'model_id': 'cb800e99-809b-42d2-a6e2-11b3707ef433',
  'model_checkpoint_name': 'GPT_13B_Human_Aligned_Instruction_Tuned_V2'},
 {'model_id': '0d3a9c87-99e8-4d22-af39-05ab91b84238',
  'model_checkpoint_name': 'GPT_13B_Generative_Inference'},
 {'model_id': 'c7be342b-208b-4393-b5c2-496aa54eb917',
  'model_checkpoint_name': 'GPT13B 2k SS HAv3'},
 {'model_id': '3cac564a-e822-46fb-a128-1ab34a9d93ed',
  'model_checkpoint_name': 'GPT13B 2k SS ITv3'},
 {'model_id': '673ea90a-3d25-43ed-9965-55c393e91ddc',
  'model_checkpoint_name': 'GPT_1.5B_GT_Finetuned'},
 {'model_id': '877fa680-b45f-4fff-a57a-0a3fcd2d0fd7',
  'model_checkpoint_name': 'GPT13B 8k SS HAv3'},
 {'model_id': 'c60a7f67-d9b2-46fd-9c9b-d3f02fb2b6e0',
  'model_checkpoint_name': 'GPT13B 8k

### Endpoint creation

In [13]:
# Create the endpoint for the promoted model, passing every setting explicitly.
endpoint_id = sambastudio_wrapper.create_endpoint(
    project_name="example project",
    endpoint_name="test-endpoint-sql",
    endpoint_description="endpoint of finetuned sql model llama2 7b",
    model_name="llama2_7b_fine_tuned_nstext2sql",
    instances=1,
    rdu_arch="sn20",
    hyperparams={},
)
endpoint_id

2024-07-24 18:02:50,297 [INFO] Project with name 'example project' found with id 51b1fe13-dcdb-41e3-8a78-514da36937c8
2024-07-24 18:02:50,541 [INFO] Model with name 'llama2_7b_fine_tuned_nstext2sql' found with id c828d8d6-bbc6-4315-8573-014a183ffa58
2024-07-24 18:02:50,737 [INFO] Endpoint with name 'test-endpoint-sql' not created it already exist with id 2cc9e176-001d-407c-8170-010dcad06555


'2cc9e176-001d-407c-8170-010dcad06555'

In [14]:
# Fetch the details of the named endpoint in the named project.
# NOTE: the displayed result includes the endpoint API key; clear the output before sharing.
endpoint_details = sambastudio_wrapper.get_endpoint_details(
    project_name="example project",
    endpoint_name="test-endpoint-sql",
)
endpoint_details

2024-07-24 18:02:54,063 [INFO] Project with name 'example project' found with id 51b1fe13-dcdb-41e3-8a78-514da36937c8


{'status': 'Live',
 'url': '/api/predict/generic/51b1fe13-dcdb-41e3-8a78-514da36937c8/2cc9e176-001d-407c-8170-010dcad06555',
 'langchain wrapper env': {'SAMBASTUDIO_BASE_URL': 'https://sjc3-demo1.sambanova.net',
  'SAMBASTUDIO_BASE_URI': 'api/predict/generic',
  'SAMBASTUDIO_PROJECT_ID': '51b1fe13-dcdb-41e3-8a78-514da36937c8',
  'SAMBASTUDIO_ENDPOINT_ID': '2cc9e176-001d-407c-8170-010dcad06555',
  'SAMBASTUDIO_API_KEY': '<redacted>'}}

## Deployed endpoint usage

In [18]:
from langchain_community.llms import SambaStudio

# Fetch the connection settings for the deployed endpoint. The project and endpoint
# are named explicitly so this cell does not depend on a config file or on whatever
# the wrapper was last called with.
endpoint_env = sambastudio_wrapper.get_endpoint_details(
    project_name="example project",
    endpoint_name="test-endpoint-sql",
)["langchain wrapper env"]

# Build the LangChain LLM from the values returned for the endpoint.
llm = SambaStudio(
    sambastudio_base_url=endpoint_env.get("SAMBASTUDIO_BASE_URL"),
    sambastudio_base_uri=endpoint_env.get("SAMBASTUDIO_BASE_URI"),
    sambastudio_project_id=endpoint_env.get("SAMBASTUDIO_PROJECT_ID"),
    sambastudio_endpoint_id=endpoint_env.get("SAMBASTUDIO_ENDPOINT_ID"),
    sambastudio_api_key=endpoint_env.get("SAMBASTUDIO_API_KEY"),
    model_kwargs={
        "do_sample": True,
        "temperature": 0.01,
        "max_tokens_to_generate": 512,
    },
)

2024-07-24 18:03:51,707 [INFO] Project with name 'example project' found with id 51b1fe13-dcdb-41e3-8a78-514da36937c8


In [20]:
# Llama-2 chat-style prompt ([INST] / <<SYS>> tags): a system instruction asking for a
# SQLite query, followed by the table schemas with sample rows and the user question.
# NOTE(review): the wording (including "bellow") presumably mirrors the prompts used for
# fine-tuning; confirm before editing the text.
prompt = """[INST]<<SYS>>
    Generate a query using valid SQLite to answer the following questions for the summarized tables schemas provided bellow.
    Do not assume the values on the database tables before generating the SQL query, always generate a SQL that query what is asked. 
    The query must be in the format: ```sql
query
```
    
    Example:
    
    ```sql
    SELECT * FROM mainTable;
    ```
    <</SYS>>
        
    
CREATE TABLE "Album" (
	"AlbumId" INTEGER NOT NULL, 
	"Title" NVARCHAR(160) NOT NULL, 
	"ArtistId" INTEGER NOT NULL, 
	PRIMARY KEY ("AlbumId"), 
	FOREIGN KEY("ArtistId") REFERENCES "Artist" ("ArtistId")
)

/*
3 rows from Album table:
AlbumId	Title	ArtistId
1	For Those About To Rock We Salute You	1
2	Balls to the Wall	2
3	Restless and Wild	2
*/


CREATE TABLE "Artist" (
	"ArtistId" INTEGER NOT NULL, 
	"Name" NVARCHAR(120), 
	PRIMARY KEY ("ArtistId")
)

/*
3 rows from Artist table:
ArtistId	Name
1	AC/DC
2	Accept
3	Aerosmith
*/


CREATE TABLE "Customer" (
	"CustomerId" INTEGER NOT NULL, 
	"FirstName" NVARCHAR(40) NOT NULL, 
	"LastName" NVARCHAR(20) NOT NULL, 
	"Company" NVARCHAR(80), 
	"Address" NVARCHAR(70), 
	"City" NVARCHAR(40), 
	"State" NVARCHAR(40), 
	"Country" NVARCHAR(40), 
	"PostalCode" NVARCHAR(10), 
	"Phone" NVARCHAR(24), 
	"Fax" NVARCHAR(24), 
	"Email" NVARCHAR(60) NOT NULL, 
	"SupportRepId" INTEGER, 
	PRIMARY KEY ("CustomerId"), 
	FOREIGN KEY("SupportRepId") REFERENCES "Employee" ("EmployeeId")
)

/*
3 rows from Customer table:
CustomerId	FirstName	LastName	Company	Address	City	State	Country	PostalCode	Phone	Fax	Email	SupportRepId
1	Luís	Gonçalves	Embraer - Empresa Brasileira de Aeronáutica S.A.	Av. Brigadeiro Faria Lima, 2170	São José dos Campos	SP	Brazil	12227-000	+55 (12) 3923-5555	+55 (12) 3923-5566	luisg@embraer.com.br	3
2	Leonie	Köhler	None	Theodor-Heuss-Straße 34	Stuttgart	None	Germany	70174	+49 0711 2842222	None	leonekohler@surfeu.de	5
3	François	Tremblay	None	1498 rue Bélanger	Montréal	QC	Canada	H2G 1A7	+1 (514) 721-4711	None	ftremblay@gmail.com	3
*/


CREATE TABLE "Employee" (
	"EmployeeId" INTEGER NOT NULL, 
	"LastName" NVARCHAR(20) NOT NULL, 
	"FirstName" NVARCHAR(20) NOT NULL, 
	"Title" NVARCHAR(30), 
	"ReportsTo" INTEGER, 
	"BirthDate" DATETIME, 
	"HireDate" DATETIME, 
	"Address" NVARCHAR(70), 
	"City" NVARCHAR(40), 
	"State" NVARCHAR(40), 
	"Country" NVARCHAR(40), 
	"PostalCode" NVARCHAR(10), 
	"Phone" NVARCHAR(24), 
	"Fax" NVARCHAR(24), 
	"Email" NVARCHAR(60), 
	PRIMARY KEY ("EmployeeId"), 
	FOREIGN KEY("ReportsTo") REFERENCES "Employee" ("EmployeeId")
)

/*
3 rows from Employee table:
EmployeeId	LastName	FirstName	Title	ReportsTo	BirthDate	HireDate	Address	City	State	Country	PostalCode	Phone	Fax	Email
1	Adams	Andrew	General Manager	None	1962-02-18 00:00:00	2002-08-14 00:00:00	11120 Jasper Ave NW	Edmonton	AB	Canada	T5K 2N1	+1 (780) 428-9482	+1 (780) 428-3457	andrew@chinookcorp.com
2	Edwards	Nancy	Sales Manager	1	1958-12-08 00:00:00	2002-05-01 00:00:00	825 8 Ave SW	Calgary	AB	Canada	T2P 2T3	+1 (403) 262-3443	+1 (403) 262-3322	nancy@chinookcorp.com
3	Peacock	Jane	Sales Support Agent	2	1973-08-29 00:00:00	2002-04-01 00:00:00	1111 6 Ave SW	Calgary	AB	Canada	T2P 5M5	+1 (403) 262-3443	+1 (403) 262-6712	jane@chinookcorp.com
*/


CREATE TABLE "Genre" (
	"GenreId" INTEGER NOT NULL, 
	"Name" NVARCHAR(120), 
	PRIMARY KEY ("GenreId")
)

/*
3 rows from Genre table:
GenreId	Name
1	Rock
2	Jazz
3	Metal
*/


CREATE TABLE "Invoice" (
	"InvoiceId" INTEGER NOT NULL, 
	"CustomerId" INTEGER NOT NULL, 
	"InvoiceDate" DATETIME NOT NULL, 
	"BillingAddress" NVARCHAR(70), 
	"BillingCity" NVARCHAR(40), 
	"BillingState" NVARCHAR(40), 
	"BillingCountry" NVARCHAR(40), 
	"BillingPostalCode" NVARCHAR(10), 
	"Total" NUMERIC(10, 2) NOT NULL, 
	PRIMARY KEY ("InvoiceId"), 
	FOREIGN KEY("CustomerId") REFERENCES "Customer" ("CustomerId")
)

/*
3 rows from Invoice table:
InvoiceId	CustomerId	InvoiceDate	BillingAddress	BillingCity	BillingState	BillingCountry	BillingPostalCode	Total
1	2	2021-01-01 00:00:00	Theodor-Heuss-Straße 34	Stuttgart	None	Germany	70174	1.98
2	4	2021-01-02 00:00:00	Ullevålsveien 14	Oslo	None	Norway	0171	3.96
3	8	2021-01-03 00:00:00	Grétrystraat 63	Brussels	None	Belgium	1000	5.94
*/


CREATE TABLE "InvoiceLine" (
	"InvoiceLineId" INTEGER NOT NULL, 
	"InvoiceId" INTEGER NOT NULL, 
	"TrackId" INTEGER NOT NULL, 
	"UnitPrice" NUMERIC(10, 2) NOT NULL, 
	"Quantity" INTEGER NOT NULL, 
	PRIMARY KEY ("InvoiceLineId"), 
	FOREIGN KEY("TrackId") REFERENCES "Track" ("TrackId"), 
	FOREIGN KEY("InvoiceId") REFERENCES "Invoice" ("InvoiceId")
)

/*
3 rows from InvoiceLine table:
InvoiceLineId	InvoiceId	TrackId	UnitPrice	Quantity
1	1	2	0.99	1
2	1	4	0.99	1
3	2	6	0.99	1
*/


CREATE TABLE "MediaType" (
	"MediaTypeId" INTEGER NOT NULL, 
	"Name" NVARCHAR(120), 
	PRIMARY KEY ("MediaTypeId")
)

/*
3 rows from MediaType table:
MediaTypeId	Name
1	MPEG audio file
2	Protected AAC audio file
3	Protected MPEG-4 video file
*/


CREATE TABLE "Playlist" (
	"PlaylistId" INTEGER NOT NULL, 
	"Name" NVARCHAR(120), 
	PRIMARY KEY ("PlaylistId")
)

/*
3 rows from Playlist table:
PlaylistId	Name
1	Music
2	Movies
3	TV Shows
*/


CREATE TABLE "PlaylistTrack" (
	"PlaylistId" INTEGER NOT NULL, 
	"TrackId" INTEGER NOT NULL, 
	PRIMARY KEY ("PlaylistId", "TrackId"), 
	FOREIGN KEY("TrackId") REFERENCES "Track" ("TrackId"), 
	FOREIGN KEY("PlaylistId") REFERENCES "Playlist" ("PlaylistId")
)

/*
3 rows from PlaylistTrack table:
PlaylistId	TrackId
1	3402
1	3389
1	3390
*/


CREATE TABLE "Track" (
	"TrackId" INTEGER NOT NULL, 
	"Name" NVARCHAR(200) NOT NULL, 
	"AlbumId" INTEGER, 
	"MediaTypeId" INTEGER NOT NULL, 
	"GenreId" INTEGER, 
	"Composer" NVARCHAR(220), 
	"Milliseconds" INTEGER NOT NULL, 
	"Bytes" INTEGER, 
	"UnitPrice" NUMERIC(10, 2) NOT NULL, 
	PRIMARY KEY ("TrackId"), 
	FOREIGN KEY("MediaTypeId") REFERENCES "MediaType" ("MediaTypeId"), 
	FOREIGN KEY("GenreId") REFERENCES "Genre" ("GenreId"), 
	FOREIGN KEY("AlbumId") REFERENCES "Album" ("AlbumId")
)

/*
3 rows from Track table:
TrackId	Name	AlbumId	MediaTypeId	GenreId	Composer	Milliseconds	Bytes	UnitPrice
1	For Those About To Rock (We Salute You)	1	1	1	Angus Young, Malcolm Young, Brian Johnson	343719	11170334	0.99
2	Balls to the Wall	2	2	1	U. Dirkschneider, W. Hoffmann, H. Frank, P. Baltes, S. Kaufmann, G. Hoffmann	342562	5510424	0.99
3	Fast As a Shark	3	2	1	F. Baltes, S. Kaufman, U. Dirkscneider & W. Hoffman	230619	3990994	0.99
*/
        
    how many music genres are in the db?
    [/INST]"""

In [21]:
# Send the prompt to the deployed endpoint and show the model's reply.
generated_sql = llm.invoke(prompt)
generated_sql

' SELECT COUNT(*) FROM "Genre"'