In [4]:
!pip install -qqqU sagemaker wandb datasets transformers

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
distributed 2022.7.0 requires tornado<6.2,>=6.0.3, but you have tornado 6.3.3 which is incompatible.[0m[31m
[0m

## Preparing Data

A big part of training LLMs lives in getting the data formatted correctly!

In [90]:
import json
import wandb
from itertools import chain

api = wandb.Api()
question_artifacts = api.artifact("wandbot/wandbot-eval/run-kinbxic4-responses:v0")

with open(question_artifacts.file()) as f:
    data = json.load(f)

columns = data["columns"]
data = data["data"]
eval_df = pd.DataFrame(data, columns=columns)

In [91]:
eval_df.head()

Unnamed: 0,query,retrieved_context,generated_response
0,Hey I have a question about using wandb with f...,"[`wandb.init()` returns a run object, and you ...",The initialization of `wandb.init()` should be...
1,Hey with wandb is it possible to link from the...,[### How do I best log models from runs in a s...,"Yes, you can link to the best run from a sweep..."
2,I am training a spacy textcat model. This proc...,[### Model Architecture\n\nOur config also def...,To log the best model's metrics instead of the...
3,Explain how I can version datasets with Weight...,[ # A 3-in-1 Intro to Weights & Biases: Comput...,Weights & Biases provides a feature called Art...
4,I'm using env = SubprocVecEnv(\n [mak...,[## WandbCallback ÂºïÊï∞\n\n| ÂºïÊï∞ | ‰ΩøÁî®Ê≥ï | | --- | -...,To record a video for a specific subprocess en...


Let's remove retrieved Japanese text

In [92]:
def contains_japanese(text):
    for char in text:
        if '‰∏Ä' <= char <= 'Èæ•':
            return True
    return False

In [93]:
s = "## WandbCallback ÂºïÊï∞\n\n| ÂºïÊï∞ | ‰ΩøÁî®Ê≥ï |"

In [94]:
eval_df["retrieved_context_en"] = [[ctx for ctx in ctxs if not contains_japanese(ctx)] for ctxs in eval_df.retrieved_context.values]

In [95]:
eval_df.head()

Unnamed: 0,query,retrieved_context,generated_response,retrieved_context_en
0,Hey I have a question about using wandb with f...,"[`wandb.init()` returns a run object, and you ...",The initialization of `wandb.init()` should be...,"[`wandb.init()` returns a run object, and you ..."
1,Hey with wandb is it possible to link from the...,[### How do I best log models from runs in a s...,"Yes, you can link to the best run from a sweep...",[### How do I best log models from runs in a s...
2,I am training a spacy textcat model. This proc...,[### Model Architecture\n\nOur config also def...,To log the best model's metrics instead of the...,[### Model Architecture\n\nOur config also def...
3,Explain how I can version datasets with Weight...,[ # A 3-in-1 Intro to Weights & Biases: Comput...,Weights & Biases provides a feature called Art...,[ # A 3-in-1 Intro to Weights & Biases: Comput...
4,I'm using env = SubprocVecEnv(\n [mak...,[## WandbCallback ÂºïÊï∞\n\n| ÂºïÊï∞ | ‰ΩøÁî®Ê≥ï | | --- | -...,To record a video for a specific subprocess en...,[## Basic Example\n\nThe W&B SB3 integration u...


In [96]:
eval_df["retrieved_context_stuff"] = ["\n".join(ctxs) for ctxs in eval_df.retrieved_context_en.values]

In [97]:
eval_df.head()

Unnamed: 0,query,retrieved_context,generated_response,retrieved_context_en,retrieved_context_stuff
0,Hey I have a question about using wandb with f...,"[`wandb.init()` returns a run object, and you ...",The initialization of `wandb.init()` should be...,"[`wandb.init()` returns a run object, and you ...","`wandb.init()` returns a run object, and you c..."
1,Hey with wandb is it possible to link from the...,[### How do I best log models from runs in a s...,"Yes, you can link to the best run from a sweep...",[### How do I best log models from runs in a s...,### How do I best log models from runs in a sw...
2,I am training a spacy textcat model. This proc...,[### Model Architecture\n\nOur config also def...,To log the best model's metrics instead of the...,[### Model Architecture\n\nOur config also def...,### Model Architecture\n\nOur config also defi...
3,Explain how I can version datasets with Weight...,[ # A 3-in-1 Intro to Weights & Biases: Comput...,Weights & Biases provides a feature called Art...,[ # A 3-in-1 Intro to Weights & Biases: Comput...,# A 3-in-1 Intro to Weights & Biases: Compute...
4,I'm using env = SubprocVecEnv(\n [mak...,[## WandbCallback ÂºïÊï∞\n\n| ÂºïÊï∞ | ‰ΩøÁî®Ê≥ï | | --- | -...,To record a video for a specific subprocess en...,[## Basic Example\n\nThe W&B SB3 integration u...,## Basic Example\n\nThe W&B SB3 integration us...


In [98]:
eval_df = eval_df.assign(tokens = eval_df['retrieved_context_stuff'].str.len())

In [99]:
eval_df.head()

Unnamed: 0,query,retrieved_context,generated_response,retrieved_context_en,retrieved_context_stuff,tokens
0,Hey I have a question about using wandb with f...,"[`wandb.init()` returns a run object, and you ...",The initialization of `wandb.init()` should be...,"[`wandb.init()` returns a run object, and you ...","`wandb.init()` returns a run object, and you c...",15505
1,Hey with wandb is it possible to link from the...,[### How do I best log models from runs in a s...,"Yes, you can link to the best run from a sweep...",[### How do I best log models from runs in a s...,### How do I best log models from runs in a sw...,13161
2,I am training a spacy textcat model. This proc...,[### Model Architecture\n\nOur config also def...,To log the best model's metrics instead of the...,[### Model Architecture\n\nOur config also def...,### Model Architecture\n\nOur config also defi...,17812
3,Explain how I can version datasets with Weight...,[ # A 3-in-1 Intro to Weights & Biases: Comput...,Weights & Biases provides a feature called Art...,[ # A 3-in-1 Intro to Weights & Biases: Comput...,# A 3-in-1 Intro to Weights & Biases: Compute...,12755
4,I'm using env = SubprocVecEnv(\n [mak...,[## WandbCallback ÂºïÊï∞\n\n| ÂºïÊï∞ | ‰ΩøÁî®Ê≥ï | | --- | -...,To record a video for a specific subprocess en...,[## Basic Example\n\nThe W&B SB3 integration u...,## Basic Example\n\nThe W&B SB3 integration us...,15480


In [107]:
eval_df = eval_df[["query", "generated_response", "retrieved_context_stuff", "tokens"]]

In [108]:
eval_df.columns = ["question", "answer", "retrieved_context", "char_len"]

In [109]:
eval_df.head()

Unnamed: 0,question,answer,retrieved_context,char_len
0,Hey I have a question about using wandb with f...,The initialization of `wandb.init()` should be...,"`wandb.init()` returns a run object, and you c...",15505
1,Hey with wandb is it possible to link from the...,"Yes, you can link to the best run from a sweep...",### How do I best log models from runs in a sw...,13161
2,I am training a spacy textcat model. This proc...,To log the best model's metrics instead of the...,### Model Architecture\n\nOur config also defi...,17812
3,Explain how I can version datasets with Weight...,Weights & Biases provides a feature called Art...,# A 3-in-1 Intro to Weights & Biases: Compute...,12755
4,I'm using env = SubprocVecEnv(\n [mak...,To record a video for a specific subprocess en...,## Basic Example\n\nThe W&B SB3 integration us...,15480


In [110]:
eval_df.to_json("eval_wandbot.json")

In [111]:
at = wandb.Artifact(
    name="wandbot_eval_dataset", 
    type="dataset",
    description="A wandbot dataset of questions and answers about W&B for evaluation")

In [112]:
with wandb.init(project="aws_llm_demo"):
    at.add_file("eval_wandbot.json")
    wandb.log_artifact(at)



VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112306155539247, max=1.0‚Ä¶



VBox(children=(Label(value='2.239 MB of 2.239 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max‚Ä¶

## Prepare the Dataset

In [113]:
api = wandb.Api()
dataset_artifact = api.artifact('capecape/wandbot/run-c1loi9jl-wandbot_questions:v0')

In [116]:
table = dataset_artifact.get("wandbot_questions")

[34m[1mwandb[0m:   1 of 1 files downloaded.  


this is a W&B table, so we can convert it to whatever format we may need

In [117]:
df = pd.DataFrame(table.data, columns=table.columns)
df.head()

Unnamed: 0,context,question,answer,page_content,metadata
0,A user has just started using the Weights & Bi...,What is a 'run' in W&B and what can I use it for?,A 'run' in W&B is the fundamental unit that yo...,import Tabs from ‚Äò@theme/Tabs‚Äô; \n\nimport Ta...,"{'file_type': '.md', 'language': 'en', 'source..."
1,,,,import Tabs from ‚Äò@theme/Tabs‚Äô; \n\nimport Ta...,"{'file_type': '.md', 'language': 'en', 'source..."
2,A user has just started using W&B and they are...,Hi! I'm new to W&B and I'm a bit stuck. Can yo...,"Sure, you can install the W&B library on your ...",import Tabs from ‚Äò@theme/Tabs‚Äô; \n\nimport Ta...,"{'file_type': '.md', 'language': 'en', 'source..."
3,,,,import Tabs from ‚Äò@theme/Tabs‚Äô; \n\nimport Ta...,"{'file_type': '.md', 'language': 'en', 'source..."
4,The user is getting started with Weights and B...,I need to track my experiment's hyperparameter...,Certainly! You can pass your hyperparameters t...,import Tabs from ‚Äò@theme/Tabs‚Äô; \n\nimport Ta...,"{'file_type': '.md', 'language': 'en', 'source..."


In [118]:
df = df.dropna()
df.head()

Unnamed: 0,context,question,answer,page_content,metadata
0,A user has just started using the Weights & Bi...,What is a 'run' in W&B and what can I use it for?,A 'run' in W&B is the fundamental unit that yo...,import Tabs from ‚Äò@theme/Tabs‚Äô; \n\nimport Ta...,"{'file_type': '.md', 'language': 'en', 'source..."
2,A user has just started using W&B and they are...,Hi! I'm new to W&B and I'm a bit stuck. Can yo...,"Sure, you can install the W&B library on your ...",import Tabs from ‚Äò@theme/Tabs‚Äô; \n\nimport Ta...,"{'file_type': '.md', 'language': 'en', 'source..."
4,The user is getting started with Weights and B...,I need to track my experiment's hyperparameter...,Certainly! You can pass your hyperparameters t...,import Tabs from ‚Äò@theme/Tabs‚Äô; \n\nimport Ta...,"{'file_type': '.md', 'language': 'en', 'source..."
5,A user is trying to do some mathematical opera...,How do I multiply two numbers using W&B functi...,"To multiply two numbers, you can use the `numb...",Value\n\n\nWhether the two values are not equ...,"{'file_type': '.md', 'language': 'en', 'source..."
6,A user has been working on making sense of a d...,"Hi, I have some confusing numbers that represe...","Yes, there is a function in W&B that allows yo...",Value\n\n\nWhether the two values are not equ...,"{'file_type': '.md', 'language': 'en', 'source..."


In [129]:
df.assign(context_len = lambda df: df.page_content.str.len()/3.6)

Unnamed: 0,context,question,answer,page_content,metadata,context_len
0,A user has just started using the Weights & Bi...,What is a 'run' in W&B and what can I use it for?,A 'run' in W&B is the fundamental unit that yo...,import Tabs from ‚Äò@theme/Tabs‚Äô; \n\nimport Ta...,"{'file_type': '.md', 'language': 'en', 'source...",1291.111111
2,A user has just started using W&B and they are...,Hi! I'm new to W&B and I'm a bit stuck. Can yo...,"Sure, you can install the W&B library on your ...",import Tabs from ‚Äò@theme/Tabs‚Äô; \n\nimport Ta...,"{'file_type': '.md', 'language': 'en', 'source...",1291.111111
4,The user is getting started with Weights and B...,I need to track my experiment's hyperparameter...,Certainly! You can pass your hyperparameters t...,import Tabs from ‚Äò@theme/Tabs‚Äô; \n\nimport Ta...,"{'file_type': '.md', 'language': 'en', 'source...",1291.111111
5,A user is trying to do some mathematical opera...,How do I multiply two numbers using W&B functi...,"To multiply two numbers, you can use the `numb...",Value\n\n\nWhether the two values are not equ...,"{'file_type': '.md', 'language': 'en', 'source...",2186.111111
6,A user has been working on making sense of a d...,"Hi, I have some confusing numbers that represe...","Yes, there is a function in W&B that allows yo...",Value\n\n\nWhether the two values are not equ...,"{'file_type': '.md', 'language': 'en', 'source...",2186.111111
...,...,...,...,...,...,...
1588,The user is running a fine-tuning task using t...,I've completed fine-tuning my GPT-3 model and ...,You can evaluate your fine-tuned model by crea...,"3, just skip this section!\n\n\nOtherwise let‚Äô...","{'file_type': '.ipynb', 'language': 'python', ...",2247.500000
1589,The user may be working with a project where t...,How can I log my fine-tunes to Weights & Biases?,You can log your fine-tunes to Weights & Biase...,"3, just skip this section!\n\n\nOtherwise let‚Äô...","{'file_type': '.ipynb', 'language': 'python', ...",2247.500000
1590,A user is training a model using OpenAI and is...,How can I log my OpenAI fine-tuning job to Wei...,To log your OpenAI fine-tuning job to Weights ...,": {np.mean(values)}, {np.median(values)}"")\n ...","{'file_type': '.ipynb', 'language': 'python', ...",2228.611111
1592,A user is trying to fine-tune a model using Op...,Can I keep track of my OpenAI fine-tuning jobs...,"Yes, you can log your fine-tuning jobs to W&B ...",": {np.mean(values)}, {np.median(values)}"")\n ...","{'file_type': '.ipynb', 'language': 'python', ...",2228.611111


Let's prepare the training dataset now

In [130]:
def prompt_template(row):
    return ("You are an AI assistant designed to assist developers with everyday tasks related to Weight & Biases "
            "and provide helpful information. As an expert in the open-source python SDK wandb answer the following"
            "question based on the context. Answer in formatted Markdown.\n"
            "Write a response that appropriately answer the question\n\n"
            "### Question:\n{question}\n\n### Response:\n{answer}").format_map(row)

In [131]:
prompt_template(df.iloc[0])

"You are an AI assistant designed to assist developers with everyday tasks related to Weight & Biases and provide helpful information. As an expert in the open-source python SDK wandb answer the followingquestion based on the context. Answer in formatted Markdown.\nWrite a response that appropriately answer the question\n\n### Question:\nWhat is a 'run' in W&B and what can I use it for?\n\n### Response:\nA 'run' in W&B is the fundamental unit that you can use to track various aspects of your machine learning experiments. Once you initialize a W&B Run object in your Python script or notebook using `wandb.init()`, you can pass a dictionary to the `config` parameter where the key-value pairs are your hyperparameters with their corresponding values. You can use a 'run' to track metrics, create logs, jobs and do many more things."

If you use CodeLLama we need to format the instructions accordingly!

In [132]:
B_INST, E_INST = "[INST] ", " [/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
EOS = "</s>"


def format_text(row):
    text = (
        B_INST
        + B_SYS
        + "You are an AI assistant designed to assist developers with everyday tasks related to Weight & Biases"
        + "and provide helpful information. As an expert in the open-source python SDK wandb answer the following"
        # + "question based on the context below. Answer in formatted Markdown.\n"
        + "question below. Answer in formatted Markdown.\n"
        + row["page_content"]
        + E_SYS
        + row["question"]
        + E_INST
        + "\n[W&B]\n"
        + row["answer"]
        + "\n[/W&B]"
        + EOS
    )

    return text

In [133]:
one_example = format_text(df.iloc[0])
print(one_example)

[INST] <<SYS>>
You are an AI assistant designed to assist developers with everyday tasks related to Weight & Biasesand provide helpful information. As an expert in the open-source python SDK wandb answer the followingquestion below. Answer in formatted Markdown.
import Tabs from ‚Äò@theme/Tabs‚Äô;  

import TabItem from ‚Äò@theme/TabItem‚Äô;


# Quickstart


Install W&B and start tracking your machine learning experiments in minutes.


## 1. Create an account and install W&B


Before you get started, make sure you create an account and install W&B:


1. Sign up for a free account at <https://wandb.ai/site> and then login to your wandb account.
2. Install the wandb library on your machine in a Python 3 environment using `pip`.



The following code snippets demonstrate how to install and log into W&B using the W&B CLI and Python Library:



Install the CLI and Python library for interacting with the Weights and Biases API:



```
pip install wandb

```


Install the CLI and Python libra

In [134]:
dataset = []
for index, row in df.iterrows():
    dataset.append({"text": format_text(row)})

In [135]:
import json
def save_jsonl(data, filename):
    with open(filename, 'w') as file:
        for entry in data:
            json.dump(entry, file)
            file.write('\n')

In [136]:
save_jsonl(dataset, "wandb_questions_ds.jsonl")

## SageMaker auth

In [137]:
import sagemaker
import boto3
sess = sagemaker.Session()
# sagemaker session bucket -> used for uploading data, models and logs
# sagemaker will automatically create this bucket if it not exists
sagemaker_session_bucket=None
if sagemaker_session_bucket is None and sess is not None:
    # set to default bucket if a bucket name is not given
    sagemaker_session_bucket = sess.default_bucket()

try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker role arn: arn:aws:iam::372108735839:role/SageMakerExecutionRole
sagemaker bucket: sagemaker-us-east-1-372108735839
sagemaker session region: us-east-1


## Preparing a dataset

In [138]:
MODEL_NAME = "codellama/CodeLlama-7b-Instruct-hf"

In [139]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=True)
tokenizer.pad_token = tokenizer.eos_token

None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [140]:
from datasets import load_dataset

In [141]:
dataset = load_dataset(path=".", data_files="wandb_questions_ds.jsonl", split="train")
dataset

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset({
    features: ['text'],
    num_rows: 1026
})

In [142]:
dataset[0]

{'text': '[INST] <<SYS>>\nYou are an AI assistant designed to assist developers with everyday tasks related to Weight & Biasesand provide helpful information. As an expert in the open-source python SDK wandb answer the followingquestion below. Answer in formatted Markdown.\nimport Tabs from ‚Äò@theme/Tabs‚Äô;  \n\nimport TabItem from ‚Äò@theme/TabItem‚Äô;\n\n\n# Quickstart\n\n\nInstall W&B and start tracking your machine learning experiments in minutes.\n\n\n## 1. Create an account and install W&B\n\n\nBefore you get started, make sure you create an account and install W&B:\n\n\n1. Sign up for a free account at <https://wandb.ai/site> and then login to your wandb account.\n2. Install the wandb library on your machine in a Python 3 environment using `pip`.\n\n\n\nThe following code snippets demonstrate how to install and log into W&B using the W&B CLI and Python Library:\n\n\n\nInstall the CLI and Python library for interacting with the Weights and Biases API:\n\n\n\n```\npip install wa

We define some helper functions to pack our samples into sequences of a given length and then tokenize them.tokenizer

In [143]:
from random import randint
from itertools import chain
from functools import partial


# template dataset to add prompt to each sample
def template_dataset(sample):
    sample["text"] = f"{sample}{tokenizer.eos_token}"
    return sample


# apply prompt template per sample
dataset = dataset.map(template_dataset)
# print random sample
print(dataset[randint(0, len(dataset))]["text"])

# empty list to save remainder from batches to use in next batch
remainder = {"input_ids": [], "attention_mask": [], "token_type_ids": []}

Map:   0%|          | 0/1026 [00:00<?, ? examples/s]

{'text': '[INST] <<SYS>>\nYou are an AI assistant designed to assist developers with everyday tasks related to Weight & Biasesand provide helpful information. As an expert in the open-source python SDK wandb answer the followingquestion below. Answer in formatted Markdown.\n\n\n# üî•üî• Explore Graph Sampling Techniques using PyTorch Geometric and Weights & Biases ü™Ñüêù\n\n\n\nIf you wish to know how to explore and visualize point cloud datasets using PyTorch Geometric and Weights & Biases, you can check out the following notebook:\n\n\n\n## Install Required Libraries\n\n\n\n```\nimport os\nimport torch\nos.environ[\'TORCH\'] = torch.__version__\nprint(torch.__version__)\n\n```\n\nWe now install PyTorch Geometric according to our PyTorch Version. We also install Weights & Biases.\n\n\n!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html  \n\n!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html  \n\n!pip install -q torch-cluster -f h

In [144]:
def chunk(sample, chunk_length=1024):
    # define global remainder variable to save remainder from batches to use in next batch
    global remainder
    # Concatenate all texts and add remainder from previous batch
    concatenated_examples = {k: list(chain(*sample[k])) for k in sample.keys()}
    concatenated_examples = {k: remainder[k] + concatenated_examples[k] for k in concatenated_examples.keys()}
    # get total number of tokens for batch
    batch_total_length = len(concatenated_examples[list(sample.keys())[0]])

    # get max number of chunks for batch
    if batch_total_length >= chunk_length:
        batch_chunk_length = (batch_total_length // chunk_length) * chunk_length

    # Split by chunks of max_len.
    result = {
        k: [t[i : i + chunk_length] for i in range(0, batch_chunk_length, chunk_length)]
        for k, t in concatenated_examples.items()
    }
    # add remainder to global variable for next batch
    remainder = {k: concatenated_examples[k][batch_chunk_length:] for k in concatenated_examples.keys()}
    # prepare labels
    result["labels"] = result["input_ids"].copy()
    return result


# tokenize and chunk dataset
lm_dataset = dataset.map(
    lambda sample: tokenizer(sample["text"]), batched=True, remove_columns=list(dataset.features)
).map(
    partial(chunk, chunk_length=1024),
    batched=True,
)

# Print total number of samples
print(f"Total number of samples: {len(lm_dataset)}")

Map:   0%|          | 0/1026 [00:00<?, ? examples/s]

Map:   0%|          | 0/1026 [00:00<?, ? examples/s]

Total number of samples: 2396


## Save to a bucket and W&B

In [148]:
# save train_dataset to s3
training_input_path = f's3://{sess.default_bucket()}/processed/wandbot/train'
lm_dataset.save_to_disk(training_input_path)

print("uploaded data to:")
print(f"training dataset to: {training_input_path}")

Saving the dataset (0/1 shards):   0%|          | 0/2396 [00:00<?, ? examples/s]

uploaded data to:
training dataset to: s3://sagemaker-us-east-1-372108735839/processed/wandbot/train


In [149]:
at = wandb.Artifact(
    name="wandbot_dataset", 
    type="dataset",
    description="A wandbot dataset of questions and answers about W&B")

In [150]:
with wandb.init(project="aws_llm_demo"):
    at.add_reference(training_input_path)
    wandb.log_artifact(at)

[34m[1mwandb[0m: Generating checksum for up to 10000 objects in "sagemaker-us-east-1-372108735839/processed/wandbot/train"... Done. 0.1s


