## Preparing the Data

In [1]:
from sklearn.datasets import fetch_20newsgroups
import pandas as pd

# Restrict the 20 Newsgroups corpus to the two sports newsgroups.
categories = [
    'rec.sport.baseball',
    'rec.sport.hockey',
]

# Load the training subset for those categories, shuffled with a fixed
# random_state.
sports_dataset = fetch_20newsgroups(
    subset='train',
    categories=categories,
    shuffle=True,
    random_state=42,
)

ModuleNotFoundError: No module named 'sklearn'

The data must end up in a publicly accessible CSV file with two columns: `prompt` and `response`.

In [None]:
# The PyPI distribution is named `scale-llm-engine`, but hyphens are not valid
# in Python module names; the importable module is `llmengine`.
from llmengine import FineTune, Completion, Model

# Launch a fine-tuning job on the base model. `training_file` is still a
# placeholder and must be replaced with the URI of the prepared CSV.
create_fine_tune_response = FineTune.create(
    model="llama-7b",
    training_file="s3://TODO",
    validation_file=None,
    hyperparameters={},
    suffix="my-first-finetune"
)

# Keep the job ID so the job can be looked up once it has been submitted.
# NOTE(review): newer llmengine releases appear to expose this as `.id` --
# confirm against the installed client version.
fine_tune_id = create_fine_tune_response.fine_tune_id


# Reference notes on the fine-tune arguments (kept as comments so the cell
# does not echo a large string as its output).
# NOTE(review): model_name, base_model and fine_tuning_method below do not
# match the keywords used in the call above (model, suffix) -- confirm
# against the installed client.
#
# Args:
#     training_file (`str`):
#         A path to the file containing the training dataset.
#         Dataset must be a CSV file with columns 'prompt' and 'response'.
#         The value must be the URI of a publicly accessible file on bulk
#         storage, e.g. s3://{public_s3_bucket}/{public_s3_key} for a file
#         stored on s3.
#     validation_file (`str`):
#         A path to the file containing the validation dataset.
#         Has the same format as training_file.
#         If not provided, we will generate a split from the training dataset.
#     model_name (`str`):
#         The name of the fine-tuned model.
#     base_model (`str`):
#         Base model to train from.
#     fine_tuning_method (`str`):
#         Fine-tuning method.
#     hyperparameters (`dict`):
#         Hyperparameters.

In [None]:
# Wait for fine tune to complete
#
# Polls the job status until it reports "SUCCESS". Raises ValueError as soon
# as the job reports "FAILURE" or "CANCELLED". Any other status is treated as
# "still in progress", so the loop keeps polling.

import time

POLL_INTERVAL_SECONDS = 10  # pause between status checks
FAILED_STATUSES = ("FAILURE", "CANCELLED")

while True:
    # NOTE(review): newer llmengine releases appear to name this call
    # `FineTune.get` -- confirm against the installed client version.
    fine_tune_status = FineTune.retrieve(fine_tune_id).status
    if fine_tune_status == "SUCCESS":
        break
    if fine_tune_status in FAILED_STATUSES:
        # Report which terminal status was hit and for which job, so a
        # cancelled job is not mistaken for a failed one.
        raise ValueError(
            f"Fine Tune {fine_tune_id} failed with status {fine_tune_status}"
        )
    time.sleep(POLL_INTERVAL_SECONDS)


In [None]:
# Fetch the model endpoints returned by Model.list().
all_models = Model.list().model_endpoints

# We want to get just your fine-tuned models: keep only the endpoints whose
# spec is not marked `public_inference`.
your_personal_fine_tunes = [
    endpoint for endpoint in all_models if not endpoint.spec.public_inference
]

# Fail with an actionable message instead of a bare "list index out of range"
# when the filter leaves nothing (same exception type as plain indexing).
if not your_personal_fine_tunes:
    raise IndexError(
        "No personal fine-tuned models found; "
        "check that the fine-tune job finished successfully."
    )

# NOTE(review): this takes the first match, which is presumably -- but not
# necessarily -- the model that was just trained; confirm the ordering or
# select by name.
your_fine_tuned_model = your_personal_fine_tunes[0].name

In [None]:
# Request a completion from the fine-tuned model selected above.
# The prompt is still a placeholder to be filled in.
response = Completion.create(
    prompt="TODO",
    model_name=your_fine_tuned_model,
)