In [1]:
!pip uninstall -y tensorflow --quiet
!pip install ludwig
!pip install ludwig[llm]

Collecting ludwig
  Downloading ludwig-0.8.6.tar.gz (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting PyYAML!=5.4.*,<6.0.1,>=3.12 (from ludwig)
  Downloading PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (682 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m682.2/682.2 kB[0m [31m49.9 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json (from ludwig)
  Downloading dataclasses_json-0.6.2-py3-none-any.whl (28 kB)
Collecting jsonschema<4.7,>=4.5.0 (from ludwig)
  Downloading jsonschema-4.6.2-py3-none-any.whl (80 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.8/80.8 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25h

Collecting sentence-transformers (from ludwig[llm])
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting faiss-cpu (from ludwig[llm])
  Downloading faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m65.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate (from ludwig[llm])
  Downloading accelerate-0.24.1-py3-none-any.whl (261 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.4/261.4 kB[0m [31m30.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting loralib (from ludwig[llm])
  Downloading loralib-0.1.2-py3-none-any.whl (10 kB)
Collecting peft>=0.4.0 (from ludwig[llm])
  Downloading peft-0.6.2-py3-none-any.whl (174 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━

In [2]:
from IPython.display import HTML, display

def set_css():
  display(HTML('''
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  '''))

get_ipython().events.register('pre_run_cell', set_css)

In [3]:
import os
import getpass
import locale

def getpreferredencoding(do_setlocale = True):
    return "UTF-8"
locale.getpreferredencoding = getpreferredencoding

os.environ["HUGGING_FACE_HUB_TOKEN"] = getpass.getpass("Token:")
assert os.environ["HUGGING_FACE_HUB_TOKEN"]

Token:··········


In [4]:
import pandas as pd

df = pd.read_json("https://raw.githubusercontent.com/sahil280114/codealpaca/master/data/code_alpaca_20k.json")
df = df.loc[df["input"]=='']  # pick only data that does not require any input column value
df = df.drop(["input"], axis=1)
df = df.tail(n=500)  # tried with 100 and then moved back to 500

In [5]:
import yaml
import logging
from ludwig.api import LudwigModel

"""
Uncomment the below to experiment with zero-shot prompting of the model.

Note that you may need to restart the notebook and comment out this section again
if you wish to perform the fine-tuning below, as running the zero-shot prompting
will allocate some additional memory on the GPU that can lead to CUDA OOM issues.
"""

zero_shot_config = yaml.safe_load(
  """
  model_type: llm
  base_model: Siddharthvij10/MistralSharded2

  input_features:
    - name: instruction
      type: text

  output_features:
    - name: output
      type: text

  prompt:
    template: >-
      Below is an instruction that describes a task.Write a response that appropriately completes the request.

      ### Instruction: {instruction}

      ### Response:

  generation:
    temperature: 0.1 # Temperature is used to control the randomness of predictions.
    max_new_tokens: 512

  adapter:
    type: lora

  quantization:
    bits: 4

  preprocessing:
    global_max_sequence_length: 512
    split:
      type: random

  trainer:
    type: finetune
    epochs: 1
    batch_size: 1
    eval_batch_size: 2
    gradient_accumulation_steps: 16
    learning_rate: 0.0004
    learning_rate_scheduler:
      warmup_fraction: 0.03

  """
)

model = LudwigModel(config=zero_shot_config, logging_level=logging.INFO)
results = model.train(dataset=df)

config.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

INFO:ludwig.utils.print_utils:
INFO:ludwig.utils.print_utils:╒════════════════════════╕
INFO:ludwig.utils.print_utils:│ EXPERIMENT DESCRIPTION │
INFO:ludwig.utils.print_utils:╘════════════════════════╛
INFO:ludwig.utils.print_utils:
INFO:ludwig.api:╒══════════════════╤═════════════════════════════════════════════════════════════════════════════════════════╕
│ Experiment name  │ api_experiment                                                                          │
├──────────────────┼─────────────────────────────────────────────────────────────────────────────────────────┤
│ Model name       │ run                                                                                     │
├──────────────────┼─────────────────────────────────────────────────────────────────────────────────────────┤
│ Output directory │ /content/results/api_experiment_run                                                     │
├──────────────────┼─────────────────────────────────────────────────────────────────

tokenizer_config.json:   0%|          | 0.00/941 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

INFO:ludwig.utils.tokenizers:Loaded HuggingFace implementation of Siddharthvij10/MistralSharded2 tokenizer
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
INFO:ludwig.features.text_feature:Max length of feature 'None': 80 (without start and stop symbols)
INFO:ludwig.features.text_feature:Setting max length using dataset: 82 (including start and stop symbols)
INFO:ludwig.features.text_feature:max sequence length is 82 for feature 'None'
INFO:ludwig.utils.tokenizers:Loaded HuggingFace implementation of Siddharthvij10/MistralSharded2 tokenizer
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
INFO:ludwig.features.text_feature:Max length of feature 'output': 1310 (without start and stop symbols)
INFO:ludwig.features.text_feature:Setting max length using dataset: 1312 (including start and stop symbols)
INFO:lu

pytorch_model.bin.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]

pytorch_model-00001-of-00008.bin:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

pytorch_model-00002-of-00008.bin:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

pytorch_model-00003-of-00008.bin:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

pytorch_model-00004-of-00008.bin:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

pytorch_model-00005-of-00008.bin:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

pytorch_model-00006-of-00008.bin:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

pytorch_model-00007-of-00008.bin:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

pytorch_model-00008-of-00008.bin:   0%|          | 0.00/816M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

INFO:ludwig.models.llm:Done.
INFO:ludwig.utils.tokenizers:Loaded HuggingFace implementation of Siddharthvij10/MistralSharded2 tokenizer
INFO:ludwig.models.llm:Trainable Parameter Summary For Fine-Tuning
INFO:ludwig.models.llm:Fine-tuning with adapter: lora
INFO:ludwig.utils.print_utils:
INFO:ludwig.utils.print_utils:╒══════════╕
INFO:ludwig.utils.print_utils:│ TRAINING │
INFO:ludwig.utils.print_utils:╘══════════╛
INFO:ludwig.utils.print_utils:


trainable params: 3,407,872 || all params: 7,245,139,968 || trainable%: 0.04703666202518836


INFO:ludwig.trainers.trainer:Creating fresh model training run.
INFO:ludwig.trainers.trainer:Training for 350 step(s), approximately 1 epoch(s).
INFO:ludwig.trainers.trainer:Early stopping policy: 5 round(s) of evaluation, or 1750 step(s), approximately 5 epoch(s).

INFO:ludwig.trainers.trainer:Starting with step 0, epoch: 0


Training: 100%|██████████| 350/350 [03:47<00:00,  1.26it/s, loss=0.0483]

INFO:ludwig.trainers.trainer:
Running evaluation for step: 350, epoch: 0


Evaluation valid: 100%|██████████| 25/25 [00:13<00:00,  1.89it/s]
Evaluation test : 100%|██████████| 50/50 [00:29<00:00,  1.70it/s]

INFO:ludwig.trainers.trainer:Evaluation took 42.7160s

INFO:ludwig.utils.metrics_printed_table:╒═══════════════════════╤════════════╤══════════════╤════════════╕
│                       │      train │   validation │       test │
╞═══════════════════════╪════════════╪══════════════╪════════════╡
│ bleu                  │     0.1898 │       0.1757 │     0.1800 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ char_error_rate       │     1.0758 │       1.1896 │     1.3536 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ loss                  │     0.8823 │       0.6134 │     0.7247 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ next_token_perplexity │ 15181.1406 │   14806.8838 │ 15152.5928 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ perplexity            │ 31850.9082 │   31530.5820 │ 31609.4004 │
├───────────────────────┼────────────┼──────────────┼────────────┤
│ rouge1_fmeasure       │     0.47


Training: 100%|██████████| 350/350 [04:31<00:00,  1.29it/s, loss=0.0483]

INFO:ludwig.utils.print_utils:
INFO:ludwig.utils.print_utils:╒═════════════════╕
INFO:ludwig.utils.print_utils:│ TRAINING REPORT │
INFO:ludwig.utils.print_utils:╘═════════════════╛
INFO:ludwig.utils.print_utils:
INFO:ludwig.api:╒══════════════════════════════╤═══════════════════╕
│ Validation feature           │ output            │
├──────────────────────────────┼───────────────────┤
│ Validation metric            │ loss              │
├──────────────────────────────┼───────────────────┤
│ Best model step              │ 350               │
├──────────────────────────────┼───────────────────┤
│ Best model epoch             │ 1                 │
├──────────────────────────────┼───────────────────┤
│ Best model's validation loss │ 0.613406240940094 │
├──────────────────────────────┼───────────────────┤
│ Best model's test loss       │ 0.724659264087677 │
╘══════════════════════════════╧═══════════════════╛
INFO:ludwig.api:
Finished: api_experiment_run
INFO:ludwig.api:Saved to: /content/re




In [8]:
df_inference = pd.read_json("https://raw.githubusercontent.com/sahil280114/codealpaca/master/data/code_alpaca_20k.json")

df_inference = df_inference.loc[df_inference["input"]=='']  # pick only data that does not require any input column value
df_inference = df_inference.drop(["input"], axis=1)

df_inference = df_inference.head(n=10)
df_inference

Unnamed: 0,instruction,output
0,Create an array of length 5 which contains all...,"arr = [2, 4, 6, 8, 10]"
1,Formulate an equation to calculate the height ...,Height of triangle = opposite side length * si...
3,Create an array of length 15 containing number...,"arr = [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33..."
5,Create a nested loop to print every combinatio...,for i in range(10):\n for j in range(10):\n...
8,Write a class to represent a 2D point with x a...,"class Point:\n def __init__(self, x, y):\n ..."
10,Write a function that checks if a give number ...,def is_even(n):\n if n%2 == 0:\n ret...
11,Write code to create a list of all prime numbe...,def find_primes(n): \n prime_list = [2] \n ...
13,Write code to find the sum of all numbers betw...,"sum = 0\nfor i in range(1,11):\n sum += i\n..."
14,Create a function to calculate the area of a g...,def area_circle(radius):\n return 3.14 * (r...
16,Develop a classification algorithm in Python t...,import pandas as pd\nimport numpy as np\n\n# D...


In [9]:
predictions = model.predict(df_inference)[0]

INFO:ludwig.utils.tokenizers:Loaded HuggingFace implementation of Siddharthvij10/MistralSharded2 tokenizer
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Prediction:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:ludwig.models.llm:Decoded text inputs for the first example in batch: below is an instruction that describes a task.write a response that appropriately completes the request.
### instruction: create an array of length 5 which contains all even numbers between 1 and 10.
### response:
INFO:ludwig.models.llm:Decoded generated output for the first example in batch: below is an instruction that describes a task.write a response that appropriately completes the request.
### instruction: create an array of length 5 which contains all even numbers between 1 and 10.
### response: [2, 4, 6, 8, 10]
INFO:ludwig.models.llm:Decoded text inputs for the first example in batch: below is an instruction that describes a task.write a response that appropriately completes the request.
### instruction: formulate an equation to calculate the height of a triangle given the angle, side lengths and opposite side length.
### response:
INFO:ludwig.models.llm:Decoded generated output for the first example in 

Prediction: 100%|██████████| 1/1 [00:53<00:00, 53.24s/it]


INFO:ludwig.utils.tokenizers:Loaded HuggingFace implementation of Siddharthvij10/MistralSharded2 tokenizer
  return np.sum(np.log(sequence_probabilities))


In [12]:
!ludwig upload hf_hub --repo_id siddharthvij10/ludwig_finetuned --model_path /content/results/api_experiment_run

adapter_model.bin: 100% 13.7M/13.7M [00:02<00:00, 5.21MB/s]
Model uploaded to `https://huggingface.co/siddharthvij10/ludwig_finetuned/tree/main/` with repository name `siddharthvij10/ludwig_finetuned`
