<a href="https://colab.research.google.com/github/mkeohane01/JamSesh/blob/main/modeling/training_jamsesh.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Setup Lit GPT

In [None]:
from huggingface_hub import notebook_login

notebook_login()

In [None]:
!pip install 'litgpt[all] @ git+https://github.com/Lightning-AI/litgpt'

In [None]:
# !litgpt download --repo_id mistralai/Mistral-7B-v0.1
!litgpt download --repo_id mistralai/Mistral-7B-Instruct-v0.2

In [10]:
test_prompt = """
### Instruction:
Create a list of chords, a corresponding scale to improvise with, title, and style along with an example in ABC notation based on this input. Respond in JSON format.\n\n                        Given the input, create an output exactly in this format: \n \n                            \"output\": {{\n                                \"chords\": \"## Suggested chord progression\",\n                                \"scales\": \"## Suggested scale for improvising\",\n                                \"title\": \"## Title of Jam\",\n                                \"style\": \"## Style to play like\",\n                                \"example\": `\n                                    ## ABC notation for an example section using these chords and notes\n                                `\n                            }}
### Input:
Simple melodic blues progression
### Response:
"""

In [None]:
!litgpt generate base --precision bf16-true --checkpoint_dir checkpoints/mistralai/Mistral-7B-Instruct-v0.2 --prompt

## Process Data

In [None]:
!git clone https://github.com/mkeohane01/JamSesh.git

In [None]:
!unzip  ./drive/MyDrive/mistral7B-finetuned-jamsesh.zip -d ./checkpoints/jamsesh-lora/

## Finetune Model

In [12]:
!litgpt finetune lora \
  --checkpoint_dir "checkpoints/mistralai/Mistral-7B-Instruct-v0.2/" \
  --precision bf16-true \
  --quantize bnb.nf4 \
  --data JSON \
  --data.json_path "JamSesh/data/sft_jsons/sft_data_jamsesh.json" \
  --data.val_split_fraction 0.05 \
  --out_dir "out/mistral7B-jamsesh/" \
  --train.micro_batch_size 2 \
  --train.global_batch_size 4 \
  --train.save_interval 100 \
  --train.log_interval 8 \
  --train.epochs 30

{'checkpoint_dir': PosixPath('checkpoints/mistralai/Mistral-7B-Instruct-v0.2'),
 'data': JSON(json_path=PosixPath('JamSesh/data/sft_jsons/sft_data_jamsesh.json'),
              mask_prompt=False,
              val_split_fraction=0.05,
              prompt_style=<litgpt.prompts.Alpaca object at 0x7e5884f1df00>,
              ignore_index=-100,
              seed=42,
              num_workers=4),
 'devices': 1,
 'eval': EvalArgs(interval=100, max_new_tokens=100, max_iters=100),
 'logger_name': 'csv',
 'lora_alpha': 16,
 'lora_dropout': 0.05,
 'lora_head': False,
 'lora_key': False,
 'lora_mlp': False,
 'lora_projection': False,
 'lora_query': True,
 'lora_r': 8,
 'lora_value': True,
 'out_dir': PosixPath('out/mistral7B-jamsesh'),
 'precision': 'bf16-true',
 'quantize': 'bnb.nf4',
 'seed': 1337,
 'train': TrainArgs(save_interval=100,
                    log_interval=8,
                    global_batch_size=4,
                    micro_batch_size=2,
                    lr_warmup_steps=100,

In [None]:
!litgpt generate base \
  --checkpoint_dir "checkpoints/jamsesh-lora/out/lora_weights/mistral7B-jamsesh-2/final/" \
  --prompt f"{test_prompt}"

Loading model 'checkpoints/jamsesh-lora/out/lora_weights/mistral7B-finetuned/final/lit_model.pth' with {'name': 'Mistral-7B-Instruct-v0.2', 'hf_config': {'name': 'Mistral-7B-Instruct-v0.2', 'org': 'mistralai'}, 'scale_embeddings': False, 'block_size': 32768, 'vocab_size': 32000, 'padding_multiple': 512, 'padded_vocab_size': 32000, 'n_layer': 32, 'n_head': 32, 'head_size': 128, 'n_embd': 4096, 'rotary_percentage': 1.0, 'parallel_residual': False, 'bias': False, 'lm_head_bias': False, 'n_query_groups': 8, 'shared_attention_norm': False, 'norm_class_name': 'RMSNorm', 'norm_eps': 1e-05, 'mlp_class_name': 'LLaMAMLP', 'gelu_approximate': 'none', 'intermediate_size': 14336, 'rope_condense_ratio': 1, 'rope_base': 10000, 'n_expert': 0, 'n_expert_per_token': 0, 'rope_n_elem': 128}
Time to instantiate model: 0.35 seconds.
Time to load the model weights: 91.45 seconds.
Seed set to 1234
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### In

In [3]:
!litgpt merge_lora \
 --checkpoint_dir "/content/checkpoints/jamsesh-lora/out/mistral7B-jamsesh/step-000700/" \
 --pretrained_checkpoint_dir "checkpoints/mistralai/Mistral-7B-Instruct-v0.2/"

Saved merged weights to '/content/checkpoints/jamsesh-lora/out/mistral7B-jamsesh/step-000700/lit_model.pth'


In [None]:
!zip -r mistral7B-finetuned-jamsesh.zip out/mistral7B-jamsesh/step-000700/

## Huggingface

In [None]:
!litgpt convert from_litgpt \
    --checkpoint_dir /content/checkpoints/jamsesh-lora/out/mistral7B-jamsesh/step-000700/ \
    --output_dir checkpoints/jamsesh-tuned-hf

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(load_in_8bit=True)
state_dict = torch.load('checkpoints/jamsesh-tuned-hf/model.pth')

model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2",
                                  state_dict=state_dict,
                                  device_map="auto",
                                  quantization_config=quantization_config
                                  )

In [3]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

tokenizer_config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [None]:
model.push_to_hub("mkeohane01/jamsesh")
tokenizer.push_to_hub("mkeohane01/jamsesh")

### Eval

In [4]:
# Pretrained Mistral Instruct
model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2",
                                device_map="auto",
                                quantization_config=quantization_config
                                )

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [1]:
test_prompt = """
### Instruction:
Create a list of chords, a corresponding scale to improvise with, title, and style along with an example in ABC notation based on this input. Respond in JSON format.\n\n                        Given the input, create an output exactly in this format: \n \n                            \"output\": {{\n                                \"chords\": \"## Suggested chord progression\",\n                                \"scales\": \"## Suggested scale for improvising\",\n                                \"title\": \"## Title of Jam\",\n                                \"style\": \"## Style to play like\",\n                                \"example\": `\n                                    ## ABC notation for an example section using these chords and notes\n                                `\n                            }}
### Input:
Simple melodic blues progression in Dm
### Response:
"""

In [5]:
# Input text
# input_text = "Write me a 12 bar blues progression in ABC notation."

# Tokenize input
input_ids = tokenizer.encode(test_prompt, return_tensors='pt')

# Generate response
output_ids = model.generate(input_ids, max_new_tokens=450)

# Decode and print output
output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print(output_text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



### Instruction:
Create a list of chords, a corresponding scale to improvise with, title, and style along with an example in ABC notation based on this input. Respond in JSON format.

                        Given the input, create an output exactly in this format: 
 
                            "output": {{
                                "chords": "## Suggested chord progression",
                                "scales": "## Suggested scale for improvising",
                                "title": "## Title of Jam",
                                "style": "## Style to play like",
                                "example": `
                                    ## ABC notation for an example section using these chords and notes
                                `
                            }}
### Input:
Simple melodic blues progression in Dm
### Response:
```json
{
   output: {


In [14]:
clean_state_dict = {k: v for k, v in model.state_dict().items() if torch.is_tensor(v)}

In [12]:
torch.cuda.empty_cache()