# Do inference with pretrained models

## Install dependencies and import

In [1]:
!pip install -U trl transformers accelerate git+https://github.com/huggingface/peft.git -Uqqq
!pip install bitsandbytes einops wandb -Uqqq
!pip install -Uqqq datasets===2.16.0

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m225.0/225.0 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.8/8.8 MB[0m [31m31.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.1/290.1 kB[0m [31m30.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m38.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.8/79.8 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m31.0 MB/s[0

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline, logging, GenerationConfig
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
from huggingface_hub import notebook_login
import pandas as pd
import datasets
import torch
from trl import SFTTrainer
import glob
import re

SEED = 999
BATCH_SIZE = 32
torch.manual_seed(SEED)

<torch._C.Generator at 0x7942fac83670>

## Full lyrics generation with Phi-2

In [3]:
phi_model_name = "simoneteglia/phi-2-lyrical-genius"

bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.float16,
    bnb_4bit_use_double_quant= False,
)

phi_model = AutoModelForCausalLM.from_pretrained(
    phi_model_name,
    quantization_config=bnb_config,
    trust_remote_code=True,
    device_map={"": 0}
)

phi_tokenizer = AutoTokenizer.from_pretrained(phi_model_name, trust_remote_code=True, padding_side = "right")
phi_tokenizer.pad_token = phi_tokenizer.unk_token
phi_tokenizer.add_eos_token = True

adapter_config.json:   0%|          | 0.00/701 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/863 [00:00<?, ?B/s]

configuration_phi.py:   0%|          | 0.00/9.26k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/phi-2:
- configuration_phi.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_phi.py:   0%|          | 0.00/62.7k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/phi-2:
- modeling_phi.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/35.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/126M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.40k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [17]:
from transformers import GenerationConfig

device = 'cuda' if torch.cuda.is_available() else 'cpu'
def generate_lyrics_phi(query, model, repetition_penalty, temperature):
    encoding = phi_tokenizer(query, return_tensors="pt").to(device)
    generation_config = GenerationConfig(max_new_tokens=256,
                                         pad_token_id = phi_tokenizer.eos_token_id,
                                         repetition_penalty=repetition_penalty,
                                         eos_token_id = phi_tokenizer.eos_token_id,
                                         temperature=temperature,
                                         do_sample=True)

    outputs = phi_model.generate(input_ids=encoding.input_ids, generation_config=generation_config)
    text_output = phi_tokenizer.decode(outputs[0],skip_special_tokens=True)
    print('INPUT\n', query, '\n\nOUTPUT\n', text_output[len(query):])


In [15]:
prompt = "Generate lyrics for a rock song"
formatted_prompt = f"### Instruction: {prompt} ### Assistant: "
generate_lyrics(prompt, model, repetition_penalty = 1.2, temperature = 0.9)

INPUT
 Generate lyrics for a rock song 

OUTPUT
 .
[Verse 1]
I got you in my arms, I'll give everything there is to protect
You are the sun that rises every day over me
All this time we've known each other, but today it's different
The fire inside of you burns brightly right down here on earth
Baby, can you turn up your flame? Can't keep you out all night long?"
[Chorus]
Tonight when the moon be high and it's dark outside (Outside)
And I'm with some girl who knows how to take off my clothes just like before (Before)
She touches the same place she always touched until now
She kissed me so much tonight, oh yeah! She knew exactly what to do tomorrow 'til then
Oh yeah! The way things feel between us never gets old
But tonight could change everything forever—oh-oh! Do you think I know what a ride this will be someday? Just sayin' somethin'
Yeah, baby don't let anybody ruin something good anymore
How about somebody else starts messing around? You better have someone ready now
For as soon as 

## Lyrics completion with Mistral 7B

In [16]:
model_name = "simoneteglia/ye_mistral7B"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

adapter_config.json:   0%|          | 0.00/651 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/54.5M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/969 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/437 [00:00<?, ?B/s]

In [20]:
def generate_lyrics_mistral(query, model, repetition_penalty, temperature):
    encoding = tokenizer(query, return_tensors="pt").to(device)
    generation_config = GenerationConfig(max_new_tokens=256,
                                         pad_token_id = tokenizer.eos_token_id,
                                         repetition_penalty=repetition_penalty,
                                         eos_token_id = tokenizer.eos_token_id,
                                         temperature=temperature,
                                         do_sample=True)

    outputs = model.generate(input_ids=encoding.input_ids, generation_config=generation_config)
    text_output = tokenizer.decode(outputs[0],skip_special_tokens=True)
    print('MISTRAL INPUT\n', query, '\n\nOUTPUT\n', text_output[len(query):])

In [21]:
generate_lyrics_mistral("""I'm tryna keep my faith
We on an ultralight beam
We on an ultralight beam
This is a God dream
This is a God dream""", model, repetition_penalty = 1.5, temperature = 0.8)

MISTRAL INPUT
 I'm tryna keep my faith
We on an ultralight beam
We on an ultralight beam
This is a God dream
This is a God dream 

OUTPUT
 , this what we do when the sun go down
Niggas ain’t shit except for 9-5ers and lawyers
Young niggaz don’t know that they got it where you stash at?
They gon be hatin’, but still love your ass off
And all these girls like “Yo yo yo”"
“I can see through soul; yeezy brown skin." - Kanye West
