In [None]:
python -m venv mamba_env
pip install packaging wheel torch ipykernel datasets
pip install accelerate -U
pip install .
python -m ipykernel install --user --name=mamba_env

In [26]:
import torch
from transformers import AutoTokenizer, GPTNeoXForCausalLM, GPTNeoXModel
from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel
import sys
torch.set_default_device("cuda")


# Take in the model you want to train
model_name = "state-spaces/mamba-130m"

# Choose a tokenizer
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
tokenizer.eos_token = "<|endoftext|>"
tokenizer.pad_token = tokenizer.eos_token

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [59]:
from transformers import GPTNeoXForCausalLM, AutoTokenizer, GPTNeoXModel

model = GPTNeoXForCausalLM.from_pretrained(
  "EleutherAI/pythia-160m-deduped",
  #output_hidden_states=True,
  #revision="step3000",
  #cache_dir="./pythia-70m-deduped/step3000",
)


In [55]:
model = MambaLMHeadModel.from_pretrained(
    model_name, 
    device="cuda", 
    dtype=torch.float16)

In [60]:
# Take the user input from the command line
user_message = "Give me three steps to improve my diet, and include some evidence"#input("\n> ")

# Create a prompt
n_shot_prompting = [
    {
        "question": "What is the capital of France?",
        "answer": "Paris"
    },
    {
        "question": "Who invented the segway?",
        "answer": "Dean Kamen"
    },
    {
        "question": "What is the fastest animal?",
        "answer": "Cheetah"
    }
]

prompt = f"You are a Trivia QA bot.\nAnswer the following question succinctly and accurately."
prompt = f"{prompt}\n\n" + "\n\n".join([f"Q: {p['question']}\nA: {p['answer']}" for p in n_shot_prompting])
prompt = f"{prompt}\n\nQ: {user_message}\nA:"

# Debug print to make sure our prompt looks good
print(prompt)

# Encode the text to token IDs
input_ids = torch.LongTensor([tokenizer.encode(prompt)]).cuda()

print(prompt)

# Encode the prompt into integers and convert to a tensor on the GPU
input_ids = torch.LongTensor([tokenizer.encode(prompt)]).cuda()
print(input_ids)

You are a Trivia QA bot.
Answer the following question succinctly and accurately.

Q: What is the capital of France?
A: Paris

Q: Who invented the segway?
A: Dean Kamen

Q: What is the fastest animal?
A: Cheetah

Q: Give me three steps to improve my diet, and include some evidence
A:
You are a Trivia QA bot.
Answer the following question succinctly and accurately.

Q: What is the capital of France?
A: Paris

Q: Who invented the segway?
A: Dean Kamen

Q: What is the fastest animal?
A: Cheetah

Q: Give me three steps to improve my diet, and include some evidence
A:
tensor([[ 1394,   403,   247,   308,  1069,   571,  1165,    34, 17994,    15,
           187, 32869,   253,  1563,  1953, 18382,  4291,   314,   285, 13613,
            15,   187,   187,    50,    27,  1737,   310,   253,  5347,   273,
          6181,    32,   187,    34,    27,  7785,   187,   187,    50,    27,
          8452, 23179,   253,  8753,  1106,    32,   187,    34,    27, 19172,
           611, 18986,   187,   187

In [61]:
# Generate an output sequence of tokens given the input
# "out" will contain the raw token ids as integers
out = model.generate(
    input_ids=input_ids,
    max_length=256,
    eos_token_id=tokenizer.eos_token_id
)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


In [62]:
out = model(
    input_ids=input_ids,
    output_hidden_states=True
)

In [65]:
for k,X in enumerate(out.hidden_states):
    print(k, X.shape)

0 torch.Size([1, 88, 768])
1 torch.Size([1, 88, 768])
2 torch.Size([1, 88, 768])
3 torch.Size([1, 88, 768])
4 torch.Size([1, 88, 768])
5 torch.Size([1, 88, 768])
6 torch.Size([1, 88, 768])
7 torch.Size([1, 88, 768])
8 torch.Size([1, 88, 768])
9 torch.Size([1, 88, 768])
10 torch.Size([1, 88, 768])
11 torch.Size([1, 88, 768])
12 torch.Size([1, 88, 768])


In [58]:
# you must use the tokenizer to decode them back into strings
decoded = tokenizer.batch_decode(out)[0]
print("="*80)
print(decoded)
# out returns the whole sequence plus the original
cleaned = decoded.replace(prompt, "")

# the model will just keep generating, so only grab the first one
# cleaned = cleaned.split("\n\n")[0]
print(cleaned)

You are a Trivia QA bot.
Answer the following question succinctly and accurately.

Q: What is the capital of France?
A: Paris

Q: Who invented the segway?
A: Dean Kamen

Q: What is the fastest animal?
A: Cheetah

Q: Give me three steps to improve my diet, and include some evidence
A: I'm not sure.

Q: What is the most popular game in the world?
A: Minecraft

Q: What is the most popular game in the world?
A: Minecraft

Q: What is the most popular game in the world?
A: Minecraft

Q: What is the most popular game in the world?
A: Minecraft

Q: What is the most popular game in the world?
A: Minecraft

Q: What is the most popular game in the world?
A: Minecraft

Q: What is the most popular game in the world?
A: Minecraft

Q: What is the most popular game in the world?
A: Minecraft

Q: What is the most popular game in
 I'm not sure.

Q: What is the most popular game in the world?
A: Minecraft

Q: What is the most popular game in the world?
A: Minecraft

Q: What is the most popular game in th

In [1]:
from training.train_mamba_with_context import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def run(args):
        
    model = MambaLMHeadModel.from_pretrained(args.model, dtype=torch.bfloat16, device="cuda")

    tokenizer = AutoTokenizer.from_pretrained(args.tokenizer)
    tokenizer.eos_token = "<|endoftext|>"
    tokenizer.pad_token = tokenizer.eos_token

    data_module = SFTDataModule(
        tokenizer=tokenizer,
        data_path=args.data_path,
    )

    trainer = MambaTrainer(
        model=model,
        train_dataset=data_module.dataset,
        tokenizer=tokenizer,
        args=TrainingArguments(
            learning_rate=args.learning_rate,
            num_train_epochs=args.num_epochs,
            per_device_train_batch_size=args.batch_size,
            gradient_accumulation_steps=args.gradient_accumulation_steps,
            optim=args.optim,
            output_dir=args.output,
            save_total_limit=2,
            logging_steps=50,
            save_steps=500,
        ),
        data_collator=data_module.data_collator,
    )

    trainer.train()
    trainer.save_model(args.output)

In [3]:
parser = argparse.ArgumentParser()
parser.add_argument("--model", type=str, default="state-spaces/mamba-130m")
parser.add_argument("--output", type=str, default="output")
parser.add_argument("--tokenizer", type=str, default="EleutherAI/gpt-neox-20b")
parser.add_argument("--learning_rate", type=float, default=5e-4)
parser.add_argument("--batch_size", type=int, default=8)
parser.add_argument("--gradient_accumulation_steps", type=int, default=1)
parser.add_argument("--optim", type=str, default="adamw_torch")
parser.add_argument("--data_path", type=str, default="squad")
parser.add_argument("--num_epochs", type=int, default=10)
args = parser.parse_args('')

In [4]:
run(args)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Got 0 examples, preprocess...
Tokenizing dataset...


100%|██████████| 87599/87599 [01:37<00:00, 896.27it/s] 


Step,Training Loss
50,2.4427
100,2.03
150,1.9972
200,2.0486
250,1.988
300,1.9216
350,1.9608
400,2.0017
450,1.8559
500,1.8583


KeyboardInterrupt: 