In [1]:
!nvidia-smi

Mon Oct  9 20:58:06 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.113.01             Driver Version: 535.113.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3090        Off | 00000000:2D:00.0 Off |                  N/A |
|  0%   41C    P8              22W / 370W |    102MiB / 24576MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
from dataclasses import dataclass, field
from typing import Optional

import torch
from accelerate import Accelerator
from datasets import load_dataset
from peft import LoraConfig
from tqdm import tqdm
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, HfArgumentParser, TrainingArguments

from trl import SFTTrainer

tqdm.pandas()


# Define and parse arguments.
@dataclass
class ScriptArguments:
    """
    The name of the Casual LM model we wish to fine with SFTTrainer
    """

    model_name: Optional[str] = field(default="facebook/opt-350m", metadata={"help": "the model name"})
    dataset_name: Optional[str] = field(
        default="timdettmers/openassistant-guanaco", metadata={"help": "the dataset name"}
    )
    dataset_text_field: Optional[str] = field(default="text", metadata={"help": "the text field of the dataset"})
    log_with: Optional[str] = field(default=None, metadata={"help": "use 'wandb' to log with wandb"})
    learning_rate: Optional[float] = field(default=1.41e-5, metadata={"help": "the learning rate"})
    batch_size: Optional[int] = field(default=64, metadata={"help": "the batch size"})
    seq_length: Optional[int] = field(default=512, metadata={"help": "Input sequence length"})
    gradient_accumulation_steps: Optional[int] = field(
        default=16, metadata={"help": "the number of gradient accumulation steps"}
    )
    load_in_8bit: Optional[bool] = field(default=False, metadata={"help": "load the model in 8 bits precision"})
    load_in_4bit: Optional[bool] = field(default=False, metadata={"help": "load the model in 4 bits precision"})
    use_peft: Optional[bool] = field(default=False, metadata={"help": "Wether to use PEFT or not to train adapters"})
    trust_remote_code: Optional[bool] = field(default=True, metadata={"help": "Enable `trust_remote_code`"})
    output_dir: Optional[str] = field(default="output", metadata={"help": "the output directory"})
    peft_lora_r: Optional[int] = field(default=64, metadata={"help": "the r parameter of the LoRA adapters"})
    peft_lora_alpha: Optional[int] = field(default=16, metadata={"help": "the alpha parameter of the LoRA adapters"})
    logging_steps: Optional[int] = field(default=1, metadata={"help": "the number of logging steps"})
    use_auth_token: Optional[bool] = field(default=True, metadata={"help": "Use HF auth token to access the model"})
    num_train_epochs: Optional[int] = field(default=3, metadata={"help": "the number of training epochs"})
    max_steps: Optional[int] = field(default=-1, metadata={"help": "the number of training steps"})
    save_steps: Optional[int] = field(
        default=100, metadata={"help": "Number of updates steps before two checkpoint saves"}
    )
    save_total_limit: Optional[int] = field(default=10, metadata={"help": "Limits total number of checkpoints."})
    push_to_hub: Optional[bool] = field(default=False, metadata={"help": "Push the model to HF Hub"})
    hub_model_id: Optional[str] = field(default=None, metadata={"help": "The name of the model on HF Hub"})


parser = HfArgumentParser(ScriptArguments)
script_args = parser.parse_args_into_dataclasses()[0]


In [3]:
!ls /mystuff/llm

Llama-2-13B-GPTQ			 btlm-3b-8k-base
Llama-2-7b-chat-hf			 datasets
Llama-2-7b-hf				 flan-t5-large
Mistral-7B-v0.1				 flan-t5-xl
Mistral-7B-v0.1-GPTQ			 flan-t5-xxl
Qwen-7B					 flan-ul2
T0					 gte-base
T0pp					 hmm
WizardLM-1.0-Uncensored-Llama2-13B-GPTQ  minotaur-15b
WizardLM-33B-V1.0-Uncensored-GPTQ	 mistral-package
Xwin-LM-70B-V0.1			 open_llama_3b_v2
airoboros-l2-13B-3.0-GPTQ		 phi-1_5
airoboros-l2-7B-3.0-GPTQ		 processed
all-MiniLM-L12-v2			 starchat-beta
bge-base-en				 starcoder
bge-small-en				 vicuna-13b-v1.5-16k
bge.zip


In [5]:
from datasets import load_dataset
dataset = load_dataset("parquet", data_files={'train': 'llama_train_8192.parquet'}, split="train")
eval_dataset = load_dataset("parquet", data_files={'test': 'llama_valid.parquet'}, split="test")


In [6]:
dataset

Dataset({
    features: ['text', 'size'],
    num_rows: 8192
})

In [7]:
eval_dataset[0]

{'text': '<s>[INST]answer the following multiple choice question with a single letter matching the option for the correct answer. use the provided context with your own knowledge to find the correct answer.the set of possible letters for options is A, B, C, D, E. answer with the letter of the correct answer only, and nothing more. \n\ncontext: The presence of a clustered thick disk-like component of dark matter in the Galaxy has been suggested by Sanchez-Salcedo (1997, 1999) and Kerins (1997).Kerins, E. J. 1997, Astronomy and Astrophysics, 322, 709-718 (ADS entry )Sánchez-Salcedo, F. J. 1997, Astrophysical Journal, 487, L61-L64 (ADS entry )Sánchez-Salcedo, F. J. 1999, Monthly Notices of the Royal Astronomical Society, 303, 755-772 (ADS entry ) ==See also== * Dark matter * Brown dwarfs * White dwarfs * Microlensing * Hypercompact stellar system * Massive compact halo object (MACHOs) * Weakly interacting massive particles (WIMPs) ==References== Category:Star clusters Category:Open cluste

In [8]:

p_load_in_8_bit = True
p_load_in_4_bit = False
model_name = "/mystuff/llm/Llama-2-7b-hf"
trust_remote = True
use_auth_token = False
#dataset_name = "timdettmers/openassistant-guanaco"
output_dir = "llama_finetuned_base_8k"
batch_size = 2
gradient_accumulation_steps = batch_size
learning_rate = 1.41e-5
logging_steps = 100
num_train_epochs = 2
max_steps = 10
log_with = "none"
save_steps = 1000
save_total_limit = 5
push_to_hub = False
hub_model_id = None
use_peft = True
peft_lora_r = 64
peft_lora_alpha = 16
seq_length = 4096
load_best = True
dataset_text_field = "text"


# Step 1: Load the model
if p_load_in_8_bit and p_load_in_4_bit:
    raise ValueError("You can't load the model in 8 bits and 4 bits at the same time")
elif p_load_in_8_bit or p_load_in_4_bit:
    quantization_config = BitsAndBytesConfig(
        load_in_8bit=p_load_in_8_bit, load_in_4bit=p_load_in_4_bit
    )
    # Copy the model to each device
    device_map = {"": Accelerator().local_process_index}
    torch_dtype = torch.bfloat16
else:
    device_map = None
    quantization_config = None
    torch_dtype = None

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map=device_map,
    trust_remote_code=trust_remote,
    torch_dtype=torch_dtype,
    use_auth_token=use_auth_token,
)

# Step 2: Load the dataset
#dataset = load_dataset(dataset_name, split="train")

# Step 3: Define the training arguments
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=gradient_accumulation_steps,
    learning_rate=learning_rate,
    logging_steps=logging_steps,
    num_train_epochs=num_train_epochs,
    #max_steps=max_steps,
    report_to=log_with,
    save_steps=save_steps,
    save_total_limit=save_total_limit,
    push_to_hub=push_to_hub,
    hub_model_id=hub_model_id,
    load_best_model_at_end=load_best,
    evaluation_strategy = "steps",
    eval_steps = 50,
    #evaluate_during_training=True,
)

# Step 4: Define the LoraConfig
if use_peft:
    peft_config = LoraConfig(
        r=peft_lora_r,
        lora_alpha=peft_lora_alpha,
        bias="none",
        task_type="CAUSAL_LM",
    )
else:
    peft_config = None

# Step 5: Define the Trainer
trainer = SFTTrainer(
    model=model,
    args=training_args,
    max_seq_length=seq_length,
    train_dataset=dataset,
    dataset_text_field=dataset_text_field,
    peft_config=peft_config,
    eval_dataset = eval_dataset,
)

trainer.train()

# Step 6: Save the model
trainer.save_model(output_dir)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading the tokenizer from the `special_tokens_map.json` and the `added_tokens.json` will be removed in `transformers 5`,  it is kept for forward compatibility, but it is recommended to update your `tokenizer_config.json` by uploading it again. You will see the new `added_tokens_decoder` attribute that will store the relevant information.
Using pad_token, but it is not set yet.


Map:   0%|          | 0/200 [00:00<?, ? examples/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss
50,No log,1.816811
100,1.855000,1.742076
150,1.855000,1.637434
200,1.635500,1.602499
250,1.635500,1.592242
300,1.590200,1.587006
350,1.590200,1.583118
400,1.602800,1.580192
450,1.602800,1.577583
500,1.565600,1.575887




In [9]:
type(model)

transformers.models.llama.modeling_llama.LlamaForCausalLM

In [10]:
from peft import PeftModel

In [12]:
model_name = "/mystuff/llm/Llama-2-7b-hf"
lora_dir = "llama_finetuned_base_8k"

model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cuda:0", trust_remote_code=True, torch_dtype="auto")

#model = AutoModelForCausalLM.from_pretrained()
model = PeftModel.from_pretrained(model, lora_dir)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [13]:
#https://github.com/huggingface/peft/issues/638
model = model.merge_and_unload()

model.save_pretrained("merged_model_llama2_finetuned_peft")

In [None]:
#for x in range(30):
#    print(len(dataset[x]["text"]))


In [None]:

p_load_in_8_bit = True
p_load_in_4_bit = False
model_name = "/mystuff/llm/Llama-2-7b-chat-hf"
trust_remote = True
use_auth_token = False
dataset_name = "timdettmers/openassistant-guanaco"
output_dir = "llama_finetuned"
batch_size = 8
gradient_accumulation_steps = 1
learning_rate = 1.41e-5
logging_steps = 1
num_train_epochs = 3
max_steps = 1
log_with = "none"
save_steps = 100
save_total_limit = 10
push_to_hub = False
hub_model_id = None
use_peft = True
peft_lora_r = 64
peft_lora_alpha = 16
seq_length = 1024
dataset_text_field = "text"


# Step 1: Load the model
if p_load_in_8_bit and p_load_in_4_bit:
    raise ValueError("You can't load the model in 8 bits and 4 bits at the same time")
elif p_load_in_8_bit or p_load_in_4_bit:
    quantization_config = BitsAndBytesConfig(
        load_in_8bit=p_load_in_8_bit, load_in_4bit=p_load_in_4_bit
    )
    # Copy the model to each device
    device_map = {"": Accelerator().local_process_index}
    torch_dtype = torch.bfloat16
else:
    device_map = None
    quantization_config = None
    torch_dtype = None

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map=device_map,
    trust_remote_code=trust_remote,
    torch_dtype=torch_dtype,
    use_auth_token=use_auth_token,
)

# Step 2: Load the dataset
dataset = load_dataset(dataset_name, split="train")

# Step 3: Define the training arguments
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    learning_rate=learning_rate,
    logging_steps=logging_steps,
    num_train_epochs=num_train_epochs,
    max_steps=max_steps,
    report_to=log_with,
    save_steps=save_steps,
    save_total_limit=save_total_limit,
    push_to_hub=push_to_hub,
    hub_model_id=hub_model_id,
)

# Step 4: Define the LoraConfig
if use_peft:
    peft_config = LoraConfig(
        r=peft_lora_r,
        lora_alpha=peft_lora_alpha,
        bias="none",
        task_type="CAUSAL_LM",
    )
else:
    peft_config = None

# Step 5: Define the Trainer
trainer = SFTTrainer(
    model=model,
    args=training_args,
    max_seq_length=seq_length,
    train_dataset=dataset,
    dataset_text_field=dataset_text_field,
    peft_config=peft_config,
)

trainer.train()

# Step 6: Save the model
trainer.save_model(output_dir)

In [None]:
model_name = "/mystuff/llm/Llama-2-7b-chat-hf"

In [None]:
from datetime import datetime
import os
import sys
from tqdm.notebook import tqdm

import torch
from peft import (
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_int8_training,
    set_peft_model_state_dict,
)
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq

In [None]:
from datasets import load_dataset
#dataset = load_dataset(dataset_name, split="train")
train_dataset = load_dataset("parquet", data_files={'train': 'llama_train.parquet'}, split="train")
eval_dataset = load_dataset("parquet", data_files={'test': 'llama_valid.parquet'}, split="test")


In [None]:
dataset_name = "timdettmers/openassistant-guanaco"
guantaco_dataset = load_dataset(dataset_name, split="train")


In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name, local_files_only=True)


In [None]:
my_sizes = []
for n in tqdm(range(train_dataset.shape[0])):
    count = len(tokenizer(train_dataset[n]["text"])["input_ids"])
    my_sizes.append(count)

In [None]:
train_dataset.shape

In [27]:
import pandas as pd

df_my_counts = pd.DataFrame()
df_my_counts["size"] = my_sizes

In [28]:
df_my_counts.describe()

Unnamed: 0,size
count,60347.0
mean,1655.850846
std,2650.810107
min,297.0
25%,1084.0
50%,1321.0
75%,1664.0
max,405056.0


In [43]:
import numpy as np

np.argmax(df_my_counts["size"])


21133

In [45]:
#train_dataset[21133]

In [47]:
train_dataset[df_my_counts["size"] < 4000].shape

AttributeError: 'dict' object has no attribute 'shape'

In [None]:
df_my_counts_smaller =

In [48]:
!ls -l llama_train.parquet

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-rw-r--r-- 1 root root 180072962 Sep 12 21:18 llama_train.parquet


In [37]:
q_sizes = []
for n in tqdm(range(guantaco_dataset.shape[0])):
    count = len(tokenizer(guantaco_dataset[n]["text"])["input_ids"])
    q_sizes.append(count)

  0%|          | 0/9846 [00:00<?, ?it/s]

In [38]:
import pandas as pd

df_q_counts = pd.DataFrame()
df_q_counts["size"] = q_sizes

In [39]:
df_q_counts.describe()

Unnamed: 0,size
count,9846.0
mean,442.543571
std,368.110089
min,17.0
25%,200.0
50%,365.0
75%,571.0
max,7831.0


In [40]:
import numpy as np

np.argmax(df_q_counts["size"])


4824

In [41]:
guantaco_dataset[4824]

{'text': '### Human: 什麼是期貨？### Assistant: 期货的英文名是Futures，期货是相对于现货来说的，所谓的现货交易就是一手交钱一手交货，而期货是我需要的东西还没有生产出来，但我把全部货款或者部分货款作为定金先给供货商，过段时间供货商给我交货就行。比如，我们现在买的期房都是先付钱，过段时间开发商才会交房。\n期货在历史上是从远期合约开始的，远期合约在人类历史上很早就出现了，最早可以追溯到公元前的古希腊时代。远期合约市场最早出现在日本的江户幕府时代，那个时代，稻米是一种非常重要战略资源，因为一旦准备打仗，将军就需要向农户大量买米备战，但是将军不知道明年的米价是上涨还是下跌，于是将军就对农户说“我现在先把钱付给你，你就按这个价格明年给我稻米，不管明年米价是涨是跌，必须按这个价格来”。将军和农户达成的这个约定就叫做远期合约。\n\n远期合约最大的作用就是套期保值。我们知道商品的价格是在波动的，比如说将军和农户签订合约的时候，他们会按某一个价格去计算，但是明年稻米的价格不一定是他们约定的这个价格，如果稻米价格上涨，按市场价卖，那么对农户自然是有利的，因为他可以卖更多钱；但对于将军来说，他买米需要花更多的钱，所以将军害怕稻米价格上涨，需要合约来提前锁定价格；反之，如果明年稻米价格下跌，自然对将军有利，但对农户来说，他们会少卖钱甚至可能亏本，所以农户害怕稻米价格下跌，也需要通过合约来提前锁定价格。既然双方都有这个需求，于是一个远期现货交易的合约就产生了。合同约定，明年的某个时候，以约定的价格来交易，谁也别吃亏，谁也别占便宜，这就是所谓的套期保值。\n所以说套期保值对于一些生产型企业是有很大帮助的，可以减少企业的经营风险。\n\n不过远期合约也存在很多的问题。首先，远期合约很容易出现毁约的情况。继续用将军跟农户买稻米的事举例，通常情况下，将军肯定不会先把所有的货款都给农户，只会给一部分定金，比如先给100两银子作为定金，约定明年农户给将军100担稻米时，将军再把尾款付给农户。但到了第二年100担稻米的价格涨价到300两银子了，这时候农户一算发现如果自己要是毁约，不把稻米卖给将军，自己赚的更多呀，于是农户打算把原来的定金退还给将军，然后带着粮食到价高的地方去卖。如果这样的事发生，将军就吃亏了。反过来说，将军也可能毁约，比如第二年的时候稻米价格下降

In [9]:
dataset[0]

{'text': '<s>[INST]answer the following multiple choice question with a single letter matching the option for the correct answer. use the provided context with your own knowledge to find the correct answer.the set of possible letters for options is A, B, C, D, E. answer with the letter of the correct answer only, and nothing more. \n\ncontext: Some of these standards are examples of dynamical time scales and/or of coordinate time scales. A standard for civil time can specify both time intervals and time-of- day. Modern civil time is generally national standard time in a time zone at a fixed offset from Coordinated Universal Time (UTC), possibly adjusted by daylight saving time during part of the year. As defined, TCB (as observed from the Earth\'s surface) is of divergent rate relative to all of ET, Teph and TDT/TT;P K Seidelmann & T Fukushima (1992), "Why new time scales?", Astronomy & Astrophysics vol.265 (1992), pages 833-838, including Fig. 1 at p.835, a graph giving an overview of

In [13]:
print(eval_dataset[0]["text"])

<s>[INST]answer the following multiple choice question with a single letter matching the option for the correct answer. use the provided context with your own knowledge to find the correct answer.the set of possible letters for options is A, B, C, D, E. answer with the letter of the correct answer only, and nothing more. 

context: The presence of a clustered thick disk-like component of dark matter in the Galaxy has been suggested by Sanchez-Salcedo (1997, 1999) and Kerins (1997).Kerins, E. J. 1997, Astronomy and Astrophysics, 322, 709-718 (ADS entry )Sánchez-Salcedo, F. J. 1997, Astrophysical Journal, 487, L61-L64 (ADS entry )Sánchez-Salcedo, F. J. 1999, Monthly Notices of the Royal Astronomical Society, 303, 755-772 (ADS entry ) ==See also== * Dark matter * Brown dwarfs * White dwarfs * Microlensing * Hypercompact stellar system * Massive compact halo object (MACHOs) * Weakly interacting massive particles (WIMPs) ==References== Category:Star clusters Category:Open clusters Observati

In [9]:
q = """[INST]answer the following multiple choice question with a single letter matching the option for the correct answer. use the provided context with your own knowledge to find the correct answer.the set of possible letters for options is A, B, C, D, E. answer with the letter of the correct answer only, and nothing more.

context: The presence of a clustered thick disk-like component of dark matter in the Galaxy has been suggested by Sanchez-Salcedo (1997, 1999) and Kerins (1997).Kerins, E. J. 1997, Astronomy and Astrophysics, 322, 709-718 (ADS entry )Sánchez-Salcedo, F. J. 1997, Astrophysical Journal, 487, L61-L64 (ADS entry )Sánchez-Salcedo, F. J. 1999, Monthly Notices of the Royal Astronomical Society, 303, 755-772 (ADS entry ) ==See also== * Dark matter * Brown dwarfs * White dwarfs * Microlensing * Hypercompact stellar system * Massive compact halo object (MACHOs) * Weakly interacting massive particles (WIMPs) ==References== Category:Star clusters Category:Open clusters Observations of the Bullet Cluster are the strongest evidence for the existence of dark matter; however, Brownstein and Moffat have shown that their modified gravity theory can also account for the properties of the cluster. == Observational methods == Clusters of galaxies have been found in surveys by a number of observational techniques and have been studied in detail using many methods: * Optical or infrared: The individual galaxies of clusters can be studied through optical or infrared imaging and spectroscopy. The observed distortions can be used to model the distribution of dark matter in the cluster. == Temperature and density == Clusters of galaxies are the most recent and most massive objects to have arisen in the hierarchical structure formation of the Universe and the study of clusters tells one about the way galaxies form and evolve. A 2021 article postulated that approximately 50% of all baryonic matter is outside dark matter haloes, filling the space between galaxies, and that this would explain the missing baryons not accounted for in the 2017 paper. == Current state == Currently, many groups have observed the intergalactic medium and circum-galactic medium to obtain more measurements and observations of baryons to support the leading observations. In cosmology, the missing baryon problem is an observed discrepancy between the amount of baryonic matter detected from shortly after the Big Bang and from more recent epochs. Brownstein and Moffat use a theory of modified gravity to explain X-ray cluster masses without dark matter. The missing baryon problem has been resolved but research groups are working to detect the WHIM using varying methods to confirm results. ==References== Category:Physical cosmology Category:Baryons Baryons make up only ~5% of the universe, while dark matter makes up 26.8%. ==Early universe measurements== The abundance of baryonic matter in the early universe can be obtained indirectly from two independent methods: * The theory of Big Bang nucleosynthesis, which predicts the observed relative abundance of the chemical elements in observations of the recent universe. The missing baryon problem is different from the dark matter problem, which is non-baryonic in nature.See Lambda-CDM model. In a typical cluster perhaps only 5% of the total mass is in the form of galaxies, maybe 10% in the form of hot X-ray emitting gas and the remainder is dark matter. In astronomy, a RAMBO or robust association of massive baryonic objects is a dark cluster made of brown dwarfs or white dwarfs. It is composed of mostly ionized hydrogen and is about 10% of a galaxy cluster's total mass; the rest being dark matter. This is highly nontrivial, since although luminous matter such as stars and galaxies are easily summed, baryonic matter can also exist in highly non-luminous form, such as black holes, planets, and highly diffuse interstellar gas. Cosmological hydrodynamical simulations from theory predict that a fraction of the missing baryons are located in galactic haloes at temperatures of 106 K and the (WHIM) at temperatures of 105–107 K, with recent observations providing strong support. 50x50px Available under CC BY 4.0. In models for the gravitational formation of structure with cold dark matter, the smallest structures collapse first and eventually build the largest structures, clusters of galaxies. Large scale galaxy surveys in the 2000s revealed a baryon deficit. At the same time, a census of baryons in the recent observable universe has found that observed baryonic matter accounts for less than half of that amount. A mass deficit is the amount of mass (in stars) that has been removed from the center of a galaxy, presumably by the action of a binary supermassive black hole. thumb|left|The figure illustrates how mass deficits are measured, using the observed brightness profile of a galaxy The density of stars increases toward the center in most galaxies. One claim of a solution was published in 2017 when two groups of scientists said they found evidence for the location of missing baryons in intergalactic matter. When observed visually, clusters appear to be collections of galaxies held together by mutual gravitational attraction. 

question: Which of the following statements accurately describes the impact of Modified Newtonian Dynamics (MOND) on the observed "missing baryonic mass" discrepancy in galaxy clusters?

answer options:
A: MOND is a theory that reduces the observed missing baryonic mass in galaxy clusters by postulating the existence of a new form of matter called "fuzzy dark matter."\
B: MOND is a theory that increases the discrepancy between the observed missing baryonic mass in galaxy clusters and the measured velocity dispersions from a factor of around 10 to a factor of about 20.\
C: MOND is a theory that explains the missing baryonic mass in galaxy clusters that was previously considered dark matter by demonstrating that the mass is in the form of neutrinos and axions.\
D: MOND is a theory that reduces the discrepancy between the observed missing baryonic mass in galaxy clusters and the measured velocity dispersions from a factor of around 10 to a factor of about 2.\
E: MOND is a theory that eliminates the observed missing baryonic mass in galaxy clusters by imposing a new mathematical formulation of gravity that does not require the existence of dark matter.\
[/INST]"""

In [10]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
from transformers.generation import GenerationConfig
from tqdm.notebook import tqdm

tokenizer = AutoTokenizer.from_pretrained(llm, local_files_only=True)

In [13]:
%%time
torch_device = "cuda:0"
model_inputs = tokenizer(q, return_tensors='pt').to(torch_device)
greedy_output = model.generate(**model_inputs, max_new_tokens=3, eos_token_id=tokenizer.eos_token_id)
response = tokenizer.decode(greedy_output[0], skip_special_tokens=True)
print(response)


[INST]answer the following multiple choice question with a single letter matching the option for the correct answer. use the provided context with your own knowledge to find the correct answer.the set of possible letters for options is A, B, C, D, E. answer with the letter of the correct answer only, and nothing more.

context: The presence of a clustered thick disk-like component of dark matter in the Galaxy has been suggested by Sanchez-Salcedo (1997, 1999) and Kerins (1997).Kerins, E. J. 1997, Astronomy and Astrophysics, 322, 709-718 (ADS entry )Sánchez-Salcedo, F. J. 1997, Astrophysical Journal, 487, L61-L64 (ADS entry )Sánchez-Salcedo, F. J. 1999, Monthly Notices of the Royal Astronomical Society, 303, 755-772 (ADS entry ) ==See also== * Dark matter * Brown dwarfs * White dwarfs * Microlensing * Hypercompact stellar system * Massive compact halo object (MACHOs) * Weakly interacting massive particles (WIMPs) ==References== Category:Star clusters Category:Open clusters Observations 

In [25]:
%%time
torch_device = "cuda:0"
model_inputs = tokenizer(q, return_tensors='pt').to(torch_device)
greedy_output = model.generate(**model_inputs, max_new_tokens=40, eos_token_id=tokenizer.eos_token_id)
response = tokenizer.decode(greedy_output[0], skip_special_tokens=True)
print(response)


[INST]answer the following multiple choice question with a single letter matching the option for the correct answer. use the provided context with your own knowledge to find the correct answer.the set of possible letters for options is A, B, C, D, E. answer with the letter of the correct answer only, and nothing more.

context: The presence of a clustered thick disk-like component of dark matter in the Galaxy has been suggested by Sanchez-Salcedo (1997, 1999) and Kerins (1997).Kerins, E. J. 1997, Astronomy and Astrophysics, 322, 709-718 (ADS entry )Sánchez-Salcedo, F. J. 1997, Astrophysical Journal, 487, L61-L64 (ADS entry )Sánchez-Salcedo, F. J. 1999, Monthly Notices of the Royal Astronomical Society, 303, 755-772 (ADS entry ) ==See also== * Dark matter * Brown dwarfs * White dwarfs * Microlensing * Hypercompact stellar system * Massive compact halo object (MACHOs) * Weakly interacting massive particles (WIMPs) ==References== Category:Star clusters Category:Open clusters Observations 

In [18]:
!ls /mystuff/llm/Llama-2-7b-chat-hf

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
LICENSE.txt			  pytorch_model-00001-of-00002.bin
README.md			  pytorch_model-00002-of-00002.bin
USE_POLICY.md			  pytorch_model.bin.index.json
config.json			  special_tokens_map.json
generation_config.json		  tokenizer.json
model-00001-of-00002.safetensors  tokenizer.model
model-00002-of-00002.safetensors  tokenizer_config.json
model.safetensors.index.json


In [19]:
llm = "/mystuff/llm/Llama-2-7b-chat-hf"

In [8]:
llm = "/mystuff/notebooks/llama_finetuned_base_8k"