In [1]:
def get_completion(query: str, model, tokenizer, max_new_tokens: int = 1000) -> str:
  """Wrap ``query`` in the instruction prompt, sample a completion and decode it.

  Args:
    query: The instruction/question text inserted into the prompt.
    model: Object exposing ``generate(**inputs, ...)``. Its ``device``
      attribute, when present, decides where the inputs are moved.
    tokenizer: Callable tokenizer that also provides ``eos_token_id`` and
      ``batch_decode``.
    max_new_tokens: Upper bound on generated tokens, forwarded to ``generate``.

  Returns:
    The first decoded sequence exactly as ``batch_decode`` returns it; no
    post-processing is applied here.
  """
  # Follow the model's own placement instead of assuming the first GPU; fall
  # back to the previous hard-coded default when the model exposes no device.
  device = getattr(model, "device", None)
  if device is None:
    device = "cuda:0"

  # Same layout as the no-input branch of generate_prompt, so inference prompts
  # match the text used for fine-tuning. Built from adjacent literals rather
  # than an indented triple-quoted string so source indentation cannot leak
  # into the prompt.
  prompt = (
      "Below is an instruction that describes a task. "
      "Write a response that appropriately completes the request.\n\n"
      f"### Instruction:\n{query}\n\n"
      "### Response:\n"
  )

  encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
  model_inputs = encodeds.to(device)

  generated_ids = model.generate(
      **model_inputs,
      max_new_tokens=max_new_tokens,
      do_sample=True,
      # EOS doubles as the padding id during generation.
      pad_token_id=tokenizer.eos_token_id,
  )
  decoded = tokenizer.batch_decode(generated_ids)
  return decoded[0]

In [2]:
!pip install --upgrade pip
!pip install -U bitsandbytes
!pip install  -U git+https://github.com/huggingface/transformers.git
!pip install  -U git+https://github.com/huggingface/peft.git
!pip install -U git+https://github.com/huggingface/accelerate.git
!pip install datasets
!pip install pandas
!pip install matplotlib
!pip install scipy

Collecting pip
  Downloading pip-23.3.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m20.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 22.2.2
    Uninstalling pip-22.2.2:
      Successfully uninstalled pip-22.2.2
Successfully installed pip-23.3.1
Collecting bitsandbytes
  Downloading bitsandbytes-0.41.2.post2-py3-none-any.whl.metadata (9.8 kB)
Downloading bitsandbytes-0.41.2.post2-py3-none-any.whl (92.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m30.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.41.2.post2
Collecting git+https://github.com/huggingface/transformers.git
  Cloning https://github.com/huggingface/transformers.git to /tmp/pip-req-build-al9px812
  Running command git clone


KeyboardInterrupt



In [None]:
import torch

print("PyTorch version:", torch.__version__)

# Report every visible CUDA device, or state that none was detected
if not torch.cuda.is_available():
    print("CUDA is not available. No GPU detected.")
else:
    print("CUDA is available.")
    gpu_count = torch.cuda.device_count()
    print("Number of GPU:", gpu_count)

    bytes_per_gb = 1024 ** 3
    for i in range(gpu_count):
        props = torch.cuda.get_device_properties(i)
        print(f"Device {i}: {torch.cuda.get_device_name(i)}")
        print(props)
        print(f"Memory GB: {props.total_memory / bytes_per_gb:.2f} GB")
        # Allocated vs. reserved memory as reported by torch for this device
        print(f"GPU Allocated: {torch.cuda.memory_allocated(i) / bytes_per_gb:.2f} GB")
        print(f"GPU Cached:    {torch.cuda.memory_reserved(i) / bytes_per_gb:.2f} GB")


In [None]:
# Run the nvidia-smi command-line tool in a subshell and show its output in the cell.
!nvidia-smi

In [None]:
from datasets import load_dataset
import pandas as pd

# Lift pandas' display limits: no cap on the number of columns shown,
# and no truncation of long cell contents
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None

# Fetch the 'train' split of the dataset
data = load_dataset("gbharti/finance-alpaca", split='train')

# Mirror it into a pandas DataFrame for inspection
df = data.to_pandas()

# Preview the first 10 rows
df.head(10)


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Exploratory overview of `df`, which must already be loaded by an earlier cell.

# Basic Dataset Information
print("Basic Dataset Information:")
print(f"Number of Rows: {df.shape[0]}")
print(f"Number of Columns: {df.shape[1]}")
print(f"Column Names: {df.columns.tolist()}", end="\n\n")

# Memory Usage
print("Memory Usage by Column:")
print(df.memory_usage(deep=True), end="\n\n")

# Data Types
print("Data Types of Each Column:")
print(df.dtypes, end="\n\n")

# Character count of each cell, one derived column per text column.
# Work on a copy so `df` itself is left untouched.
text_columns = ['instruction', 'input', 'output']
length_columns = [f'num_characters_{name}' for name in text_columns]

analysis_df = df.copy()
for text_col, length_col in zip(text_columns, length_columns):
    # .str.len() is vectorised and yields NaN for a missing cell, where len(None) would raise
    analysis_df[length_col] = analysis_df[text_col].str.len()

# Show Distribution
analysis_df.hist(column=length_columns, bins=30, figsize=(12, 8))
plt.suptitle('Distribution of Character Counts in Each Column')
plt.show()

# Descriptive Statistics for Character Counts
print("Descriptive Statistics for Character Counts:")
print(analysis_df[length_columns].describe(), end="\n\n")

# Additional Detailed Statistics
max_chars_instruction = analysis_df['num_characters_instruction'].max()
max_chars_input = analysis_df['num_characters_input'].max()
max_chars_output = analysis_df['num_characters_output'].max()

min_chars_instruction = analysis_df['num_characters_instruction'].min()
min_chars_input = analysis_df['num_characters_input'].min()
min_chars_output = analysis_df['num_characters_output'].min()

# Print detailed statistics (these extremes were previously computed but never shown)
print("Character Count Extremes (min / max):")
print(f"instruction: {min_chars_instruction} / {max_chars_instruction}")
print(f"input: {min_chars_input} / {max_chars_input}")
print(f"output: {min_chars_output} / {max_chars_output}", end="\n\n")

# Missing Values
print("Missing Values in Each Column:")
print(analysis_df.isnull().sum(), end="\n\n")

# Unique Values
print("Unique Values in Each Column:")
print(analysis_df.nunique(), end="\n\n")


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Quantisation settings later handed to from_pretrained via `quantization_config`:
# 4-bit loading, "nf4" quant type, double quantisation on, bfloat16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [None]:
model_id = "meta-llama/Llama-2-7b-hf"

# Base model loaded with the quantisation settings from `bnb_config`;
# device placement is delegated to device_map="auto"
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

# Tokenizer for the same checkpoint, created with add_eos_token=True
base_tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    add_eos_token=True,
)

In [None]:
# Sample an answer from the freshly loaded base model and show the raw decoded text
result = get_completion(
    "Will capital gains affect my tax bracket?",
    base_model,
    base_tokenizer,
)
print(result)

In [None]:
def generate_prompt(data_point):
    """Build the training text for one record: preamble, instruction, optional input, response.

    :param data_point: dict: record providing "instruction", "input" and "output" entries
    :return: str: the assembled prompt text, ending with the record's output
    """
    # A truthy "input" entry means the record carries additional context
    has_context = bool(data_point['input'])

    if has_context:
        preamble = ('Below is an instruction that describes a task, paired with an input that provides'
                    ' further context. Write a response that appropriately completes the request.\n\n')
    else:
        preamble = ('Below is an instruction that describes a task. Write a response that '
                    'appropriately completes the request.\n\n')

    sections = [preamble, f'### Instruction:\n{data_point["instruction"]}\n\n']
    if has_context:
        sections.append(f'### Input:\n{data_point["input"]}\n\n')
    sections.append(f'### Response:\n{data_point["output"]}')

    return ''.join(sections)

# add the "prompt" column in the dataset
text_column = [generate_prompt(data_point) for data_point in data]
# Drop a stale "prompt" column first so that re-running this cell replaces it
# instead of failing on a duplicate column name.
if "prompt" in data.column_names:
    data = data.remove_columns("prompt")
data = data.add_column("prompt", text_column)

In [None]:
# Shuffle with a fixed seed, then run the tokenizer over the "prompt" column in batches
data = (
    data
    .shuffle(seed=1234)  # Shuffle dataset here
    .map(lambda samples: base_tokenizer(samples["prompt"]), batched=True)
)

In [None]:
# Hold out 10% of the rows for evaluation.
# The split is seeded (same value as the shuffle seed) so train/test membership
# is reproducible across runs. It is bound to its own name so that `data` keeps
# referring to the full Dataset and this cell can be re-run safely.
dataset_splits = data.train_test_split(test_size=0.1, seed=1234)
train_data = dataset_splits["train"]
test_data = dataset_splits["test"]

In [None]:
# Show the summary of both splits, one per line
print(train_data, test_data, sep="\n")

In [None]:
from peft import prepare_model_for_kbit_training

# Turn on gradient checkpointing on the quantized base model.
base_model.gradient_checkpointing_enable()
# Run PEFT's k-bit training preparation and rebind the name to the returned model.
base_model = prepare_model_for_kbit_training(base_model)

In [None]:
# Print the model's repr to inspect its structure.
print(base_model)

In [None]:
import bitsandbytes as bnb
def find_all_linear_names(model, linear_cls=None):
  """Collect the leaf names of all modules of a given layer class, for use as LoRA targets.

  Args:
    model: Object exposing ``named_modules()`` that yields ``(name, module)`` pairs.
    linear_cls: Class (or tuple of classes) to match with ``isinstance``.
      Defaults to ``bnb.nn.Linear4bit``, the class this notebook previously
      hard-coded.

  Returns:
    Sorted list of unique last path components (e.g. ``"a.b.q_proj"`` gives
    ``"q_proj"``) of the matching modules, with ``"lm_head"`` excluded.
  """
  if linear_cls is None:
    linear_cls = bnb.nn.Linear4bit

  # Only the last dotted component identifies the layer type; the set de-duplicates
  # names that repeat across the model.
  lora_module_names = {
      name.split('.')[-1]
      for name, module in model.named_modules()
      if isinstance(module, linear_cls)
  }

  # needed for 16-bit: the output head must not become a LoRA target.
  # Done once after the scan instead of on every loop iteration.
  lora_module_names.discard('lm_head')

  # Sorted so the returned target list is deterministic between runs.
  return sorted(lora_module_names)

In [None]:
# Collect the candidate LoRA target module names from the prepared model and show them.
modules = find_all_linear_names(base_model)
print(modules)

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings; the targets are the module names collected into `modules`
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=modules,
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the prepared model with the adapters and keep using the same name
base_model = get_peft_model(
    base_model,
    lora_config,
)

In [None]:
# Unpack the (trainable, total) parameter counts reported by the PEFT-wrapped model
# and print them together with the trainable share as a percentage.
trainable, total = base_model.get_nb_trainable_parameters()
print(f"Trainable: {trainable} | total: {total} | Percentage: {trainable/total*100:.4f}%")

In [None]:
pip install ipywidgets

In [None]:
!pip install ipywidgets
!pip install  trl

In [None]:
import transformers

from trl import SFTTrainer

# Use EOS as the padding token and empty torch's CUDA cache before building the trainer
base_tokenizer.pad_token = base_tokenizer.eos_token
torch.cuda.empty_cache()

# Training hyper-parameters (logging_dir is left at its default in this variant)
training_args = transformers.TrainingArguments(
    output_dir="outputs",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,
    max_steps=1000,
    warmup_steps=50,  # Absolute number of warmup steps
    learning_rate=1e-5,
    optim="adamw_torch",
    logging_first_step=True,
    logging_steps=20,
    evaluation_strategy="steps",
    eval_steps=50,
    load_best_model_at_end=True,
)

# Language-modeling collator built from the notebook's tokenizer, with mlm=False
lm_collator = transformers.DataCollatorForLanguageModeling(base_tokenizer, mlm=False)

trainer = SFTTrainer(
    model=base_model,
    train_dataset=train_data,
    eval_dataset=test_data,
    dataset_text_field="prompt",
    peft_config=lora_config,
    args=training_args,
    data_collator=lm_collator,
)

In [None]:
import transformers

from trl import SFTTrainer

# This cell rebuilds `trainer` with the same settings as the previous cell, plus an
# explicit logging_dir. Being the later definition, it is the one the training cell uses.

# Use EOS as the padding token and empty torch's CUDA cache before building the trainer
base_tokenizer.pad_token = base_tokenizer.eos_token
torch.cuda.empty_cache()

trainer = SFTTrainer(
    model=base_model,
    # Hand the notebook's tokenizer to the trainer so that it tokenizes the "prompt"
    # field with the same tokenizer (add_eos_token=True, pad token set above) that the
    # collator below uses. Per the TRL docs, when `tokenizer` is omitted the trainer
    # falls back to the tokenizer associated with the model.
    tokenizer=base_tokenizer,
    train_dataset=train_data,
    eval_dataset=test_data,
    dataset_text_field="prompt",
    peft_config=lora_config,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=16,
        warmup_steps=50, #Absolute number of warmup steps
        max_steps=1000,
        learning_rate=1e-5,
        logging_dir="./logs",
        logging_first_step=True,
        logging_steps=20,
        evaluation_strategy="steps",
        optim="adamw_torch",
        eval_steps=50,
        output_dir="outputs",
        load_best_model_at_end=True,
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(base_tokenizer, mlm=False),
)

In [None]:
import os

base_model.config.use_cache = False  # silence the warnings. Please re-enable for inference!

# Resume only when the checkpoint directory is actually on disk. On a fresh run
# (no ./outputs yet) start training from scratch instead of failing on a missing path.
checkpoint_dir = "./outputs/checkpoint-1000"
resume_from = checkpoint_dir if os.path.isdir(checkpoint_dir) else None
trainer.train(resume_from_checkpoint=resume_from)

In [None]:
# Publish the PEFT-wrapped model and the tokenizer under the same Hub repository name
hub_repo_name = "Llama-2-7b-hf_finetuned_finance_jupyter_v5"
base_model.push_to_hub(hub_repo_name)
base_tokenizer.push_to_hub(hub_repo_name)

In [None]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = "bkpandey/Llama-2-7b-hf_finetuned_finance_jupyter_v5"

# The adapter config names the base checkpoint through `base_model_name_or_path`
config = PeftConfig.from_pretrained(peft_model_id)

# Load that base checkpoint in 4-bit, with automatic device placement
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    load_in_4bit=True,
    device_map='auto',
)
tokenizer = AutoTokenizer.from_pretrained(
    config.base_model_name_or_path,
)

# Load the Lora model on top of the base model and keep the combined model under `model`
model = PeftModel.from_pretrained(
    model,
    peft_model_id,
)

In [None]:
# Ask the adapter-loaded model the same question and show the raw decoded text
result = get_completion(
    "Will capital gains affect my tax bracket?",
    model,
    tokenizer,
)
print(result)