In [2]:
import subprocess

def run_nvidia_smi():
    """Run ``nvidia-smi`` and print its report.

    Prints the command's standard output when it exits with status 0,
    otherwise prints its standard error. Any exception raised while
    launching the command or printing the result is caught and reported
    on standard output instead of propagating.

    Returns:
        None. All results are printed.
    """
    try:
        # Capture both streams as text so they can be printed selectively.
        completed = subprocess.run(['nvidia-smi'], capture_output=True, text=True)

        # Pick the heading and the stream that match the exit status.
        header, payload = (
            ("nvidia-smi output:\n", completed.stdout)
            if completed.returncode == 0
            else ("Error running nvidia-smi:\n", completed.stderr)
        )
        print(header)
        print(payload)

    except Exception as e:
        print(f"An error occurred: {e}")

# Run the check right away so the GPU report (or the error text) is printed
# as this cell's output.
run_nvidia_smi()

nvidia-smi output:

Thu Nov  7 23:41:40 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03              Driver Version: 560.35.03      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A40                     On  |   00000000:17:00.0 Off |                    0 |
|  0%   52C    P0             86W /  300W |    3717MiB /  46068MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA A40           

In [7]:
import os
# Physical GPU this notebook should use. CUDA_VISIBLE_DEVICES is read when CUDA
# is initialised, so it is assigned here, ahead of the torch import that follows.
# Inside this process the selected card is then addressed as device 0.
gpu_index = 3
os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_index)
from huggingface_hub import login
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, pipeline


In [12]:
# Probe CUDA once and reuse the answer for both the report and the device choice.
cuda_ready = torch.cuda.is_available()

if not cuda_ready:
    print("CUDA is not available.")
else:
    # List every device visible to this process.
    num_devices = torch.cuda.device_count()
    print(f"Number of available CUDA devices: {num_devices}")

    for i in range(num_devices):
        print(f"\nDevice {i}: {torch.cuda.get_device_name(i)}")

# Device index passed to the pipeline later on: 0 for GPU, -1 for CPU.
device = -1 if not cuda_ready else 0

Number of available CUDA devices: 1

Device 0: NVIDIA A40


In [10]:
# Hub repositories: the PEFT adapter and the base checkpoint it is applied to.
ADAPTER_REPO = "smartinez1/Llama-3.1-8B-FINLLM"
BASE_MODEL_REPO = "meta-llama/Llama-3.1-8B"

# Adapter configuration (not referenced again in the cells visible here).
config = PeftConfig.from_pretrained(ADAPTER_REPO)

# Load the base causal LM, then wrap it with the adapter weights.
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_REPO)
model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)

# The tokenizer comes from the base model repository, not the adapter one.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_REPO)

Loading checkpoint shards: 100%|██████████████████| 4/4 [00:02<00:00,  1.44it/s]


In [21]:
from transformers import pipeline

# Build the text-generation pipeline around the PEFT-wrapped model and run it
# on the device index chosen earlier (0 for GPU, -1 for CPU).
generator = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
)

# Instructions sent to the model, one generation per entry.
user_inputs = [
    "Provide a link for Credit Card Accountability Responsibility and Disclosure Act law.",
    "Define the following term: National Automated Clearing House Association.",
    "Expand the following acronym into its full form: ESMA.",
]

# Prompt layout: a fixed preamble, the instruction slot ("{}"), then the
# "### Answer:" header after which the model is expected to continue.
prompt_template = "\n".join([
    "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
    "",
    "### Instruction:",
    "{}",
    "",
    "### Answer:",
    "",
])

def _extract_answer(generated_text):
    """Return only the answer portion of a full pipeline generation.

    The generated text echoes the prompt, so the answer is whatever follows
    the first "### Answer:" header. Anything the model appends after the
    answer under a new "###" header (for example a hallucinated follow-up
    "### Instruction:") is dropped.

    The cut is made at the three-character "###" marker rather than at any
    "#", so a legitimate "#" inside the answer (a URL fragment, "C#", ...)
    is preserved.

    Args:
        generated_text: The 'generated_text' string returned by the pipeline.

    Returns:
        The stripped answer text. If the "### Answer:" header is absent,
        the whole text is used as the starting point instead of failing.
    """
    _, header, tail = generated_text.partition("### Answer:")
    answer = tail if header else generated_text
    # Stop at the next template-style section header, if the model emitted one.
    answer, _, _ = answer.partition("###")
    return answer.strip()


# Loop over each user input and generate a response
for user_input in user_inputs:
    # Format the user input into the prompt
    prompt = prompt_template.format(user_input)

    # Generate a response from the model (sampling is on, so output varies per run)
    response = generator(prompt, max_length=200, num_return_sequences=1, do_sample=True)

    # Extract and clean up the AI's response
    response_str = _extract_answer(response[0]['generated_text'])

    # Display the AI's response
    print(f"User: {user_input}")
    print(f"AI: {response_str}")
    print("-" * 50)  # Separator for clarity


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'GraniteForCausalLM', 'GraniteMoeForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'Mamba2ForCausalLM', 'MarianForCausalLM', 'MBartForCausa

User: Provide a link for Credit Card Accountability Responsibility and Disclosure Act law.
AI: Credit Card Accountability Responsibility and Disclosure Act: https://www.federalreserve.gov/publicinfo/files/201407-hfesurvey.pdf
--------------------------------------------------


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


User: Define the following term: National Automated Clearing House Association.
AI: A clearinghouse that provides services for the transfer of funds between financial institutions.
--------------------------------------------------
User: Expand the following acronym into its full form: ESMA.
AI: European Securities and Markets Authority
--------------------------------------------------
