In [1]:
import subprocess
import os

def kill_gpu_processes():
    # Run nvidia-smi and get the output to identify the GPU processes
    try:
        # Fetching the list of processes that are using the GPU
        gpu_processes = subprocess.check_output(
            ['nvidia-smi', '--query-compute-apps=pid,process_name,used_memory', '--format=csv,noheader,nounits'],
            encoding='utf-8'
        )
        
        # Splitting the output into lines (each line represents a process)
        gpu_processes = gpu_processes.strip().split('\n')

        # If no processes found
        if not gpu_processes or gpu_processes[0] == '':
            print("No GPU processes found.")
            return

        # Iterate over each process
        for process_info in gpu_processes:
            pid, process_name, used_memory = process_info.split(',')
            pid = pid.strip()
            process_name = process_name.strip()
            used_memory = used_memory.strip()

            print(f"Terminating process {process_name} (PID: {pid}) using {used_memory} MiB of GPU memory.")

            # Kill the process
            os.system(f'taskkill /PID {pid} /F' if os.name == 'nt' else f'kill -9 {pid}')

        print("All GPU processes have been terminated.")
    except Exception as e:
        print(f"An error occurred: {e}")

# Run the function
kill_gpu_processes()


Terminating process C:\Users\gdco-user\AppData\Local\Programs\Python\Python39\python.exe (PID: 8936) using 17816 MiB of GPU memory.
All GPU processes have been terminated.


In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load the tokenizer and model on CPU initially
tokenizer = AutoTokenizer.from_pretrained("cnpoojitha/UK_MA_labeling")
model = AutoModelForCausalLM.from_pretrained("cnpoojitha/UK_MA_labeling").to("cpu")

# Convert the model to 16-bit precision (float16)
model.half()

# Check the precision and memory usage of the model parameters
total_params = 0
total_size_bytes = 0

print("Model loaded in 16-bit precision (float16). Checking parameter details...\n")

for name, param in model.named_parameters():
    param_size = param.nelement()  # Get number of elements in the parameter
    param_type = param.dtype       # Get data type (precision level) of the parameter

    # Determine the size in bytes based on the precision level (dtype)
    if param_type == torch.float32:
        param_size_bytes = param_size * 4  # 4 bytes per element for float32
    elif param_type == torch.float16:
        param_size_bytes = param_size * 2  # 2 bytes per element for float16
    elif param_type == torch.int8:
        param_size_bytes = param_size      # 1 byte per element for int8
    else:
        param_size_bytes = "unknown"       # Unknown precision level

    # Print the precision level first, followed by parameter name, size, and memory usage
    print(f"Parameter: {name}, dtype: {param_type}")

    if param_size_bytes == "unknown":
        print(f"  Size: {param_size} elements, memory: unknown")
    else:
        print(f"  Size: {param_size} elements, memory: {param_size_bytes / (1024**2):.2f} MB")

    if param_size_bytes != "unknown":
        total_params += param_size
        total_size_bytes += param_size_bytes

# Print total number of parameters and total model size (if known)
print(f"\nTotal parameters: {total_params}")
if total_size_bytes != 0:
    print(f"Total model size: {total_size_bytes / (1024**2):.2f} MB (in 16-bit precision)")
else:
    print(f"Total model size: unknown")

# Check GPU memory
gpu_memory_in_bytes = torch.cuda.get_device_properties(0).total_memory  # Total GPU memory in bytes
gpu_memory_in_mb = gpu_memory_in_bytes / (1024**2)  # Convert to MB

print(f"\nTotal GPU memory available: {gpu_memory_in_mb:.2f} MB")

if total_size_bytes != "unknown" and total_size_bytes <= gpu_memory_in_bytes:
    print("Sufficient GPU memory is available. Moving model to GPU...")
    model.to("cuda")  
else:
    if total_size_bytes == "unknown":
        print("Model size unknown, unable to check GPU memory sufficiency.")
    else:
        print("Insufficient GPU memory. Consider further optimization like using float16 or offloading some layers.")


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Downloading shards: 100%|██████████| 4/4 [08:27<00:00, 126.76s/it]
Loading checkpoint shards: 100%|██████████| 4/4 [00:11<00:00,  2.80s/it]


Model loaded in 16-bit precision (float16). Checking parameter details...

Parameter: model.embed_tokens.weight, dtype: torch.float16
  Size: 525336576 elements, memory: 1002.00 MB
Parameter: model.layers.0.self_attn.q_proj.weight, dtype: torch.float16
  Size: 16777216 elements, memory: 32.00 MB
Parameter: model.layers.0.self_attn.k_proj.weight, dtype: torch.float16
  Size: 4194304 elements, memory: 8.00 MB
Parameter: model.layers.0.self_attn.v_proj.weight, dtype: torch.float16
  Size: 4194304 elements, memory: 8.00 MB
Parameter: model.layers.0.self_attn.o_proj.weight, dtype: torch.float16
  Size: 16777216 elements, memory: 32.00 MB
Parameter: model.layers.0.mlp.gate_proj.weight, dtype: torch.float16
  Size: 58720256 elements, memory: 112.00 MB
Parameter: model.layers.0.mlp.up_proj.weight, dtype: torch.float16
  Size: 58720256 elements, memory: 112.00 MB
Parameter: model.layers.0.mlp.down_proj.weight, dtype: torch.float16
  Size: 58720256 elements, memory: 112.00 MB
Parameter: model.la

In [3]:
import pandas as pd

file_path = r'D:\Prismaccess\UK_MA.xlsx'  # Replace with your file path
df = pd.read_excel(file_path)

# Check if the 'Therapeutic Area' column exists
if 'Therapeutic Area' not in df.columns:
    raise ValueError("The column 'Therapeutic Area' does not exist in the Excel file.")

# If 'TA_Model' column does not exist, create an empty one
if 'TA_Model' not in df.columns:
    df['TA_Model'] = None

# Prepare the instruction for the model
instruction = "Extract the therapeutic area from the given input and use a comma separator in cases where multiple therapeutic areas are present"
cnt=0

# Iterate through each row in the Excel file
for idx, row in df.iterrows():
    # Only process rows where 'TA_Model' is not already filled
    if pd.isna(row['TA_Model']):
        input_text = row['Therapeutic Area']  # Extract input from 'Therapeutic Area' column

        # Create the Alpaca-style prompt
        alpaca_prompt = f"""
        ### Instruction:
        {instruction}

        ### Input:
        {input_text}

        ### Output:
        """

        # Tokenize the input
        inputs = tokenizer(alpaca_prompt, return_tensors="pt").to("cuda")

        # Generate output from the model
        outputs = model.generate(
            **inputs,
            max_new_tokens=64,
            use_cache=True,
            num_beams=5,
            early_stopping=True
        )

        # Decode the generated output
        decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)

        # Extract the text directly after "### Output:"
        extracted_output = decoded_output[0].split("### Output:")[1].strip()

        # Store the extracted output in the DataFrame in the 'TA_Model' column
        df.at[idx, 'TA_Model'] = extracted_output
        cnt=cnt+1
        # Save the updated DataFrame to Excel after each iteration
        if cnt>=1000:
            df.to_excel(file_path, index=False)
            cnt=0

        print(f"Processed row {idx + 1}/{len(df)}. Saved to '{file_path}'")
        
        # print(decoded_output[0])
df.to_excel(file_path, index=False)       
print(f"Processing complete. Final DataFrame saved as '{file_path}'")

  attn_output = torch.nn.functional.scaled_dot_product_attention(


Processed row 1/31788. Saved to 'D:\Prismaccess\UK_MA.xlsx'
Processed row 2/31788. Saved to 'D:\Prismaccess\UK_MA.xlsx'
Processed row 3/31788. Saved to 'D:\Prismaccess\UK_MA.xlsx'
Processed row 4/31788. Saved to 'D:\Prismaccess\UK_MA.xlsx'
Processed row 5/31788. Saved to 'D:\Prismaccess\UK_MA.xlsx'
Processed row 6/31788. Saved to 'D:\Prismaccess\UK_MA.xlsx'
Processed row 7/31788. Saved to 'D:\Prismaccess\UK_MA.xlsx'
Processed row 8/31788. Saved to 'D:\Prismaccess\UK_MA.xlsx'
Processed row 9/31788. Saved to 'D:\Prismaccess\UK_MA.xlsx'
Processed row 10/31788. Saved to 'D:\Prismaccess\UK_MA.xlsx'
Processed row 11/31788. Saved to 'D:\Prismaccess\UK_MA.xlsx'
Processed row 12/31788. Saved to 'D:\Prismaccess\UK_MA.xlsx'
Processed row 13/31788. Saved to 'D:\Prismaccess\UK_MA.xlsx'
Processed row 14/31788. Saved to 'D:\Prismaccess\UK_MA.xlsx'
Processed row 15/31788. Saved to 'D:\Prismaccess\UK_MA.xlsx'
Processed row 16/31788. Saved to 'D:\Prismaccess\UK_MA.xlsx'
Processed row 17/31788. Saved to 

OutOfMemoryError: CUDA out of memory. Tried to allocate 4.98 GiB. GPU 0 has a total capacity of 22.29 GiB of which 1.67 GiB is free. Of the allocated memory 17.67 GiB is allocated by PyTorch, and 2.64 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)