In [None]:
!pip install transformers

In [None]:
import os
import psutil
import platform
try:
    import GPUtil
except ImportError:
    print("GPUtil module not installed. GPU information will not be available.")

# Host resource report: CPU counts, RAM, per-partition disk usage and (optionally) GPUs.
# psutil reports sizes in bytes; dividing by 1024 ** 3 converts them to GB for display.

# CPU Information
cpus = os.cpu_count()
print("Logical CPUs:", cpus)
print("Physical CPUs:", psutil.cpu_count(logical=False))

# System Memory
ram = psutil.virtual_memory()
print("Total RAM (GB):", round(ram.total / (1024 ** 3), 2))
print("Available RAM (GB):", round(ram.available / (1024 ** 3), 2))
print("Used RAM (GB):", round(ram.used / (1024 ** 3), 2))

# Disk Information
print("\nDisk Information:")
for partition in psutil.disk_partitions():
    try:
        usage = psutil.disk_usage(partition.mountpoint)
    except PermissionError:
        print(f"  No Permission to access {partition.mountpoint}")
        continue
    except OSError as err:
        # Mountpoints that cannot be queried for other reasons (e.g. a removable
        # drive without media) are reported and skipped instead of aborting the
        # whole report.
        print(f"  Could not read {partition.mountpoint}: {err}")
        continue
    print(f"  Mountpoint: {partition.mountpoint}")
    print(f"    Total Size (GB): {round(usage.total / (1024 ** 3), 2)}")
    print(f"    Used Space (GB): {round(usage.used / (1024 ** 3), 2)}")
    print(f"    Free Space (GB): {round(usage.free / (1024 ** 3), 2)}")
    print(f"    Percentage Used: {usage.percent}%")

# GPU Information (if GPUtil is available)
# The name GPUtil only exists in globals() when the optional import succeeded.
if 'GPUtil' in globals():
    gpus = GPUtil.getGPUs()
    if gpus:
        for i, gpu in enumerate(gpus):
            print(f"\nGPU {i}: {gpu.name}")
            # GPUtil memory figures are divided by 1024 to display them in GB.
            print(f"  Total VRAM (GB): {round(gpu.memoryTotal / 1024, 2)}")
            print(f"  Used VRAM (GB): {round(gpu.memoryUsed / 1024, 2)}")
            print(f"  Free VRAM (GB): {round(gpu.memoryFree / 1024, 2)}")
            # load is a fraction; round so the percentage prints without float noise.
            print(f"  GPU Load (%): {round(gpu.load * 100, 1)}")
    else:
        print("No GPU found or GPUtil cannot find it.")
else:
    print("GPU information not available due to missing GPUtil.")


In [None]:
# Device Type and Count
# torch is imported in this cell so that it runs on a fresh kernel (Restart & Run All);
# no earlier cell imports it.
import torch

# Prefer CUDA when PyTorch can see a GPU, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Number of CUDA devices visible to PyTorch (0 when running on the CPU only).
num_devices = torch.cuda.device_count()
print(f'Using device: {device}')
print(f'Number of available devices: {num_devices}')

In [None]:
import gc

import torch

# Free memory left over from previously loaded models.
# Run the garbage collector first: tensors that are only kept alive by reference
# cycles are released by gc.collect(), and only then can empty_cache() hand their
# cached CUDA blocks back to the driver.
gc.collect()
torch.cuda.empty_cache()

In [None]:
import importlib.util

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Candidate checkpoints; model_to_load is the index of the one to load.
checkpoint  = [
    "BioMistral/BioMistral-7B",
    "uygarkurt/llama-3-merged-linear",
    "01-ai/Yi-1.5-9B",
    "facebook/opt-30b"
]
model_to_load = 0
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load in half precision and let device_map='auto' place the weights.
load_kwargs = {
    "torch_dtype": torch.float16,
    "device_map": 'auto',
}

# FlashAttention-2 needs the optional flash_attn package and a CUDA GPU. Request it
# only when both are present; otherwise let transformers choose its default attention
# implementation instead of failing inside from_pretrained.
if torch.cuda.is_available() and importlib.util.find_spec("flash_attn") is not None:
    load_kwargs["attn_implementation"] = "flash_attention_2"
else:
    print("flash_attn is not usable here; loading with the default attention implementation.")

model = AutoModelForCausalLM.from_pretrained(checkpoint[model_to_load], **load_kwargs)

tokenizer = AutoTokenizer.from_pretrained(checkpoint[model_to_load], return_token_type_ids=False)

In [None]:
# Switch off the flash and the memory-efficient scaled-dot-product-attention
# backends in PyTorch (both toggles are global for the process).
torch.backends.cuda.enable_flash_sdp(enabled=False)
torch.backends.cuda.enable_mem_efficient_sdp(enabled=False)

In [None]:
# !accelerate launch --multi_gpu --num_processes=2 train_script.py
# !accelerate launch --multi_gpu --mixed_precision=fp16 --num_processes=2 train_script.py
# !accelerate launch --multi_gpu --mixed_precision=fp16 --num_processes=2 train_script.py

In [None]:
# Loading a model on the CPU would crash the notebook; instead, load it on the GPUs by passing
# device_map="cuda" or device_map="auto" when loading the pretrained model.
# When models are loaded onto the GPUs, loading several models one after another does not crash the notebook; instead, the space that was occupied by the previous model is freed up.

# When you load a model, you can load it either on a CPU or on a GPU. After loading, the model
# persists in CPU/GPU memory and is not ... (TODO: this note was left unfinished)

# When device_map is set to "auto", the majority of the model is loaded onto the GPUs; once the GPUs
# reach their limit, the rest of the model is loaded onto the CPU. If the model is still too big,
# the CPU's RAM fills up completely and the notebook crashes, so the model ultimately cannot be
# loaded into memory.


# Interesting observation: on the first load of the 30-billion-parameter model into memory, the notebook crashed;
# on the second load, the model loaded perfectly into memory without using any of the CPU's RAM.
