## Memory usage and FLOPs of each model

In [1]:
import pandas as pd
import numpy as np
import nltk
import matplotlib.pyplot as plt
import seaborn as sns
import time
from tqdm.notebook import tqdm
from transformers import pipeline
import torch
import psutil
import subprocess
import gc
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from thop import profile, clever_format
%env CUDA_LAUNCH_BLOCKING=1

env: CUDA_LAUNCH_BLOCKING=1


In [2]:
print(torch.cuda.is_available())
print(torch.cuda.current_device())

if torch.cuda.is_available():
    device = 0  # Use the first GPU
else:
    device = -1  # Use the CPU

True
0


## Functions to monitor memory usage

In [3]:
# Function to get current CPU memory usage
def get_cpu_memory_usage():
    process = psutil.Process()
    mem_info = process.memory_info()
    return int(mem_info.rss / (1024 ** 2))  # Convert bytes to MB

# Function to get current GPU memory usage
def get_gpu_memory_usage():
    result = subprocess.run(
        ["nvidia-smi", "--query-gpu=memory.used", "--format=csv,nounits,noheader"],
        stdout=subprocess.PIPE,
        text=True,
        check=True
    )
    gpu_memory = result.stdout.strip()
    return int(gpu_memory)  # Memory in MB

### DistilBERT

In [4]:
!nvidia-smi

Sun Jul 14 21:09:35 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-80GB          Off | 00000000:81:00.0 Off |                    0 |
| N/A   34C    P0              68W / 500W |      8MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [5]:
# GPU Meomory Usage
initial_gpu_memory = get_gpu_memory_usage()
print(f"Initial GPU memory usage of distilBERT: {initial_gpu_memory} MB")

# Load the model
pipe_distilbert = pipeline("text-classification", model="AdamCodd/distilbert-base-uncased-finetuned-sentiment-amazon", device=0)

# Memory Usage Calculation
after_loading_gpu_memory = get_gpu_memory_usage()
print(f"GPU memory usage after loading distilBERT: {after_loading_gpu_memory} MB")

gpu_memory_used_by_model = after_loading_gpu_memory - initial_gpu_memory

print(f"GPU memory used by distilBERT: {gpu_memory_used_by_model} MB")

Initial GPU memory usage of distilBERT: 8 MB
GPU memory usage after loading distilBERT: 720 MB
GPU memory used by distilBERT: 712 MB


In [6]:
# CPU Meomory Usage
initial_cpu_memory = get_cpu_memory_usage()
print(f"Initial CPU memory usage of distilBERT: {initial_cpu_memory} MB")

# Load the model
pipe_distilbert = pipeline("text-classification", model="AdamCodd/distilbert-base-uncased-finetuned-sentiment-amazon", device=-1)

# Memory Usage Calculation
after_loading_cpu_memory = get_cpu_memory_usage()
print(f"CPU memory usage after loading distilBERT: {after_loading_cpu_memory} MB")

cpu_memory_used_by_model = after_loading_cpu_memory - initial_cpu_memory

print(f"CPU memory used by distilBERT: {cpu_memory_used_by_model} MB")

Initial CPU memory usage of distilBERT: 705 MB
CPU memory usage after loading distilBERT: 960 MB
CPU memory used by distilBERT: 255 MB


### ELECTRA

In [7]:
!nvidia-smi

Sun Jul 14 21:09:37 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-80GB          Off | 00000000:81:00.0 Off |                    0 |
| N/A   34C    P0              76W / 500W |    720MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [8]:
# GPU Meomory Usage
initial_gpu_memory = get_gpu_memory_usage()
print(f"Initial GPU memory usage of ELECTRA: {initial_gpu_memory} MB")

# Load the model
pipe_electra = pipeline("text-classification", model="pig4431/amazonPolarity_ELECTRA_5E", device=0)

# Memory Usage Calculation
after_loading_gpu_memory = get_gpu_memory_usage()
print(f"GPU memory usage after loading ELECTRA: {after_loading_gpu_memory} MB")

gpu_memory_used_by_model = after_loading_gpu_memory - initial_gpu_memory

print(f"GPU memory used by ELECTRA: {gpu_memory_used_by_model} MB")

Initial GPU memory usage of ELECTRA: 720 MB
GPU memory usage after loading ELECTRA: 900 MB
GPU memory used by ELECTRA: 180 MB


In [9]:
# CPU Meomory Usage
initial_cpu_memory = get_cpu_memory_usage()
print(f"Initial CPU memory usage of ELECTRA: {initial_cpu_memory} MB")

# Load the model
pipe_electra = pipeline("text-classification", model="pig4431/amazonPolarity_ELECTRA_5E", device=-1)

# Memory Usage Calculation
after_loading_cpu_memory = get_cpu_memory_usage()
print(f"CPU memory usage after loading ELECTRA: {after_loading_cpu_memory} MB")

cpu_memory_used_by_model = after_loading_cpu_memory - initial_cpu_memory

print(f"CPU memory used by ELECTRA: {cpu_memory_used_by_model} MB")

Initial CPU memory usage of ELECTRA: 1246 MB
CPU memory usage after loading ELECTRA: 1552 MB
CPU memory used by ELECTRA: 306 MB


### Flan-T5

In [10]:
!nvidia-smi

Sun Jul 14 21:09:39 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-80GB          Off | 00000000:81:00.0 Off |                    0 |
| N/A   34C    P0              76W / 500W |    900MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [11]:
# GPU Meomory Usage
initial_gpu_memory = get_gpu_memory_usage()
print(f"Initial GPU memory usage of Flan-T5: {initial_gpu_memory} MB")

# Load the model
pipeFlanT5 = pipeline("text2text-generation", model="google/flan-t5-xxl", device=0)

# Memory Usage Calculation
after_loading_gpu_memory = get_gpu_memory_usage()
print(f"GPU memory usage after loading Flan-T5: {after_loading_gpu_memory} MB")

gpu_memory_used_by_model = after_loading_gpu_memory - initial_gpu_memory

print(f"GPU memory used by Flan-T5: {gpu_memory_used_by_model} MB")

Initial GPU memory usage of Flan-T5: 900 MB


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

GPU memory usage after loading Flan-T5: 43312 MB
GPU memory used by Flan-T5: 42412 MB


In [12]:
# CPU Meomory Usage
initial_cpu_memory = get_cpu_memory_usage()
print(f"Initial CPU memory usage of Flan-T5: {initial_cpu_memory} MB")

# Load the model
pipeFlanT5 = pipeline("text2text-generation", model="google/flan-t5-xxl", device=-1)

# Memory Usage Calculation
after_loading_cpu_memory = get_cpu_memory_usage()
print(f"CPU memory usage after loading Flan-T5: {after_loading_cpu_memory} MB")

cpu_memory_used_by_model = after_loading_cpu_memory - initial_cpu_memory

print(f"CPU memory used by Flan-T5: {cpu_memory_used_by_model} MB")

Initial CPU memory usage of Flan-T5: 1557 MB


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

CPU memory usage after loading Flan-T5: 43975 MB
CPU memory used by Flan-T5: 42418 MB


### Flan-UL2

In [13]:
!nvidia-smi

Sun Jul 14 21:11:12 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-80GB          Off | 00000000:81:00.0 Off |                    0 |
| N/A   33C    P0              67W / 500W |  43312MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [14]:
# GPU Meomory Usage
initial_gpu_memory = get_gpu_memory_usage()
print(f"Initial GPU memory usage of Flan-UL2: {initial_gpu_memory} MB")

# Load the model
pipeFlanUL2 = pipeline("text2text-generation", model="google/flan-ul2", device=0)

# Memory Usage Calculation
after_loading_gpu_memory = get_gpu_memory_usage()
print(f"GPU memory usage after loading Flan-UL2: {after_loading_gpu_memory} MB")

gpu_memory_used_by_model = after_loading_gpu_memory - initial_gpu_memory

print(f"GPU memory used by Flan-UL2: {gpu_memory_used_by_model} MB")

Initial GPU memory usage of Flan-UL2: 43312 MB


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

GPU memory usage after loading Flan-UL2: 74716 MB
GPU memory used by Flan-UL2: 31404 MB


In [15]:
# CPU Meomory Usage
initial_cpu_memory = get_cpu_memory_usage()
print(f"Initial CPU memory usage of Flan-UL2: {initial_cpu_memory} MB")

# Load the model
pipeFlanUL2 = pipeline("text2text-generation", model="google/flan-ul2", device=-1)

# Memory Usage Calculation
after_loading_cpu_memory = get_cpu_memory_usage()
print(f"CPU memory usage after loading Flan-UL2: {after_loading_cpu_memory} MB")

cpu_memory_used_by_model = after_loading_cpu_memory - initial_cpu_memory

print(f"CPU memory used by Flan-UL2: {cpu_memory_used_by_model} MB")

Initial CPU memory usage of Flan-UL2: 43980 MB


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

CPU memory usage after loading Flan-UL2: 118216 MB
CPU memory used by Flan-UL2: 74236 MB


|Model|DistilBERT|ELECTRA|FlanT5|FlanUL2|
|-|-|-|-|-|
|GPU(MB)|712|180|42412|31404|
|CPU(MB)|255|306|42418|74236|

### FLOPs Calculation

### DistilBERT

In [16]:
# Load the model
model_name = "AdamCodd/distilbert-base-uncased-finetuned-sentiment-amazon"
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define an example input
example_text = "This is a great product!"
inputs = tokenizer(example_text, return_tensors="pt")
example_input = (inputs['input_ids'],)

# Calculate MACs and params
macs, params = profile(model, inputs=example_input)
flops = macs * 2  # Each MAC operation corresponds to 2 FLOPs (1 multiplication + 1 addition)
macs, flops, params = clever_format([macs, flops, params], "%.3f")

print(f"MACs: {macs}")
print(f"FLOPs: {flops}")
print(f"Number of parameters: {params}")

[INFO] Register count_normalization() for <class 'torch.nn.modules.normalization.LayerNorm'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
MACs: 340.649M
FLOPs: 681.299M
Number of parameters: 43.121M


### ELECTRA

In [17]:
# Load the model
model_name = "pig4431/amazonPolarity_ELECTRA_5E"
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define an example input
example_text = "This is a great product!"
inputs = tokenizer(example_text, return_tensors="pt")
example_input = (inputs['input_ids'],)

# Calculate MACs and params
macs, params = profile(model, inputs=example_input)
flops = macs * 2  # Each MAC operation corresponds to 2 FLOPs (1 multiplication + 1 addition)
macs, flops, params = clever_format([macs, flops, params], "%.3f")

print(f"MACs: {macs}")
print(f"FLOPs: {flops}")
print(f"Number of parameters: {params}")

[INFO] Register count_normalization() for <class 'torch.nn.modules.normalization.LayerNorm'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
MACs: 680.683M
FLOPs: 1.361G
Number of parameters: 85.648M


### Flan-T5

In [18]:
# Load the model
model_name = "google/flan-t5-xxl"
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define an example input
example_text = "This is a great product!"
inputs = tokenizer(example_text, return_tensors="pt")
example_input = (inputs['input_ids'],)

# Calculate MACs and params
macs, params = profile(model, inputs=example_input)
flops = macs * 2  # Each MAC operation corresponds to 2 FLOPs (1 multiplication + 1 addition)
macs, flops, params = clever_format([macs, flops, params], "%.3f")

print(f"MACs: {macs}")
print(f"FLOPs: {flops}")
print(f"Number of parameters: {params}")

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google/flan-t5-xxl and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
MACs: 86.990G
FLOPs: 173.980G
Number of parameters: 10.888G


### Flan-UL2

In [19]:
# Load the model
model_name = "google/flan-ul2"
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define an example input
example_text = "This is a great product!"
inputs = tokenizer(example_text, return_tensors="pt")
example_input = (inputs['input_ids'],)

# Calculate MACs and params
macs, params = profile(model, inputs=example_input)
flops = macs * 2  # Each MAC operation corresponds to 2 FLOPs (1 multiplication + 1 addition)
macs, flops, params = clever_format([macs, flops, params], "%.3f")

print(f"MACs: {macs}")
print(f"FLOPs: {flops}")
print(f"Number of parameters: {params}")

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google/flan-ul2 and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
MACs: 154.636G
FLOPs: 309.271G
Number of parameters: 19.344G


|Model|DistilBERT|ELECTRA|FlanT5|FlanUL2|
|-|-|-|-|-|
|MACs|340.649M|680.683M|86.990G|154.636G|
|FLOPs|681.299M|1.361G|173.980G|309.271G|
|Params|43.121M|85.648M|10.888G|19.344G|