[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1-o7cBASVRfv3mqgIol7WBEHI9JYvAoUu#scrollTo=L7OZ9c89Seb7)
# Step 1: Install Required Libraries

In [1]:
!pip install datasets transformers torch

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_c

In [2]:
!nvidia-smi

Wed Apr  2 09:20:35 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   41C    P8             10W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

# Step 2: Load and Split the Dataset

In [3]:
from datasets import load_dataset, DatasetDict

# Load the dataset from Hugging Face
dataset = load_dataset("mtpti5iD/redhat-docs_dataset")

# Split the dataset
split_dataset = dataset["train"].train_test_split(test_size=0.2, seed=42)
test_valid = split_dataset["test"].train_test_split(test_size=0.5, seed=42)

# Combine the splits into a single DatasetDict
dataset_dict = DatasetDict({
    "train": split_dataset["train"],
    "val": test_valid["train"],
    "test": test_valid["test"]
})

print("Dataset split completed!")

README.md:   0%|          | 0.00/1.81k [00:00<?, ?B/s]

train-00000-of-00002.parquet:   0%|          | 0.00/113M [00:00<?, ?B/s]

train-00001-of-00002.parquet:   0%|          | 0.00/115M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/56.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/44592 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/11149 [00:00<?, ? examples/s]

Dataset split completed!


# Define the Model Architecture

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import BertTokenizer, BertModel, BertConfig

# Define the model architecture
class TransformerModel(nn.Module):
    def __init__(self, hidden_size, num_classes):
        super(TransformerModel, self).__init__()
        self.bert = BertModel(BertConfig())
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs[1]
        logits = self.fc(pooled_output)
        return logits

# Set up the model
hidden_size = 768  # BERT base hidden size
num_classes = 2  # Example: binary classification
model = TransformerModel(hidden_size, num_classes)

# Set up the optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=2e-5)
criterion = nn.CrossEntropyLoss()

print("Model architecture defined!")

Model architecture defined!


# Step 4: Train the Model

In [None]:
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer

# Define a custom dataset
class CustomDataset(Dataset):
    def __init__(self, dataset, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.dataset = dataset
        self.max_len = max_len

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        text = self.dataset[index]['content']
        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            return_token_type_ids=True,
            truncation=True
        )
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']

        return {
            'input_ids': torch.tensor(input_ids, dtype=torch.long),
            'attention_mask': torch.tensor(attention_mask, dtype=torch.long)
        }

# Load the tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Create dataloaders
train_dataset = CustomDataset(dataset_dict['train'], tokenizer, max_len=512)
val_dataset = CustomDataset(dataset_dict['val'], tokenizer, max_len=512)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)

# Training loop
epochs = 3
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for batch in train_loader:
        optimizer.zero_grad()
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, torch.zeros(len(outputs), dtype=torch.long))  # Example target
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    avg_train_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {avg_train_loss}")

    # Validation loop
    model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids']
            attention_mask = batch['attention_mask']
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = criterion(outputs, torch.zeros(len(outputs), dtype=torch.long))  # Example target
            total_val_loss += loss.item()

    avg_val_loss = total_val_loss / len(val_loader)
    print(f"Epoch {epoch+1}/{epochs}, Validation Loss: {avg_val_loss}")

print("Training completed!")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [5]:
from google.colab import drive
# Mount Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertModel, get_linear_schedule_with_warmup
import torch.cuda.amp as amp
import torch.backends.cudnn as cudnn

# Enable TensorFloat-32 for faster matrix multiplications on T4 GPU
torch.backends.cuda.matmul.allow_tf32 = True
cudnn.benchmark = True  # Optimize cuDNN operations

# Define dataset
class CustomDataset(Dataset):
    def __init__(self, dataset, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.dataset = dataset
        self.max_len = max_len

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        text = self.dataset[index]['content']
        inputs = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            return_tensors="pt",
            truncation=True
        )
        return {
            'input_ids': inputs['input_ids'].squeeze(0),
            'attention_mask': inputs['attention_mask'].squeeze(0),
        }

# Load tokenizer and dataset
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
train_dataset = CustomDataset(dataset_dict['train'], tokenizer, max_len=256)
val_dataset = CustomDataset(dataset_dict['val'], tokenizer, max_len=256)

# Optimize DataLoader with num_workers and pin_memory
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=2, pin_memory=True)

# Define model with pre-trained weights
class TransformerModel(nn.Module):
    def __init__(self, num_classes):
        super(TransformerModel, self).__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased")
        self.fc = nn.Linear(self.bert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        return self.fc(pooled_output)

# Initialize model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TransformerModel(num_classes=2).to(device)
model = torch.compile(model)  # TorchDynamo for graph compilation

# Optimizer, Loss, Scheduler
optimizer = optim.AdamW(model.parameters(), lr=2e-5)
criterion = nn.CrossEntropyLoss()
epochs = 3
total_steps = len(train_loader) * epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

# Mixed Precision (AMP) Training
scaler = amp.GradScaler()

# Training loop with gradient accumulation
accumulation_steps = 2  # Reduce to 2 for faster updates on T4
for epoch in range(epochs):
    model.train()
    total_loss = 0

    for i, batch in enumerate(train_loader):
        input_ids = batch["input_ids"].to(device, non_blocking=True)
        attention_mask = batch["attention_mask"].to(device, non_blocking=True)
        labels = torch.zeros(len(input_ids), dtype=torch.long).to(device)

        with torch.amp.autocast(device_type="cuda"):  # Enable mixed precision for T4 acceleration
            outputs = model(input_ids, attention_mask)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()

        if (i + 1) % accumulation_steps == 0:  # Update weights after every 2 batches
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()
            optimizer.zero_grad()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {total_loss / len(train_loader)}")

    # Validation
    model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch["input_ids"].to(device, non_blocking=True)
            attention_mask = batch["attention_mask"].to(device, non_blocking=True)
            labels = torch.zeros(len(input_ids), dtype=torch.long).to(device)

            with amp.autocast():
                outputs = model(input_ids, attention_mask)
                loss = criterion(outputs, labels)
                total_val_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Validation Loss: {total_val_loss / len(val_loader)}")

# Save model to Google Drive
save_dir = "/content/drive/MyDrive/saved_model"
os.makedirs(save_dir, exist_ok=True)

torch.save(model.state_dict(), os.path.join(save_dir, "rh_model.pth"))
tokenizer.save_pretrained(save_dir)

print("Model and tokenizer saved successfully!")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

  scaler = amp.GradScaler()
W0402 09:25:32.001000 1900 torch/_inductor/utils.py:1137] [0/0] Not enough SMs to use max_autotune_gemm mode


Epoch 1/3, Training Loss: 0.002940008895843125


  with amp.autocast():
  with amp.autocast():


Epoch 1/3, Validation Loss: 3.325378624609702e-05
Epoch 2/3, Training Loss: 2.7426731306929017e-05
Epoch 2/3, Validation Loss: 1.1764765366930457e-05
Epoch 3/3, Training Loss: 1.1984140838963222e-05
Epoch 3/3, Validation Loss: 6.25157606814632e-06
Model and tokenizer saved successfully!


In [7]:
!pip install huggingface_hub




In [11]:
from huggingface_hub import login, HfApi, HfFolder, Repository
import os

# Get the Hugging Face token from the environment variable
hf_token = os.getenv('HF_TOKEN')

login(token=hf_token)

repo_name = "redhat-docs-llm"
user_name = "mtpti5iD"

api = HfApi()
api.create_repo(repo_name, token=hf_token, private=False)

repo_path = f"{user_name}/{repo_name}"
repo = Repository(repo_path, clone_from=repo_path)

os.system(f"cp -r {save_dir}/* {repo_path}")

repo.push_to_hub()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

For more details, please read https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
/content/mtpti5iD/redhat-docs-llm is already a clone of https://huggingface.co/mtpti5iD/redhat-docs-llm. Make sure you pull the latest changes with `repo.git_pull()`.


OSError: Author identity unknown

*** Please tell me who you are.

Run

  git config --global user.email "you@example.com"
  git config --global user.name "Your Name"

to set your account's default identity.
Omit --global to set the identity only in this repository.

fatal: unable to auto-detect email address (got 'root@ce99e9a90ba0.(none)')


In [14]:
!git config --global user.email "msidrm455@gmail.com"
!git config --global user.name "mtpti5iD"
!git config --global --add safe.directory '*'

In [16]:
from huggingface_hub import login, HfApi, Repository
import os

# Get the Hugging Face token from the environment variable
hf_token = os.getenv('HF_TOKEN')
login(token=hf_token)

repo_name = "redhat-docs-llm"
user_name = "mtpti5iD"

api = HfApi()
api.create_repo(repo_name, token=hf_token, private=False)

# Define local repo path
repo_path = f"./{repo_name}"  # Clone repo locally
repo_url = f"https://huggingface.co/{user_name}/{repo_name}"

# Clone or connect to the repository
repo = Repository(local_dir=repo_path, clone_from=repo_url)
repo.git_pull()  # Ensure it's up-to-date

# Copy files into repo directory
#save_dir = "/path/to/your/files"  # Set the correct path to your files
os.system(f"cp -r {save_dir}/* {repo_path}/")

# Commit and push changes
repo.git_add(auto_lfs_track=True)  # Add files to Git
repo.git_commit("Updated files")   # Commit changes
repo.git_push()                     # Push to Hugging Face Hub

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

For more details, please read https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
/content/./redhat-docs-llm is already a clone of https://huggingface.co/mtpti5iD/redhat-docs-llm. Make sure you pull the latest changes with `repo.git_pull()`.


OSError: warning: no common commits
From https://huggingface.co/mtpti5iD/redhat-docs-llm
 + 13b3559...f59b0ff main       -> origin/main  (forced update)
hint: You have divergent branches and need to specify how to reconcile them.
hint: You can do so by running one of the following commands sometime before
hint: your next pull:
hint: 
hint:   git config pull.rebase false  # merge (the default strategy)
hint:   git config pull.rebase true   # rebase
hint:   git config pull.ff only       # fast-forward only
hint: 
hint: You can replace "git config" with "git config --global" to set a default
hint: preference for all repositories. You can also pass --rebase, --no-rebase,
hint: or --ff-only on the command line to override the configured default per
hint: invocation.
fatal: Need to specify how to reconcile divergent branches.


In [17]:
!pwd

/content


In [18]:
!cd /content/drive/MyDrive/saved_model

In [20]:
!pwd
!git config pull.rebase false

/content
fatal: not in a git directory


In [23]:
!git clone https://huggingface.co/mtpti5iD/redhat-docs-llm /content/redhat-docs-llm_new

Cloning into '/content/redhat-docs-llm_new'...
remote: Enumerating objects: 3, done.[K
remote: Total 3 (delta 0), reused 0 (delta 0), pack-reused 3 (from 1)[K
Unpacking objects: 100% (3/3), 1.05 KiB | 1.05 MiB/s, done.


In [29]:
!ls -la /content/redhat-docs-llm

total 428008
drwxr-xr-x 3 root root      4096 Apr  2 10:49 .
drwxr-xr-x 1 root root      4096 Apr  2 10:56 ..
drwxr-xr-x 9 root root      4096 Apr  2 10:53 .git
-rw-r--r-- 1 root root      1519 Apr  2 10:49 .gitattributes
-rw------- 1 root root 438021229 Apr  2 10:49 rh_model.pth
-rw------- 1 root root       125 Apr  2 10:49 special_tokens_map.json
-rw------- 1 root root      1272 Apr  2 10:49 tokenizer_config.json
-rw------- 1 root root    231508 Apr  2 10:49 vocab.txt


In [37]:
%cd /content/redhat-docs-llm
!git add .

/content/redhat-docs-llm
From https://huggingface.co/mtpti5iD/redhat-docs-llm
 * branch            main       -> FETCH_HEAD
[KSuccessfully rebased and updated refs/heads/main.


In [52]:
%cd /content/redhat-docs-llm
!git add .

/content/redhat-docs-llm


In [53]:
%cd /content/redhat-docs-llm
!git status

/content/redhat-docs-llm
On branch main
Your branch is up to date with 'origin/main'.

Changes to be committed:
  (use "git restore --staged <file>..." to unstage)
	[32mnew file:   config.json[m



In [54]:
%cd /content/redhat-docs-llm
!git commit -m "new chnages in config"

/content/redhat-docs-llm
[main 6a18553] new chnages in config
 1 file changed, 1 insertion(+)
 create mode 100644 config.json


In [55]:
%cd /content/redhat-docs-llm
!git push origin main

/content/redhat-docs-llm
Enumerating objects: 4, done.
Counting objects: 100% (4/4), done.
Delta compression using up to 2 threads
Compressing objects: 100% (3/3), done.
Writing objects: 100% (3/3), 357 bytes | 357.00 KiB/s, done.
Total 3 (delta 1), reused 0 (delta 0), pack-reused 0
To https://huggingface.co/mtpti5iD/redhat-docs-llm
   c18a09a..6a18553  main -> main


In [41]:
!pip install transformers torch



In [44]:
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer

# Define model class
class TransformerModel(nn.Module):
    def __init__(self, num_classes):
        super(TransformerModel, self).__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased")
        self.fc = nn.Linear(self.bert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        return self.fc(pooled_output)

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize model
model = TransformerModel(num_classes=2).to(device)

# Load saved model state_dict properly
checkpoint_path = "/content/drive/MyDrive/saved_model/rh_model.pth"
state_dict = torch.load(checkpoint_path, map_location=device)

# Fix unexpected key prefix issue (_orig_mod.)
new_state_dict = {}
for key, value in state_dict.items():
    new_key = key.replace("_orig_mod.", "")  # Remove _orig_mod. prefix if exists
    new_state_dict[new_key] = value

model.load_state_dict(new_state_dict)  # Load corrected state_dict

# Compile the model after loading (if needed)
model = torch.compile(model)

# Set model to evaluation mode
model.eval()

print("✅ Model loaded and compiled successfully!")

✅ Model loaded and compiled successfully!


In [45]:
# Load tokenizer
tokenizer = BertTokenizer.from_pretrained("/content/drive/MyDrive/saved_model")

# Example inference function
def predict(text):
    inputs = tokenizer(
        text,
        add_special_tokens=True,
        padding="max_length",
        truncation=True,
        max_length=256,
        return_tensors="pt"
    )

    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)

    with torch.no_grad():
        outputs = model(input_ids, attention_mask)

    prediction = torch.argmax(outputs, dim=1).item()
    return prediction

# Example usage
sample_text = "This is a test sentence."
prediction = predict(sample_text)
print(f"Predicted class: {prediction}")

Predicted class: 0


In [46]:
import json

config = {
    "architectures": ["TransformerModel"],
    "model_type": "bert",
    "hidden_size": 768,
    "num_labels": 2
}

with open("/content/drive/MyDrive/saved_model/config.json", "w") as f:
    json.dump(config, f)

print("Config file created!")

Config file created!


In [49]:
!ls -la /content/redhat-docs-llm

total 428016
drwxr-xr-x 3 root root      4096 Apr  2 11:14 .
drwxr-xr-x 1 root root      4096 Apr  2 10:56 ..
-rw------- 1 root root        98 Apr  2 11:14 config.json
drwxr-xr-x 9 root root      4096 Apr  2 11:05 .git
-rw-r--r-- 1 root root      1519 Apr  2 10:49 .gitattributes
-rw-r--r-- 1 root root 438021229 Apr  2 11:04 rh_model.pth
-rw-r--r-- 1 root root       125 Apr  2 11:04 special_tokens_map.json
-rw-r--r-- 1 root root      1272 Apr  2 11:04 tokenizer_config.json
-rw-r--r-- 1 root root    231508 Apr  2 11:04 vocab.txt


In [48]:
!cp /content/drive/MyDrive/saved_model/config.json /content/redhat-docs-llm/

In [63]:
import torch

# Load your saved model checkpoint
checkpoint_path = "/content/drive/MyDrive/saved_model/rh_model.pth"
state_dict = torch.load(checkpoint_path, map_location="cpu")

# Remove `_orig_mod.` prefix from keys
new_state_dict = {}
for key, value in state_dict.items():
    new_key = key.replace("_orig_mod.", "")  # Remove unnecessary prefix
    new_state_dict[new_key] = value

# Save the fixed checkpoint
torch.save(new_state_dict, "/content/drive/MyDrive/saved_model/fixed_rh_model.pth")

print("✅ Fixed checkpoint saved as fixed_rh_model.pth")

✅ Fixed checkpoint saved as fixed_rh_model.pth


In [62]:
!ls -la

total 855752
drwxr-xr-x 3 root root      4096 Apr  2 11:21 .
drwxr-xr-x 1 root root      4096 Apr  2 10:56 ..
-rw------- 1 root root        98 Apr  2 11:14 config.json
-rw-r--r-- 1 root root 438001339 Apr  2 11:21 fixed_rh_model.pth
drwxr-xr-x 9 root root      4096 Apr  2 11:15 .git
-rw-r--r-- 1 root root      1519 Apr  2 10:49 .gitattributes
-rw-r--r-- 1 root root 438021229 Apr  2 11:04 rh_model.pth
-rw-r--r-- 1 root root       125 Apr  2 11:04 special_tokens_map.json
-rw-r--r-- 1 root root      1272 Apr  2 11:04 tokenizer_config.json
-rw-r--r-- 1 root root    231508 Apr  2 11:04 vocab.txt


In [65]:
import torch

# Load the saved model checkpoint
checkpoint_path = "fixed_rh_model.pth"
state_dict = torch.load(checkpoint_path, map_location="cpu")

# Rename fc to classifier (for BERT)
new_state_dict = {}
for key, value in state_dict.items():
    new_key = key.replace("fc.", "classifier.")  # Rename classifier keys
    new_state_dict[new_key] = value

# Save the fixed checkpoint
torch.save(new_state_dict, "bert_fixed_rh_model.pth")

print("✅ Fixed model checkpoint saved as bert_fixed_rh_model.pth")

✅ Fixed model checkpoint saved as bert_fixed_rh_model.pth


In [72]:
from transformers import AutoModelForSequenceClassification

# Load base model
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")

# Load fixed weights
model.load_state_dict(torch.load("bert_fixed_rh_model.pth", map_location="cpu"))

# Save in Hugging Face format
model.save_pretrained("/content/redhat-docs-llm")

print("✅ Model converted successfully!")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Model converted successfully!


In [75]:
%cd /content/redhat-docs-llm
!ls -la /content/redhat-docs-llm
!ls -la /content/redhat-docs-llm/redhat-docs-llm

/content/redhat-docs-llm
total 1711196
drwxr-xr-x 4 root root      4096 Apr  2 11:27 .
drwxr-xr-x 1 root root      4096 Apr  2 10:56 ..
-rw-r--r-- 1 root root 438002364 Apr  2 11:23 bert_fixed_rh_model.pth
-rw------- 1 root root       638 Apr  2 11:27 config.json
-rw-r--r-- 1 root root 438001339 Apr  2 11:21 fixed_rh_model.pth
drwxr-xr-x 9 root root      4096 Apr  2 11:15 .git
-rw-r--r-- 1 root root      1519 Apr  2 10:49 .gitattributes
-rw-r--r-- 1 root root 437958648 Apr  2 11:28 model.safetensors
drwxr-xr-x 2 root root      4096 Apr  2 11:23 redhat-docs-llm
-rw-r--r-- 1 root root 438021229 Apr  2 11:04 rh_model.pth
-rw-r--r-- 1 root root       125 Apr  2 11:04 special_tokens_map.json
-rw-r--r-- 1 root root      1272 Apr  2 11:04 tokenizer_config.json
-rw-r--r-- 1 root root    231508 Apr  2 11:04 vocab.txt
total 427712
drwxr-xr-x 2 root root      4096 Apr  2 11:23 .
drwxr-xr-x 4 root root      4096 Apr  2 11:27 ..
-rw-r--r-- 1 root root       638 Apr  2 11:23 config.json
-rw-r--r-- 1

In [76]:
from huggingface_hub import HfApi

repo_id = "mtpti5iD/redhat-docs-llm"  # Update with your HF repo

api = HfApi()
api.upload_file(
    path_or_fileobj="model.safetensors",
    path_in_repo="model.safetensors",
    repo_id=repo_id,
    repo_type="model",
)

api.upload_file(
    path_or_fileobj="config.json",
    path_in_repo="config.json",
    repo_id=repo_id,
    repo_type="model",
)

print("✅ Model uploaded to Hugging Face successfully!")

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

✅ Model uploaded to Hugging Face successfully!


In [77]:
from transformers import AutoModel

model = AutoModel.from_pretrained("mtpti5iD/redhat-docs-llm", trust_remote_code=True)
print("✅ Model loaded successfully from Hugging Face!")

config.json:   0%|          | 0.00/638 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

✅ Model loaded successfully from Hugging Face!


In [78]:
from transformers import BertForSequenceClassification, BertTokenizer
import torch

# Load tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Load model
model = BertForSequenceClassification.from_pretrained("/content/redhat-docs-llm")
model.eval()

# Test Input
text = "How do I configure a firewall in RHEL?"
inputs = tokenizer(text, return_tensors="pt")

# Get predictions
with torch.no_grad():
    outputs = model(**inputs)

print(outputs.logits)  # Check model output

tensor([[ 5.5115, -5.5847]])


In [79]:
from transformers import BertForSequenceClassification, BertTokenizer
import torch
import json

# Load tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Load model
model = BertForSequenceClassification.from_pretrained("/content/redhat-docs-llm")
model.eval()

# Load label mapping (if available)
label_map_path = "/content/redhat-docs-llm/label_map.json"  # Adjust if needed

try:
    with open(label_map_path, "r") as f:
        label_map = json.load(f)  # Example: {"0": "Networking", "1": "Security", "2": "Storage"}
except FileNotFoundError:
    label_map = {str(i): f"Class {i}" for i in range(model.config.num_labels)}  # Fallback

# Test Input
text = "How do I configure a firewall in RHEL?"
inputs = tokenizer(text, return_tensors="pt")

# Get predictions
with torch.no_grad():
    outputs = model(**inputs)

# Convert logits to class probabilities
probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

# Get predicted class index
predicted_class_idx = torch.argmax(probs, dim=-1).item()

# Map to label name
predicted_label = label_map[str(predicted_class_idx)]

# Print results
print(f"Input: {text}")
print(f"Predicted Answer: {predicted_label}")
print(f"Confidence: {probs[0][predicted_class_idx].item():.4f}")

Input: How do I configure a firewall in RHEL?
Predicted Answer: Class 0
Confidence: 1.0000


In [80]:
!ls -la /content/redhat-docs-llm

total 1711196
drwxr-xr-x 4 root root      4096 Apr  2 11:27 .
drwxr-xr-x 1 root root      4096 Apr  2 10:56 ..
-rw-r--r-- 1 root root 438002364 Apr  2 11:23 bert_fixed_rh_model.pth
-rw------- 1 root root       638 Apr  2 11:27 config.json
-rw-r--r-- 1 root root 438001339 Apr  2 11:21 fixed_rh_model.pth
drwxr-xr-x 9 root root      4096 Apr  2 11:15 .git
-rw-r--r-- 1 root root      1519 Apr  2 10:49 .gitattributes
-rw-r--r-- 1 root root 437958648 Apr  2 11:28 model.safetensors
drwxr-xr-x 2 root root      4096 Apr  2 11:23 redhat-docs-llm
-rw-r--r-- 1 root root 438021229 Apr  2 11:04 rh_model.pth
-rw-r--r-- 1 root root       125 Apr  2 11:04 special_tokens_map.json
-rw-r--r-- 1 root root      1272 Apr  2 11:04 tokenizer_config.json
-rw-r--r-- 1 root root    231508 Apr  2 11:04 vocab.txt


In [82]:
label_map = {
    "0": "Security",
    "1": "Networking",
    "2": "Storage",
    "3": "System Administration",
    "4": "Performance Tuning",
    "5": "Kernel Tuning",
    "6": "Package Management",
    "7": "SELinux",
    "8": "User Management",
    "9": "Logging",
    "10": "Networking Basics",
    "80": "Unknown Category"
}

import json

# Save label map to file
file_path = "/content/redhat-docs-llm/label_map.json"
with open(file_path, "w") as f:
    json.dump(label_map, f, indent=4)

print(f"Saved label_map.json at {file_path}")

# Verify by reading the file
with open(file_path, "r") as f:
    print(json.load(f))  # This will print the saved content

Saved label_map.json at /content/redhat-docs-llm/label_map.json
{'0': 'Security', '1': 'Networking', '2': 'Storage', '3': 'System Administration', '4': 'Performance Tuning', '5': 'Kernel Tuning', '6': 'Package Management', '7': 'SELinux', '8': 'User Management', '9': 'Logging', '10': 'Networking Basics', '80': 'Unknown Category'}


In [83]:
from transformers import BertForSequenceClassification, BertTokenizer
import torch
import json

# Load tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Load model
model = BertForSequenceClassification.from_pretrained("/content/redhat-docs-llm")
model.eval()

# Load label mapping
label_map_path = "/content/redhat-docs-llm/label_map.json"

try:
    with open(label_map_path, "r") as f:
        label_map = json.load(f)  # Read label mapping file
except FileNotFoundError:
    print("⚠️ Warning: label_map.json not found! Using default labels.")
    label_map = {str(i): f"Class {i}" for i in range(model.config.num_labels)}  # Fallback

# Test Input
text = "How do I configure a firewall in RHEL?"
inputs = tokenizer(text, return_tensors="pt")

# Get predictions
with torch.no_grad():
    outputs = model(**inputs)

# Convert logits to class probabilities
probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

# Get predicted class index
predicted_class_idx = torch.argmax(probs, dim=-1).item()

# Map to label name
predicted_label = label_map.get(str(predicted_class_idx), f"Unknown Class {predicted_class_idx}")

# Print results
print(f"Input: {text}")
print(f"Predicted Answer: {predicted_label}")
print(f"Confidence: {probs[0][predicted_class_idx].item():.4f}")

Input: How do I configure a firewall in RHEL?
Predicted Answer: Security
Confidence: 1.0000
