In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from tqdm.notebook import tqdm
from transformers import GPT2Tokenizer

In [28]:
# Initialize tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
vocab_size = tokenizer.vocab_size
print(vocab_size)

50257


In [29]:
text = "This thesis addresses the critical need for effective Fault Detection and Isolation (FDI) in green hydrogen (GH2) production, a key player in mitigating the greenhouse effect. To tackle this challenge, this thesis introduces a hybrid strategy for FDI. Extensive reviews of FDI algorithms reveal a gap in existing literature, emphasizing accuracy but neglecting the need for labeled data. Additionally, explainability in Hybrid-FDI is often overlooked. The proposed hybrid approach aims to be efficient in data usage and explainable, leveraging physics-based models and Artificial Intelligence (AI). This study introduces Bond Graph-Convolutional Neural Net (BG-CNN), a novel hybrid FDI method addressing AI model training challenges for fault diagnosis. BG-CNN combines BG residual generation and CNN-based fault classification, particularly in scenarios with limited labeled data. Additionally, a Self-Supervised Learning (SSL) method enhances FDI in such situations. The study also discusses Bond Graph-eXplainable AI (BG-XAI), an occlusion-based method, emphasizing the importance of meaningful explanations for fault predictions, showcasing its effectiveness through visualizations. The BG-CNN method with SSL was employed for the FDI of the Proton Exchange Membrane (PEM) electrolyzer and railway tracks, surpassing the performance of traditional methods. Comparative analysis demonstrated the superior performance of the proposed method, particularly in scenarios with limited labeled data, outperforming state-of-the-art SSL methods. The BG-XAI method was used to provide explanations for predictions in accordance with structural analysis."

In [30]:
# Step 1: Prepare the Dataset
class TextDataset(Dataset):
    def __init__(self, text, seq_length, tokenizer):
        self.seq_length = seq_length
        self.tokenizer = tokenizer
        self.data = tokenizer.encode(text)
        self.vocab_size = tokenizer.vocab_size

    def __len__(self):
        return len(self.data) - self.seq_length

    def __getitem__(self, idx):
        x = self.data[idx:idx + self.seq_length]
        y = self.data[idx + 1:idx + self.seq_length + 1]
        return torch.tensor(x, dtype=torch.long), torch.tensor(y, dtype=torch.long)

In [31]:
 # Prepare data
seq_length = 10
batch_size = 16

dataset = TextDataset(text, seq_length, tokenizer)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [32]:
for x,y in dataloader:
    break

print(x.shape,y.shape)

torch.Size([16, 10]) torch.Size([16, 10])


In [8]:
tokenizer.batch_decode(x)

['), a novel hybrid FDI method addressing AI model',
 '-based models and Artificial Intelligence (AI). This',
 'izations. The BG-CNN method with SSL was',
 ' in Hybrid-FDI is often overlooked. The',
 '-Supervised Learning (SSL) method enhances F',
 ' method addressing AI model training challenges for fault diagnosis.',
 ' algorithms reveal a gap in existing literature, emphasizing accuracy',
 ' and Artificial Intelligence (AI). This study introduces Bond',
 'ative analysis demonstrated the superior performance of the proposed method',
 ' Self-Supervised Learning (SSL) method enhances',
 'Supervised Learning (SSL) method enhances FDI',
 'FDI) in green hydrogen (GH2)',
 ' leveraging physics-based models and Artificial Intelligence (AI',
 ' Proton Exchange Membrane (PEM)',
 ' predictions, showcasing its effectiveness through visualizations. The',
 ' explanations for fault predictions, showcasing its effectiveness through visual']

In [9]:
tokenizer.batch_decode(y)

[' a novel hybrid FDI method addressing AI model training',
 'based models and Artificial Intelligence (AI). This study',
 '. The BG-CNN method with SSL was employed',
 ' Hybrid-FDI is often overlooked. The proposed',
 'Supervised Learning (SSL) method enhances FDI',
 ' addressing AI model training challenges for fault diagnosis. BG',
 ' reveal a gap in existing literature, emphasizing accuracy but',
 ' Artificial Intelligence (AI). This study introduces Bond Graph',
 ' analysis demonstrated the superior performance of the proposed method,',
 '-Supervised Learning (SSL) method enhances F',
 'vised Learning (SSL) method enhances FDI in',
 'DI) in green hydrogen (GH2) production',
 ' physics-based models and Artificial Intelligence (AI).',
 'ton Exchange Membrane (PEM) electroly',
 ', showcasing its effectiveness through visualizations. The BG',
 ' for fault predictions, showcasing its effectiveness through visualizations']

In [3]:
x_text = "This thesis addresses the critical need"
x = tokenizer(x_text, return_tensors='pt').input_ids
print(x,x.shape)

tensor([[ 1212, 21554,  9405,   262,  4688,   761]]) torch.Size([1, 6])


In [4]:
embed_size = 64

embedding = nn.Embedding(vocab_size, embed_size)

x_emb = embedding(x)

print(x_emb.shape)

torch.Size([1, 6, 64])


In [6]:
seq_length = 10

positional_encoding = nn.Parameter(torch.zeros(1, seq_length, embed_size))
print(positional_encoding.shape)

torch.Size([1, 10, 64])


In [7]:
x_emb_pos_encoding = x_emb + positional_encoding[:, :x_emb.shape[1], :]
print(x_emb_pos_encoding.shape)

torch.Size([1, 6, 64])


In [8]:

num_heads = 4
hidden_dim = 128


transformer_layer = nn.TransformerEncoderLayer(
                d_model=embed_size,
                nhead=num_heads,
                dim_feedforward=hidden_dim,
                batch_first=True
            )

In [9]:
num_weights=0
for p in transformer_layer.parameters():
    num_weights += p.numel()

print(num_weights)

33472


In [10]:
x_contextual_emb = transformer_layer(x_emb_pos_encoding)

In [11]:
x_contextual_emb.shape

torch.Size([1, 6, 64])

In [12]:
x_contextual_emb[0][0]

tensor([ 7.3243e-01,  3.5839e-02, -4.4310e-01,  1.1105e-01,  4.1360e-01,
         2.3524e-01,  6.0227e-01,  2.2501e-03, -1.7107e+00, -7.1124e-01,
         1.0923e+00,  5.4993e-02, -8.4960e-01, -1.1927e+00,  1.5672e+00,
        -5.4057e-01,  4.7778e-01, -4.2656e-01, -1.6243e+00, -1.6682e+00,
         8.1536e-02,  2.0591e-01, -5.9917e-01,  2.4865e+00, -4.0231e-01,
         3.5988e-01,  6.4603e-01, -2.4307e-01, -1.7524e+00,  6.0048e-01,
         1.5870e+00,  1.1522e+00, -2.9785e-01, -6.7968e-01, -9.3511e-01,
        -2.0805e+00, -6.5376e-01,  8.5285e-01,  1.4435e+00, -1.3131e+00,
         9.9067e-01,  1.1094e+00,  3.1653e-01, -1.8434e-01,  2.5890e-01,
         1.5518e-01,  1.9225e+00, -8.0886e-01,  5.5146e-01,  1.2097e+00,
         6.6111e-01, -7.7961e-01,  9.6408e-01, -1.2769e+00,  1.7773e+00,
        -7.8222e-01, -6.6785e-01,  2.0683e-01, -3.9245e-01, -2.1446e+00,
         4.2825e-01, -5.4143e-01,  5.4102e-01, -1.3164e-01],
       grad_fn=<SelectBackward0>)

In [13]:
mask = torch.triu(torch.ones(x_emb.shape[1],x_emb.shape[1]) * float('-inf'), diagonal=1).to(x.device)
mask.shape

torch.Size([6, 6])

In [16]:
x_contextual_emb = transformer_layer(x_emb_pos_encoding,mask)
print(x_contextual_emb.shape)

torch.Size([1, 6, 64])


In [17]:
fc = nn.Linear(embed_size, vocab_size)
x_logits = fc(x_contextual_emb)
print(x_logits.shape)

torch.Size([1, 6, 50257])


# Loss calculation

In [18]:
text = "This thesis addresses the critical need for effective Fault Detection and Isolation (FDI) in green hydrogen (GH2) production, a key player in mitigating the greenhouse effect. To tackle this challenge, this thesis introduces a hybrid strategy for FDI. Extensive reviews of FDI algorithms reveal a gap in existing literature, emphasizing accuracy but neglecting the need for labeled data. Additionally, explainability in Hybrid-FDI is often overlooked. The proposed hybrid approach aims to be efficient in data usage and explainable, leveraging physics-based models and Artificial Intelligence (AI). This study introduces Bond Graph-Convolutional Neural Net (BG-CNN), a novel hybrid FDI method addressing AI model training challenges for fault diagnosis. BG-CNN combines BG residual generation and CNN-based fault classification, particularly in scenarios with limited labeled data. Additionally, a Self-Supervised Learning (SSL) method enhances FDI in such situations. The study also discusses Bond Graph-eXplainable AI (BG-XAI), an occlusion-based method, emphasizing the importance of meaningful explanations for fault predictions, showcasing its effectiveness through visualizations. The BG-CNN method with SSL was employed for the FDI of the Proton Exchange Membrane (PEM) electrolyzer and railway tracks, surpassing the performance of traditional methods. Comparative analysis demonstrated the superior performance of the proposed method, particularly in scenarios with limited labeled data, outperforming state-of-the-art SSL methods. The BG-XAI method was used to provide explanations for predictions in accordance with structural analysis."

tokenized_text = tokenizer(text, return_tensors='pt').input_ids

seq_length = 10
x = tokenized_text[:,:seq_length]
y = tokenized_text[:,1:seq_length+1]

print(x, x.shape)
print(y, y.shape)

print(tokenizer.batch_decode(x))
print(tokenizer.batch_decode(y))

tensor([[ 1212, 21554,  9405,   262,  4688,   761,   329,  4050, 40050, 46254]]) torch.Size([1, 10])
tensor([[21554,  9405,   262,  4688,   761,   329,  4050, 40050, 46254,   290]]) torch.Size([1, 10])
['This thesis addresses the critical need for effective Fault Detection']
[' thesis addresses the critical need for effective Fault Detection and']


In [20]:
x_emb = embedding(x)
x_emb_pos_encoding = x_emb + positional_encoding[:, :x_emb.shape[1], :]
mask = torch.triu(torch.ones(x_emb.shape[1],x_emb.shape[1]) * float('-inf'), diagonal=1).to(x.device)
x_contextual_emb = transformer_layer(x_emb_pos_encoding,mask)
x_logits = fc(x_contextual_emb)

print(x_logits.shape)

loss_fn = nn.CrossEntropyLoss()

print(x_logits.argmax(dim=-1))

loss = loss_fn(x_logits.view(-1, vocab_size), y.view(-1))

print(loss)


torch.Size([1, 10, 50257])
tensor([[16884, 48322, 13148, 13828,  9192, 22156, 48815, 31396, 24469, 46729]])
tensor(10.9978, grad_fn=<NllLossBackward0>)


In [26]:
# Step 2: Define the Model
class TransformerLanguageModel(nn.Module):
    def __init__(self, vocab_size, embed_size, num_heads, hidden_dim, seq_length):
        super(TransformerLanguageModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.positional_encoding = nn.Parameter(torch.zeros(1, seq_length, embed_size))

        self.encoder_layer = nn.TransformerEncoderLayer(
                d_model=embed_size,
                nhead=num_heads,
                dim_feedforward=hidden_dim,
                batch_first=True) 
        
        self.fc = nn.Linear(embed_size, vocab_size)

    def forward(self, x, mask=None):
        seq_length = x.size(1)
        x = self.embedding(x) + self.positional_encoding[:, :seq_length, :]
 
        x = self.encoder_layer(x, src_mask=mask)

        x = self.fc(x)
        return x

In [33]:
num_epochs = 20
learning_rate = 0.001

# Model, loss, optimizer
model = TransformerLanguageModel(
    vocab_size=dataset.vocab_size,
    embed_size=embed_size,
    num_heads=num_heads,
    hidden_dim=hidden_dim,
    seq_length=seq_length
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [34]:
for epoch in tqdm(range(num_epochs)):
        model.train()
        total_loss = 0
        for x, y in dataloader:
            optimizer.zero_grad()
            mask = torch.triu(torch.ones(seq_length, seq_length) * float('-inf'), diagonal=1).to(x.device)
            output = model(x, mask)
            loss = criterion(output.view(-1, dataset.vocab_size), y.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}")

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch 1/20, Loss: 10.4195
Epoch 2/20, Loss: 8.6267
Epoch 3/20, Loss: 6.2607
Epoch 4/20, Loss: 4.5613
Epoch 5/20, Loss: 3.6864
Epoch 6/20, Loss: 3.0292
Epoch 7/20, Loss: 2.4021
Epoch 8/20, Loss: 1.8674
Epoch 9/20, Loss: 1.4499
Epoch 10/20, Loss: 1.1197
Epoch 11/20, Loss: 0.9039
Epoch 12/20, Loss: 0.7394
Epoch 13/20, Loss: 0.6221
Epoch 14/20, Loss: 0.5425
Epoch 15/20, Loss: 0.4686
Epoch 16/20, Loss: 0.4154
Epoch 17/20, Loss: 0.3781
Epoch 18/20, Loss: 0.3438
Epoch 19/20, Loss: 0.3223
Epoch 20/20, Loss: 0.2969


In [37]:
import time
# Generate text
model.eval()
input_text = "a key player in mitigating the greenhouse effect"
input_seq = torch.tensor(tokenizer.encode(input_text), dtype=torch.long).unsqueeze(0)
generated = input_text

for _ in range(50):
    with torch.no_grad():
        output = model(input_seq)
        next_token = output.argmax(dim=-1)[0,-1].item()
        generated += tokenizer.decode([next_token])
        input_seq = torch.cat([input_seq, torch.tensor([[next_token]])], dim=1)
        input_seq = input_seq[:, -seq_length:]
        # break
        time.sleep(0.1)
        print("Generated Text:", generated)

Generated Text: a key player in mitigating the greenhouse effect.
Generated Text: a key player in mitigating the greenhouse effect. To
Generated Text: a key player in mitigating the greenhouse effect. To tackle
Generated Text: a key player in mitigating the greenhouse effect. To tackle this
Generated Text: a key player in mitigating the greenhouse effect. To tackle this challenge
Generated Text: a key player in mitigating the greenhouse effect. To tackle this challenge,
Generated Text: a key player in mitigating the greenhouse effect. To tackle this challenge, this
Generated Text: a key player in mitigating the greenhouse effect. To tackle this challenge, this thesis
Generated Text: a key player in mitigating the greenhouse effect. To tackle this challenge, this thesis introduces
Generated Text: a key player in mitigating the greenhouse effect. To tackle this challenge, this thesis introduces a
Generated Text: a key player in mitigating the greenhouse effect. To tackle this challenge, 

In [41]:
model.eval()

input_text='This thesis addresses the critical need for effective Fault Detection'
input_seq = torch.tensor(tokenizer.encode(input_text), dtype=torch.long).unsqueeze(0)
print(input_seq,input_seq.shape)
with torch.no_grad():
    output = model(input_seq)
    next_token = output.argmax(dim=-1)[0,-1].item()

print(output.argmax(dim=-1),output.argmax(dim=-1).shape)
print(tokenizer.batch_decode(output.argmax(dim=-1)))

tensor([[ 1212, 21554,  9405,   262,  4688,   761,   329,  4050, 40050, 46254]]) torch.Size([1, 10])
tensor([[21554, 20718,   262,   376,   761,   329,  4050, 40050, 46254,   290]]) torch.Size([1, 10])
[' thesis introduces the F need for effective Fault Detection and']
