In [1]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU")


True
Tesla T4


In [2]:
!pip install -qU \
  transformers \
  accelerate \
  peft \
  datasets \
  sentencepiece \
  langchain-community \
  pypdf


In [3]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
from langchain_community.document_loaders import PyPDFLoader

pdf_path = "/content/drive/MyDrive/Elements of Electromagnetics.pdf"

loader = PyPDFLoader(pdf_path)
docs = loader.load()

len(docs), docs[0].page_content[:500]


(926, '')

In [5]:
for i in range(5):
    print("PAGE", i, "preview:")
    print(docs[i].page_content[:500])
    print("-" * 80)


PAGE 0 preview:

--------------------------------------------------------------------------------
PAGE 1 preview:
PRACTICAL APPLICATIONS
Some of the real-life applications covered in this book are listed in order of appearance.
	 • Applications of electrostatics (Section 4.1)
 • Electrostatic separation of solids (Example 4.3)
 • Electrostatic discharge (ESD) (Section 4.11)
 • Electrostatic shielding (Section 5.9B)
 • High dielectric constant materials (Section 5.10)
 • Graphene (Section 5.11) NEW
 • Electrohydrodynamic pump (Example 6.1)
 • Xerographic copying machine (Example 6.2)
 • Parallel-plate capaci
--------------------------------------------------------------------------------
PAGE 2 preview:
• Textile antennas and sensors (Section 13.11) NEW
 • RFID (Section 13.12) NEW
 • Commercial EM software—FEKO (Section 14.7) NEW
 • COMSOL Multiphysics (Section 14.8) NEW
 • CST Microwave Studio (Section 14.9) NEW
   Approximate 
  Best Experimental Value for Problem 
Quantity (Units) Sy

In [6]:
for i in [10, 20, 50, 100, 200, 400, 800]:
    if i < len(docs):
        print("PAGE", i, "preview:")
        print(repr(docs[i].page_content[:500]))
        print("-" * 80)


PAGE 10 preview:
'3.4 Del Operator  69\n3.5 Gradient of a Scalar  71\n3.6 Divergence of a Vector and Divergence Theorem  75\n3.7 Curl of a Vector and Stokes’s Theorem  82\n3.8 Laplacian of a Scalar  90\n †3.9 Classification of Vector Fields  92\nSummary  97\nReview Questions  98\nProblems  100\nPART 2:  ELECTROSTATICS\n4 ELECTROSTATIC FIELDS   111\n4.1 Introduction  111\n4.2 Coulomb’s Law and Field Intensity  112\n4.3 Electric Fields due to Continuous Charge Distributions  119\n4.4  Electric Flux Density  130\n4.5  Gauss’s Law—'
--------------------------------------------------------------------------------
PAGE 20 preview:
'ABOUT THE AUTHOR\nMatthew N. O. Sadiku received his BSc degree in 1978 from Ahmadu Bello University, \nZaria, Nigeria, and his MSc and PhD degrees from Tennessee Technological University, \nCookeville, Tennessee, in 1982 and 1984, respectively. From 1984 to 1988, he was an assis-\ntant professor at Florida Atlantic University, Boca Raton, Florida, where he did gra

In [7]:
raw_texts = [d.page_content for d in docs if d.page_content and d.page_content.strip()]

len(raw_texts), raw_texts[0][:500]


(925,
 'PRACTICAL APPLICATIONS\nSome of the real-life applications covered in this book are listed in order of appearance.\n\t • Applications of electrostatics (Section 4.1)\n • Electrostatic separation of solids (Example 4.3)\n • Electrostatic discharge (ESD) (Section 4.11)\n • Electrostatic shielding (Section 5.9B)\n • High dielectric constant materials (Section 5.10)\n • Graphene (Section 5.11) NEW\n • Electrohydrodynamic pump (Example 6.1)\n • Xerographic copying machine (Example 6.2)\n • Parallel-plate capaci')

In [8]:
import re

full_text = "\n\n".join(raw_texts)

full_text = re.sub(r"\s+", " ", full_text)

len(full_text)


1282744

In [9]:
chunk_size = 800
chunk_overlap = 200

chunks = []
start = 0
while start < len(full_text):
    end = start + chunk_size
    chunk = full_text[start:end]
    chunks.append(chunk)
    start += (chunk_size - chunk_overlap)

len(chunks), chunks[0][:400]


(2138,
 'PRACTICAL APPLICATIONS Some of the real-life applications covered in this book are listed in order of appearance. • Applications of electrostatics (Section 4.1) • Electrostatic separation of solids (Example 4.3) • Electrostatic discharge (ESD) (Section 4.11) • Electrostatic shielding (Section 5.9B) • High dielectric constant materials (Section 5.10) • Graphene (Section 5.11) NEW • Electrohydrodyna')

In [10]:
from datasets import Dataset

def make_example(chunk):
    return {
        "instruction": "Explain the following electromagnetics content clearly and in detail.",
        "input": chunk,
        "output": chunk,
    }

examples = [make_example(c) for c in chunks]

dataset = Dataset.from_list(examples)
dataset


Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 2138
})

In [11]:
dataset = dataset.train_test_split(test_size=0.05, seed=42)
dataset


DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output'],
        num_rows: 2031
    })
    test: Dataset({
        features: ['instruction', 'input', 'output'],
        num_rows: 107
    })
})

In [12]:
dataset["train"][0]


{'instruction': 'Explain the following electromagnetics content clearly and in detail.',
 'input': ', G, and C. Answer: 3.2 V/m, 38.2 nH/m, 5 3 1024 S/m, 5.97 pF/m. EX AMPLE 11.2 11_Sadiku_Ch11.indd 563 25/09/17 5:24 PM 564 CHAPTER 11 TRANSMISSION LINES Multiplying eqs. (11.2.1) and (11.2.3) together gives uZo 5 1 C or C 5 1 uZo 5 1 0.6 13 3 108 2 60 5 92.59 pF/m l 5 u f 5 0.6 13 3 108 2 108 5 1.8 m Consider a transmission line of length \ue02c, characterized by g and Zo, connected to a load ZL as shown in Figure 11.6(a). Looking into the line, the generator sees the line with the load as an input impedance Zin. It is our intention in this section to determine the input imped- ance, the standing wave ratio (SWR), and the power flow on the line. Let the transmission line extend from z 5 0 at the generator to z 5 , at the load. First of all, we need the voltage and current waves in eqs. (11.15)',
 'output': ', G, and C. Answer: 3.2 V/m, 38.2 nH/m, 5 3 1024 S/m, 5.97 pF/m. EX AMPLE 11.2 1

In [13]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "mistralai/Mistral-7B-Instruct-v0.2"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)
model.config.use_cache = False


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [14]:
def format_example(ex):
    prompt = f"""[INSTRUCTION]
{ex['instruction']}

[INPUT]
{ex['input']}

[RESPONSE]
"""
    ex["text"] = prompt + ex["output"]
    return ex

train_dataset = dataset["train"].map(format_example)
eval_dataset = dataset["test"].map(format_example)

train_dataset[0]["text"][:500]


Map:   0%|          | 0/2031 [00:00<?, ? examples/s]

Map:   0%|          | 0/107 [00:00<?, ? examples/s]

'[INSTRUCTION]\nExplain the following electromagnetics content clearly and in detail.\n\n[INPUT]\n, G, and C. Answer: 3.2 V/m, 38.2 nH/m, 5 3 1024 S/m, 5.97 pF/m. EX AMPLE 11.2 11_Sadiku_Ch11.indd 563 25/09/17 5:24 PM 564 CHAPTER 11 TRANSMISSION LINES Multiplying eqs. (11.2.1) and (11.2.3) together gives uZo 5 1 C or C 5 1 uZo 5 1 0.6 13 3 108 2 60 5 92.59 pF/m l 5 u f 5 0.6 13 3 108 2 108 5 1.8 m Consider a transmission line of length \ue02c, characterized by g and Zo, connected to a load ZL as shown in '

In [15]:
MAX_LEN = 512

In [16]:
def tokenize_fn(examples):
    return tokenizer(
        examples["text"],
        truncation=True,
        max_length=MAX_LEN,
        padding="max_length",
    )

train_tokenized = train_dataset.map(tokenize_fn, batched=True, remove_columns=train_dataset.column_names)
eval_tokenized = eval_dataset.map(tokenize_fn, batched=True, remove_columns=eval_dataset.column_names)

train_tokenized


Map:   0%|          | 0/2031 [00:00<?, ? examples/s]

Map:   0%|          | 0/107 [00:00<?, ? examples/s]

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 2031
})

In [17]:
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)
model.gradient_checkpointing_enable()
model.enable_input_require_grads()
model.print_trainable_parameters()

trainable params: 6,815,744 || all params: 7,248,547,840 || trainable%: 0.0940


In [18]:
from transformers import TrainingArguments, Trainer

output_dir = "mistral-em-sadiku-lora"

training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=1,
    num_train_epochs=1,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=20,
    eval_strategy="steps",
    eval_steps=100,
    save_steps=500,
    save_total_limit=1,
    report_to="none",
)


In [19]:
def data_collator(features):
    batch = {k: [f[k] for f in features] for k in features[0].keys()}
    batch = {k: torch.tensor(v) for k, v in batch.items()}
    batch["labels"] = batch["input_ids"].clone()
    return batch

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=eval_tokenized,
    data_collator=data_collator,
)

trainer.train()


Step,Training Loss,Validation Loss
100,1.2586,1.293799
200,1.4026,1.253377
300,1.2002,1.213043
400,1.112,1.196954
500,1.2884,1.175268
600,1.3113,1.170304
700,1.248,1.157078
800,1.2712,1.142546
900,1.0848,1.130671


Step,Training Loss,Validation Loss
100,1.2586,1.293799
200,1.4026,1.253377
300,1.2002,1.213043
400,1.112,1.196954
500,1.2884,1.175268
600,1.3113,1.170304
700,1.248,1.157078
800,1.2712,1.142546
900,1.0848,1.130671
1000,1.142,1.124594


KeyboardInterrupt: 

In [20]:
output_dir = "mistral-em-sadiku-lora"

trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)


('mistral-em-sadiku-lora/tokenizer_config.json',
 'mistral-em-sadiku-lora/special_tokens_map.json',
 'mistral-em-sadiku-lora/chat_template.jinja',
 'mistral-em-sadiku-lora/tokenizer.model',
 'mistral-em-sadiku-lora/added_tokens.json',
 'mistral-em-sadiku-lora/tokenizer.json')

In [21]:
import os
print(os.listdir(output_dir))


['adapter_model.safetensors', 'special_tokens_map.json', 'adapter_config.json', 'tokenizer_config.json', 'training_args.bin', 'checkpoint-1000', 'chat_template.jinja', 'README.md', 'tokenizer.model', 'tokenizer.json']


In [32]:
from huggingface_hub import login

login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [33]:
hf_repo_id = "snithshibu/mistral-em-sadiku-lora"


In [35]:
from huggingface_hub import upload_folder

hf_token = "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"
hf_repo_id = "snithshibu/mistral-em-sadiku-lora"

upload_folder(
    repo_id=hf_repo_id,
    folder_path="mistral-em-sadiku-lora",
    repo_type="model",
    token=hf_token
)


Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...diku-lora/tokenizer.model: 100%|##########|  493kB /  493kB            

  ...kpoint-1000/rng_state.pth:  77%|#######7  | 11.3kB / 14.6kB            

  ...ckpoint-1000/optimizer.pt:   1%|1         |  566kB / 54.7MB            

  ...adapter_model.safetensors:   1%|1         |  278kB / 27.3MB            

  ...adapter_model.safetensors:   1%|1         |  278kB / 27.3MB            

  ...checkpoint-1000/scaler.pt:   1%|1         |  14.0B / 1.38kB            

  ...ckpoint-1000/scheduler.pt:   1%|          |  14.0B / 1.47kB            

  ...nt-1000/training_args.bin:   1%|1         |  58.0B / 5.78kB            

  ...ku-lora/training_args.bin:   1%|1         |  58.0B / 5.78kB            

CommitInfo(commit_url='https://huggingface.co/snithshibu/mistral-em-sadiku-lora/commit/63340bf8fded8404a44f7c934418135216371bb0', commit_message='Upload folder using huggingface_hub', commit_description='', oid='63340bf8fded8404a44f7c934418135216371bb0', pr_url=None, repo_url=RepoUrl('https://huggingface.co/snithshibu/mistral-em-sadiku-lora', endpoint='https://huggingface.co', repo_type='model', repo_id='snithshibu/mistral-em-sadiku-lora'), pr_revision=None, pr_num=None)