In [1]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from torch import device
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Define a simple dataset class
class GearDataset(Dataset):
    """Line-oriented text dataset: one non-empty, whitespace-stripped line per sample.

    Args:
        data: An open text file (anything exposing ``readlines()``) or any
            iterable of strings. Everything is read eagerly in the constructor,
            so a file handle may be closed once the dataset has been built.
    """

    def __init__(self, data):
        # Keep supporting file objects, but also accept a plain iterable of lines.
        raw_lines = data.readlines() if hasattr(data, "readlines") else list(data)
        # Strip once up front and drop blank lines: a blank line would become an
        # empty training example with no tokens to compute a loss on.
        stripped = (line.strip() for line in raw_lines)
        self.data = [line for line in stripped if line]

    def __len__(self):
        """Return the number of non-empty lines."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the stripped text of sample ``idx``."""
        return self.data[idx]


In [3]:
# Training hyperparameters.
# `epochs` and `batch_size` are passed to fine_tune_gpt2 in the training cell.
epochs = 10      # number of full passes over the dataset
batch_size = 1   # number of text lines per optimizer step
lr = 5e-5        # learning rate; NOTE(review): not passed to fine_tune_gpt2 in the visible cells

In [4]:
# Fine-tuning function
def fine_tune_gpt2(dataset, tokenizer, model, epochs, batch_size, lr=5e-5,
                   save_dir='/home/rithwik/paper/saved_models'):
    """Fine-tune a causal language model on a dataset of raw text lines.

    Args:
        dataset: Dataset whose items are strings; it is batched with a shuffling
            DataLoader.
        tokenizer: Tokenizer used to encode each batch. It must define a pad
            token because batches are padded to their longest sequence.
        model: Model trained in place. It is called as
            ``model(**inputs, labels=labels)`` and must return an object with a
            ``.loss`` tensor.
        epochs: Number of passes over ``dataset``.
        batch_size: Number of text lines per optimizer step.
        lr: AdamW learning rate. Defaults to the previously hard-coded 5e-5.
        save_dir: Directory the fine-tuned model and tokenizer are written to.
            Defaults to the previously hard-coded location.

    Returns:
        None. The model is updated in place and written to ``save_dir``.
    """
    train_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Tokens added to the tokenizer after the model was created (e.g. a new
    # '<pad>' token) have ids beyond the embedding matrix; grow the matrix so a
    # padded batch cannot index out of range.
    vocab_size = len(tokenizer)
    if vocab_size > model.get_input_embeddings().num_embeddings:
        model.resize_token_embeddings(vocab_size)

    model.to(train_device)
    model.train()

    # Create DataLoader
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

    total_steps = len(dataloader) * epochs
    # The context manager closes the progress bar even if a training step raises.
    with tqdm(total=total_steps, desc="Training", unit="batch") as progress_bar:
        for epoch in range(epochs):
            total_loss = 0
            for batch in dataloader:
                inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True).to(train_device)
                labels = inputs["input_ids"].clone()
                # Ignore padded positions in the loss. Using the attention mask
                # (rather than comparing against pad_token_id) stays correct
                # even when the pad token shares an id with a real token.
                labels[inputs["attention_mask"] == 0] = -100
                optimizer.zero_grad()
                outputs = model(**inputs, labels=labels)
                loss = outputs.loss
                loss.backward()
                optimizer.step()
                total_loss += loss.item()

                progress_bar.update(1)  # Update progress bar

            # The summed loss scales with dataset size; also report the mean.
            mean_loss = total_loss / max(len(dataloader), 1)
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss} (mean per batch: {mean_loss})")

    model.save_pretrained(save_dir)
    # Save the tokenizer next to the weights so any added tokens can be
    # restored together with the resized embeddings.
    tokenizer.save_pretrained(save_dir)
    print(f"Fine-tuned model saved at: {save_dir}")

In [5]:
# Gear-design prompts used for generation, assembled group by group.
prompts = []

# Basic requirements
prompts += [
    "Design a gear with 20 teeth for a load of 1000N.",
    "Design a gear for a speed of 500rpm and torque of 200Nm.",
    "Design a gear to transmit power of 5kW at an efficiency of 90%.",
]

# Type and material
prompts += [
    "Design a bevel gear made of stainless steel with a gear ratio of 2:1.",
    "Design a helical gear made of nylon with a pressure angle of 20 degrees.",
    "Design a worm gear made of bronze with a worm gear ratio of 10:1.",
    "Design a spur gear made of cast iron with a module of 2.5 mm.",
]

# Dimensions
prompts += [
    "Design a gear with a diameter of 50 mm and a face width of 10 mm.",
    "Design a gear with a pitch diameter of 30 mm and a circular pitch of 6 mm.",
    "Design a gear with a center distance of 200 mm and a gear ratio of 3:1.",
]

# Constraints and applications
prompts += [
    "Design a gear for a high-speed application with minimal backlash requirement.",
    "Design a gear for an application with limited space that needs to be lightweight.",
    "Design a gear for a noisy environment that needs to operate quietly.",
    "Design a gear for a low-speed application with a high torque requirement.",
    "Design a gear for an application subject to shock loads that needs to be durable.",
    "Design a gear for a food processing application that requires corrosion resistance.",
    "Design a gear for a medical device that needs to be sterilizable.",
    "Design a gear for a robotic arm with a high degree of accuracy needed.",
]

# Combinations
prompts += [
    "Design a spur gear for a load of 500N and a speed of 1000rpm.",
    "Design a bevel gear for a torque of 100Nm and a gear ratio of 3:2.",
    "Design a worm gear to transmit power of 2kW with an efficiency of 85%.",
    "Design a helical gear with a pressure angle of 30 degrees and a module of 3 mm.",
    "Design a gear with a diameter of 75 mm and a face width of 15 mm for a high-torque application.",
]

# Variations
prompts += [
    "Design a gear with a different number of teeth (beyond 20) based on specific constraints.",
    "Design a gear with a different gear ratio based on desired speed and torque requirements.",
    "Design a gear with a different material based on application-specific needs (e.g., aluminum, plastic).",
    "Design a gear with a different pressure angle for optimized performance.",
    "Design a gear with a different module based on desired size and strength.",
]

# Additional considerations
prompts += [
    "Design a gear that is self-lubricating.",
    "Design a gear that is back-drivable.",
    "Design a gear with a clutch mechanism.",
    "Design a gear with a variable gear ratio.",
    "Design a gear with multiple teeth per pitch (e.g., internal gear).",
]

# Applications
prompts += [
    "Design a gear for a conveyor belt.",
    "Design a gear for a bicycle.",
    "Design a gear for a wind turbine.",
    "Design a gear for a gear pump.",
    "Design a gear for a 3D printer.",
    "Design a gear for a clock.",
    "Design a gear for a washing machine.",
    "Design a gear for a blender.",
    "Design a gear for a drill.",
    "Design a gear for a power saw.",
]

# Advanced
prompts += [
    "Design a gear set for a multi-speed transmission.",
    "Design a gear train for a specific power transmission ratio.",
    "Design a gear system with an idler gear.",
    "Design a gear system with a compound gear train.",
    "Design a gear system for a planetary gear set.",
]

# Real-world examples
prompts += [
    "Design a gear similar to the one found in a specific car model's transmission.",
    "Design a gear similar to the one used in a specific lawnmower engine.",
]


In [None]:
# Load the training corpus. Despite the variable name, this is a plain-text
# file that GearDataset reads line by line, not a PDF.
pdf_path = '/home/rithwik/paper/dataset/shigley_cleaned_text.txt'  # Replace with your corpus path

# GearDataset reads every line in its constructor, so the handle can be closed
# as soon as the dataset exists instead of staying open for the kernel's lifetime.
# Building the dataset first also fails fast on a bad path, before the model loads.
with open(pdf_path, 'r') as text:
    gear_dataset = GearDataset(text)

# Initialize tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.add_special_tokens({'pad_token': '<pad>'})  # Set padding token
config = GPT2Config.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2", config=config)
# '<pad>' is a new vocabulary entry, so its id lies beyond the pretrained
# embedding matrix. Resize the embeddings to match the tokenizer; otherwise
# any batch that actually contains padding indexes out of range.
model.resize_token_embeddings(len(tokenizer))

# Fine-tune GPT-2 on the prepared dataset
fine_tune_gpt2(gear_dataset, tokenizer, model, epochs, batch_size)

In [None]:
# fine_tune_gpt2 calls model.train() and nothing switches it back, so put the
# model in eval mode here; otherwise dropout stays active during generation.
model.eval()

for prompt in prompts:
    # The literal "<BOS>" is not a GPT-2 token and would be encoded as ordinary
    # text; use the tokenizer's real beginning-of-sequence token instead.
    input_text = tokenizer.bos_token + prompt
    # At notebook scope `device` is the torch.device *class* (imported with
    # `from torch import device`), not a device instance, so `.to(device)`
    # fails. Move the inputs to wherever the model's weights actually are.
    encoded = tokenizer(input_text, return_tensors="pt").to(model.device)
    input_ids = encoded["input_ids"]

    # Generate output
    output = model.generate(
        input_ids=input_ids,
        attention_mask=encoded["attention_mask"],
        max_length=100,
        do_sample=True,  # temperature is only applied when sampling is enabled
        temperature=0.7,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,  # explicit pad id for generate()
    )
    output_text = tokenizer.decode(output[0], skip_special_tokens=True)
    print("Generated Gear Design:", output_text)