<a href="https://colab.research.google.com/github/pokemonmaster67/Magical-LLM/blob/main/LLMMAKER.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from transformers import GPT2Config, GPT2LMHeadModel, GPT2Tokenizer, AutoModelForCausalLM, AutoTokenizer
import argparse
import os
import matplotlib.pyplot as plt
import seaborn as sns
import json

class LLMMaker:
    """Convenience wrapper for creating, training, and inspecting a causal LM.

    The instance holds one model/tokenizer/config triple at a time. Every
    method reports problems by printing a message and returning ``None``
    instead of raising, because the class is driven from an interactive menu.

    Attributes:
        model: The current causal language model, or ``None``.
        tokenizer: Tokenizer paired with ``model``, or ``None``.
        config: Configuration object of ``model``, or ``None``.
        model_name: User-chosen name; also the stem of the weights file.
        optimizer: Optimizer built by the most recent ``train_model`` call.
        batch_size: Value recorded by ``adjust_hyperparameters`` (training
            itself processes one pair per optimizer step).
    """

    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.config = None
        self.model_name = None
        # Kept on the instance so adjust_hyperparameters has something to edit.
        self.optimizer = None
        self.batch_size = None

    def create_model(self, from_scratch=True, base_model='gpt2', vocab_size=50257, n_layer=12, n_head=12, n_embd=768, model_name="MyModel"):
        """Create a randomly initialised GPT-2 or load a pre-trained causal LM.

        Instance state is replaced only after everything has been built
        successfully, so a failed attempt leaves the previous model intact.

        Args:
            from_scratch: Build a new GPT-2 from the size arguments when
                true; otherwise load ``base_model`` with its own tokenizer.
            base_model: Model identifier passed to ``from_pretrained``.
            vocab_size: Vocabulary size; raised to the tokenizer's size when
                smaller, since token ids must be valid embedding indices.
            n_layer: Number of transformer layers.
            n_head: Number of attention heads (must be positive).
            n_embd: Embedding width; rounded down to a multiple of ``n_head``
                (but never below ``n_head``).
            model_name: Name used in messages and for the saved weights file.
        """
        if from_scratch:
            if n_head <= 0 or n_embd <= 0:
                print("Error creating model: n_head and n_embd must be positive")
                print("Please adjust the parameters and try again.")
                return
            if n_embd % n_head != 0:
                # max() keeps the width from collapsing to 0 when n_embd < n_head.
                n_embd = max(n_head, (n_embd // n_head) * n_head)
                print(f"Adjusted n_embd to {n_embd} to ensure divisibility by n_head")

            try:
                tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
                if vocab_size < len(tokenizer):
                    # A smaller embedding table would make tokenizer ids
                    # index out of range during training and generation.
                    vocab_size = len(tokenizer)
                    print(f"Adjusted vocab_size to {vocab_size} to cover the tokenizer vocabulary")
                config = GPT2Config(
                    vocab_size=vocab_size,
                    n_layer=n_layer,
                    n_head=n_head,
                    n_embd=n_embd
                )
                model = GPT2LMHeadModel(config)
            except (ValueError, OSError) as e:
                # OSError covers a failed tokenizer download / missing files.
                print(f"Error creating model: {e}")
                print("Please adjust the parameters and try again.")
                return
            print(f"Model '{model_name}' created from scratch successfully!")
        else:
            try:
                model = AutoModelForCausalLM.from_pretrained(base_model)
                tokenizer = AutoTokenizer.from_pretrained(base_model)
            except Exception as e:
                print(f"Error loading model: {e}")
                print("Please check the base model name and try again.")
                return
            config = model.config
            print(f"Model '{model_name}' loaded from {base_model} for fine-tuning!")

        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            model.config.pad_token_id = tokenizer.eos_token_id
            print("Added padding token to the tokenizer.")

        self.model = model
        self.tokenizer = tokenizer
        self.config = config
        self.model_name = model_name
        # An optimizer built for a previous model must not be reused.
        self.optimizer = None

    def _encode_pair(self, input_text, target_text):
        """Tokenize one (input, target) pair as a single training sequence.

        The two texts are joined with a space and terminated with the
        tokenizer's EOS token (when it has one). Returns a dict with
        ``input_ids`` and ``attention_mask`` on the model's device, or
        ``None`` when the sequence has fewer than two tokens and therefore
        offers no next-token target.
        """
        text = " ".join(part for part in (input_text, target_text) if part)
        text += self.tokenizer.eos_token or ""
        encoded = self.tokenizer(text, return_tensors='pt', truncation=True)
        if encoded['input_ids'].shape[1] < 2:
            return None
        device = next(self.model.parameters()).device
        return {
            'input_ids': encoded['input_ids'].to(device),
            'attention_mask': encoded['attention_mask'].to(device),
        }

    def train_model(self, train_data, epochs=1, learning_rate=5e-5):
        """Train the model on (input_text, target_text) pairs.

        Each pair becomes one sequence and the model is trained to predict
        every next token of it: the labels are the input ids themselves,
        which the model shifts internally. One optimizer step is taken per
        pair, so no padding is involved.

        Args:
            train_data: Iterable of ``(input_text, target_text)`` string pairs.
            epochs: Number of passes over ``train_data``.
            learning_rate: Learning rate for the freshly built AdamW optimizer.
        """
        if self.model is None:
            print("Please create a model first!")
            return

        train_data = list(train_data)
        if not train_data:
            print("No training data provided; nothing to train on.")
            return

        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=learning_rate)
        self.model.train()

        for epoch in range(epochs):
            total_loss = 0.0
            steps = 0
            for input_text, target_text in train_data:
                batch = self._encode_pair(input_text, target_text)
                if batch is None:
                    continue

                self.optimizer.zero_grad()
                outputs = self.model(
                    batch['input_ids'],
                    attention_mask=batch['attention_mask'],
                    labels=batch['input_ids'],
                )
                loss = outputs.loss
                loss.backward()
                self.optimizer.step()

                total_loss += loss.item()
                steps += 1

            if steps == 0:
                print("No usable training pairs (each needs at least two tokens).")
                return
            print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/steps}")

    def fine_tune(self, fine_tune_data, epochs=1, learning_rate=1e-5):
        """Run ``train_model`` with a smaller default learning rate."""
        self.train_model(fine_tune_data, epochs, learning_rate)

    def evaluate_model(self, eval_data):
        """Print the mean loss and perplexity over (input, target) pairs.

        Pairs are encoded exactly as in ``train_model``. The reported loss is
        the mean of the per-pair losses and the perplexity is its exponential.

        Args:
            eval_data: Iterable of ``(input_text, target_text)`` string pairs.
        """
        if self.model is None:
            print("Please create a model first!")
            return

        eval_data = list(eval_data)
        if not eval_data:
            print("No evaluation data provided; nothing to evaluate.")
            return

        self.model.eval()
        total_loss = 0.0
        steps = 0

        with torch.no_grad():
            for input_text, target_text in eval_data:
                batch = self._encode_pair(input_text, target_text)
                if batch is None:
                    continue
                outputs = self.model(
                    batch['input_ids'],
                    attention_mask=batch['attention_mask'],
                    labels=batch['input_ids'],
                )
                total_loss += outputs.loss.item()
                steps += 1

        if steps == 0:
            print("No usable evaluation pairs (each needs at least two tokens).")
            return

        avg_loss = total_loss / steps
        perplexity = torch.exp(torch.tensor(avg_loss))
        print(f"Evaluation Loss: {avg_loss}")
        print(f"Perplexity: {perplexity.item()}")

    def generate_text(self, prompt, max_length=100):
        """Generate a continuation of ``prompt`` and print it.

        Args:
            prompt: Text the generation starts from.
            max_length: Maximum total length in tokens, prompt included.
        """
        if self.model is None:
            print("Please create a model first!")
            return

        encoded = self.tokenizer(prompt, return_tensors='pt')
        if encoded['input_ids'].shape[1] == 0:
            print("Prompt produced no tokens; please enter some text.")
            return

        pad_id = self.tokenizer.pad_token_id
        if pad_id is None:
            pad_id = self.tokenizer.eos_token_id

        device = next(self.model.parameters()).device
        # The model may still be in training mode; dropout must be off here.
        self.model.eval()
        with torch.no_grad():
            output = self.model.generate(
                encoded['input_ids'].to(device),
                attention_mask=encoded['attention_mask'].to(device),
                max_length=max_length,
                num_return_sequences=1,
                pad_token_id=pad_id,
            )
        generated_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
        print(f"Generated text: {generated_text}")

    def save_model(self, path):
        """Write weights, tokenizer files, and ``config.json`` into ``path``.

        The weights go to ``<model_name>.pth`` as a plain ``state_dict``.
        """
        if self.model is None:
            print("Please create a model first!")
            return

        try:
            os.makedirs(path, exist_ok=True)
            torch.save(self.model.state_dict(), os.path.join(path, f"{self.model_name}.pth"))
            self.tokenizer.save_pretrained(path)
            self.config.to_json_file(os.path.join(path, "config.json"))
            print(f"Model '{self.model_name}' saved successfully!")
        except Exception as e:
            print(f"An error occurred while saving the model: {str(e)}")

    def load_model(self, path):
        """Load a model previously written by ``save_model`` from ``path``.

        ``self.model_name`` must already be set, because it names the weights
        file. The model is rebuilt with the GPT-2 classes, so only GPT-2
        architecture checkpoints can be restored. Instance state is replaced
        only when every step succeeds.
        """
        if not self.model_name:
            print("An error occurred while loading the model: model name is not set")
            return

        try:
            config = GPT2Config.from_json_file(os.path.join(path, "config.json"))
            model = GPT2LMHeadModel(config)
            # NOTE: torch.load unpickles the file; only load checkpoints from
            # a trusted source. map_location lets GPU-saved weights load on CPU.
            state_dict = torch.load(
                os.path.join(path, f"{self.model_name}.pth"),
                map_location='cpu',
            )
            model.load_state_dict(state_dict)
            tokenizer = GPT2Tokenizer.from_pretrained(path)
        except Exception as e:
            print(f"An error occurred while loading the model: {str(e)}")
            return

        self.config = config
        self.model = model
        self.tokenizer = tokenizer
        self.optimizer = None
        print(f"Model '{self.model_name}' loaded successfully!")

    def adjust_hyperparameters(self, learning_rate=None, batch_size=None, dropout=None):
        """Change the learning rate, recorded batch size, and/or dropout rate.

        Args:
            learning_rate: New rate for the optimizer kept from the last
                ``train_model`` call. A later ``train_model`` call builds a
                new optimizer from its own ``learning_rate`` argument.
            batch_size: Stored on the instance and reported; training still
                takes one pair per step.
            dropout: New probability for every ``torch.nn.Dropout`` module in
                the model; must lie in ``[0, 1]``.
        """
        if self.model is None:
            print("Please create a model first!")
            return

        if learning_rate:
            optimizer = getattr(self, 'optimizer', None)
            if optimizer is None:
                print("No optimizer exists yet; train the model first, then adjust the learning rate.")
            else:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate
                print(f"Learning rate adjusted to {learning_rate}")

        if batch_size:
            self.batch_size = batch_size
            print(f"Batch size set to {batch_size}")

        if dropout is not None:
            if not 0 <= dropout <= 1:
                print("Dropout rate must be between 0 and 1.")
                return
            for module in self.model.modules():
                if isinstance(module, torch.nn.Dropout):
                    module.p = dropout
            print(f"Dropout rate adjusted to {dropout}")

    def visualize_attention(self, text):
        """Plot the last layer's attention for ``text``, averaged over heads.

        The heatmap is square: one row and one column per token position.
        """
        if self.model is None:
            print("Please create a model first!")
            return

        inputs = self.tokenizer(text, return_tensors='pt')
        if inputs['input_ids'].shape[1] == 0:
            print("No tokens to visualize; please enter some text.")
            return

        device = next(self.model.parameters()).device
        self.model.eval()
        with torch.no_grad():
            # Attention weights are only returned when explicitly requested.
            outputs = self.model(
                input_ids=inputs['input_ids'].to(device),
                attention_mask=inputs['attention_mask'].to(device),
                output_attentions=True,
            )

        attentions = outputs.attentions
        if not attentions or attentions[-1] is None:
            print("The model did not return attention weights.")
            return

        # Last layer is (batch, heads, seq, seq): take the single batch item
        # and average over heads to get a (seq, seq) matrix.
        attention = attentions[-1][0].mean(dim=0).cpu().numpy()

        plt.figure(figsize=(10, 8))
        sns.heatmap(attention, annot=True, cmap='YlGnBu')
        plt.title('Attention Visualization')
        plt.xlabel('Token Position')
        plt.ylabel('Token Position')
        plt.show()

def get_training_data_from_terminal():
    """Collect (input, output) text pairs interactively.

    Prompts alternate between "Input: " and "Output: ". Answering the input
    prompt with ``done`` (any letter case) ends collection; the output prompt
    is never checked for that word.

    Returns:
        A list of ``(input_text, output_text)`` tuples in entry order.
    """
    print("Enter input-output pairs for training. Type 'done' when finished.")
    pairs = []
    entered = input("Input: ")
    while entered.lower() != 'done':
        pairs.append((entered, input("Output: ")))
        entered = input("Input: ")
    return pairs

def _prompt_number(prompt, cast, default=None):
    """Ask until the reply converts with ``cast``; an empty reply returns ``default`` when one is given."""
    while True:
        reply = input(prompt).strip()
        if not reply and default is not None:
            return default
        try:
            return cast(reply)
        except ValueError:
            print("Invalid number. Please try again.")


def main():
    """Run the interactive LLM Maker menu until the user chooses to exit.

    Numeric answers are re-prompted until they parse, so a typo no longer
    aborts the session. Options that operate on a model are refused up front
    when none exists, before any further input is requested.
    """
    llm_maker = LLMMaker()
    # Menu entries that cannot do anything without a model.
    needs_model = {'2', '3', '4', '5', '6', '8', '9'}

    while True:
        print("\nLLM Maker 2.0 Menu:")
        print("1. Create/Load Model")
        print("2. Train Model")
        print("3. Fine-tune Model")
        print("4. Evaluate Model")
        print("5. Generate Text")
        print("6. Save Model")
        print("7. Load Model")
        print("8. Adjust Hyperparameters")
        print("9. Visualize Attention")
        print("10. Exit")

        choice = input("Enter your choice (1-10): ").strip()

        if choice in needs_model and llm_maker.model is None:
            print("Please create a model first!")
            continue

        if choice == '1':
            create_choice = input("Do you want to create a model from scratch? (yes/no): ").strip().lower()
            model_name = input("Enter a name for your model: ")
            if create_choice in ('yes', 'y'):
                vocab_size = _prompt_number("Enter vocabulary size (default 50257): ", int, 50257)
                n_layer = _prompt_number("Enter number of layers (default 12): ", int, 12)
                n_head = _prompt_number("Enter number of attention heads (default 12): ", int, 12)
                n_embd = _prompt_number("Enter embedding dimension (default 768): ", int, 768)
                llm_maker.create_model(from_scratch=True, vocab_size=vocab_size, n_layer=n_layer, n_head=n_head, n_embd=n_embd, model_name=model_name)
            else:
                base_model = input("Enter the name of the pre-trained model to fine-tune (e.g., 'gpt2', 'gpt2-medium'): ")
                llm_maker.create_model(from_scratch=False, base_model=base_model, model_name=model_name)

        elif choice == '2':
            train_data = get_training_data_from_terminal()
            epochs = _prompt_number("Enter number of epochs: ", int)
            learning_rate = _prompt_number("Enter learning rate: ", float)
            llm_maker.train_model(train_data, epochs, learning_rate)

        elif choice == '3':
            fine_tune_data = get_training_data_from_terminal()
            epochs = _prompt_number("Enter number of epochs: ", int)
            learning_rate = _prompt_number("Enter learning rate: ", float)
            llm_maker.fine_tune(fine_tune_data, epochs, learning_rate)

        elif choice == '4':
            eval_data = get_training_data_from_terminal()
            llm_maker.evaluate_model(eval_data)

        elif choice == '5':
            prompt = input("Enter a prompt for text generation: ")
            max_length = _prompt_number("Enter maximum length for generated text: ", int)
            llm_maker.generate_text(prompt, max_length)

        elif choice == '6':
            path = input("Enter path to save the model: ")
            llm_maker.save_model(path)

        elif choice == '7':
            path = input("Enter path to load the model: ")
            model_name = input("Enter the name of the model to load: ")
            llm_maker.model_name = model_name
            llm_maker.load_model(path)

        elif choice == '8':
            # Sentinel defaults (0 / 0 / -1) mean "skip this setting".
            lr = _prompt_number("Enter new learning rate (or press Enter to skip): ", float, 0)
            bs = _prompt_number("Enter new batch size (or press Enter to skip): ", int, 0)
            dr = _prompt_number("Enter new dropout rate (or press Enter to skip): ", float, -1)
            llm_maker.adjust_hyperparameters(lr or None, bs or None, dr if dr >= 0 else None)

        elif choice == '9':
            text = input("Enter text for attention visualization: ")
            llm_maker.visualize_attention(text)

        elif choice == '10':
            print("Thank you for using LLM Maker 2.0!")
            break

        else:
            print("Invalid choice. Please try again.")

# Start the interactive menu only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


LLM Maker 2.0 Menu:
1. Create/Load Model
2. Train Model
3. Fine-tune Model
4. Evaluate Model
5. Generate Text
6. Save Model
7. Load Model
8. Adjust Hyperparameters
9. Visualize Attention
10. Exit
