<a href="https://colab.research.google.com/github/pokemonmaster67/Magical-LLM/blob/main/Combine_LLM_maker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
import os
import matplotlib.pyplot as plt
import seaborn as sns

class LLMMaker:
    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.config = None
        self.model_name = None

    def create_model(self, model_name="gpt2", custom_name="MyModel"):
        self.model_name = custom_name
        try:
            self.config = AutoConfig.from_pretrained(model_name)
            self.model = AutoModelForCausalLM.from_pretrained(model_name)
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            print(f"Model '{self.model_name}' loaded from {model_name} successfully!")
        except Exception as e:
            print(f"Error loading model: {e}")
            print("Please check the model name and try again.")
            return

        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
            self.model.config.pad_token_id = self.model.config.eos_token_id
            print("Added padding token to the tokenizer.")

    def combine_models(self, model_names, combination_method='average'):
        if not model_names:
            print("Please provide at least one model name to combine.")
            return

        combined_model = None
        combined_config = None

        for name in model_names:
            try:
                config = AutoConfig.from_pretrained(name)
                model = AutoModelForCausalLM.from_pretrained(name)

                if combined_model is None:
                    combined_model = model
                    combined_config = config
                else:
                    if combination_method == 'average':
                        # Average the weights of the models
                        for (combined_name, combined_param), (_, param) in zip(combined_model.named_parameters(), model.named_parameters()):
                            combined_param.data = (combined_param.data + param.data) / 2
                    elif combination_method == 'concatenate':
                        # Concatenate the hidden states (only works for models with the same architecture)
                        if combined_config.hidden_size != config.hidden_size:
                            print(f"Cannot concatenate {name} due to different hidden sizes.")
                            continue
                        combined_config.num_hidden_layers += config.num_hidden_layers
                        new_layers = list(combined_model.transformer.h) + list(model.transformer.h)
                        combined_model.transformer.h = torch.nn.ModuleList(new_layers)
                    else:
                        print(f"Unknown combination method: {combination_method}")
                        return

                print(f"Combined model {name}")
            except Exception as e:
                print(f"Error combining model {name}: {e}")

        self.model = combined_model
        self.config = combined_config
        self.tokenizer = AutoTokenizer.from_pretrained(model_names[0])
        print("Models combined successfully!")

    def train_model(self, train_data, epochs=1, learning_rate=5e-5):
        if self.model is None:
            print("Please create a model first!")
            return

        optimizer = torch.optim.AdamW(self.model.parameters(), lr=learning_rate)
        self.model.train()

        for epoch in range(epochs):
            total_loss = 0
            for input_text, target_text in train_data:
                inputs = self.tokenizer(input_text, return_tensors='pt', truncation=True, padding=True)
                targets = self.tokenizer(target_text, return_tensors='pt', truncation=True, padding=True)

                max_length = max(inputs['input_ids'].size(1), targets['input_ids'].size(1))
                inputs = self.tokenizer(input_text, return_tensors='pt', truncation=True, padding='max_length', max_length=max_length)
                targets = self.tokenizer(target_text, return_tensors='pt', truncation=True, padding='max_length', max_length=max_length)

                input_ids = inputs['input_ids']
                target_ids = targets['input_ids']

                outputs = self.model(input_ids, labels=target_ids)
                loss = outputs.loss
                total_loss += loss.item()

                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

            print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_data)}")

    def generate_text(self, prompt, max_length=100):
        if self.model is None:
            print("Please create a model first!")
            return

        input_ids = self.tokenizer.encode(prompt, return_tensors='pt')
        output = self.model.generate(input_ids, max_length=max_length, num_return_sequences=1)
        generated_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
        print(f"Generated text: {generated_text}")

    def save_model(self, path):
        if self.model is None:
            print("Please create a model first!")
            return

        try:
            os.makedirs(path, exist_ok=True)
            self.model.save_pretrained(path)
            self.tokenizer.save_pretrained(path)
            print(f"Model '{self.model_name}' saved successfully!")
        except Exception as e:
            print(f"An error occurred while saving the model: {str(e)}")

    def load_model(self, path):
        try:
            self.model = AutoModelForCausalLM.from_pretrained(path)
            self.tokenizer = AutoTokenizer.from_pretrained(path)
            self.config = self.model.config
            print(f"Model '{self.model_name}' loaded successfully!")
        except Exception as e:
            print(f"An error occurred while loading the model: {str(e)}")

def get_training_data_from_terminal():
    training_data = []
    print("Enter input-output pairs for training. Type 'done' when finished.")
    while True:
        input_text = input("Input: ")
        if input_text.lower() == 'done':
            break
        output_text = input("Output: ")
        training_data.append((input_text, output_text))
    return training_data

def main():
    llm_maker = LLMMaker()

    while True:
        print("\nLLM Maker 3.0 Menu:")
        print("1. Create Model")
        print("2. Combine Models")
        print("3. Train Model")
        print("4. Generate Text")
        print("5. Save Model")
        print("6. Load Model")
        print("7. Exit")

        choice = input("Enter your choice (1-7): ")

        if choice == '1':
            model_name = input("Enter the name of the model from Hugging Face (e.g., 'gpt2', 'EleutherAI/gpt-neo-1.3B'): ")
            custom_name = input("Enter a custom name for your model: ")
            llm_maker.create_model(model_name, custom_name)

        elif choice == '2':
            num_models = int(input("How many models do you want to combine? "))
            model_names = []
            for i in range(num_models):
                model_name = input(f"Enter the name of model {i+1} from Hugging Face: ")
                model_names.append(model_name)
            combination_method = input("Enter combination method (average/concatenate): ")
            llm_maker.combine_models(model_names, combination_method)

        elif choice == '3':
            train_data = get_training_data_from_terminal()
            epochs = int(input("Enter number of epochs: "))
            learning_rate = float(input("Enter learning rate: "))
            llm_maker.train_model(train_data, epochs, learning_rate)

        elif choice == '4':
            prompt = input("Enter a prompt for text generation: ")
            max_length = int(input("Enter maximum length for generated text: "))
            llm_maker.generate_text(prompt, max_length)

        elif choice == '5':
            path = input("Enter path to save the model: ")
            llm_maker.save_model(path)

        elif choice == '6':
            path = input("Enter path to load the model: ")
            model_name = input("Enter the name of the model to load: ")
            llm_maker.model_name = model_name
            llm_maker.load_model(path)

        elif choice == '7':
            print("Thank you for using LLM Maker 3.0!")
            break

        else:
            print("Invalid choice. Please try again.")

if __name__ == "__main__":
    main()


LLM Maker 3.0 Menu:
1. Create Model
2. Combine Models
3. Train Model
4. Generate Text
5. Save Model
6. Load Model
7. Exit
Enter your choice (1-7): 2
How many models do you want to combine? 3
Enter the name of model 1 from Hugging Face: openai-community/gpt2
Enter the name of model 2 from Hugging Face: google-bert/bert-base-uncased
Enter the name of model 3 from Hugging Face: distilbert/distilbert-base-uncased
Enter combination method (average/concatenate): average


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Combined model openai-community/gpt2


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`


Error combining model google-bert/bert-base-uncased: The size of tensor a (50257) must match the size of tensor b (30522) at non-singleton dimension 0


config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Error combining model distilbert/distilbert-base-uncased: Unrecognized configuration class <class 'transformers.models.distilbert.configuration_distilbert.DistilBertConfig'> for this kind of AutoModel: AutoModelForCausalLM.
Model type should be one of BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, CpmAntConfig, CTRLConfig, Data2VecTextConfig, DbrxConfig, ElectraConfig, ErnieConfig, FalconConfig, FuyuConfig, GemmaConfig, Gemma2Config, GitConfig, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GPTJConfig, JambaConfig, JetMoeConfig, LlamaConfig, MambaConfig, MarianConfig, MBartConfig, MegaConfig, MegatronBertConfig, MistralConfig, MixtralConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, OlmoConfig, OpenLlamaConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, Persimmo

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Models combined successfully!

LLM Maker 3.0 Menu:
1. Create Model
2. Combine Models
3. Train Model
4. Generate Text
5. Save Model
6. Load Model
7. Exit
Enter your choice (1-7): 5
Enter path to save the model: /content/LLM
Model 'None' saved successfully!

LLM Maker 3.0 Menu:
1. Create Model
2. Combine Models
3. Train Model
4. Generate Text
5. Save Model
6. Load Model
7. Exit


KeyboardInterrupt: Interrupted by user