<a href="https://colab.research.google.com/github/pokemonmaster67/Magical-LLM/blob/main/Untitled33.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import argparse
import os
import matplotlib.pyplot as plt
import seaborn as sns
import json

class TextClassifierMaker:
    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.config = None
        self.model_name = None
        self.num_labels = None

    def create_model(self, base_model='bert-base-uncased', num_labels=2, model_name="MyClassifier"):
        self.model_name = model_name
        self.num_labels = num_labels
        try:
            self.model = AutoModelForSequenceClassification.from_pretrained(base_model, num_labels=num_labels)
            self.tokenizer = AutoTokenizer.from_pretrained(base_model)
            self.config = self.model.config
            print(f"Model '{self.model_name}' loaded from {base_model} for fine-tuning!")
        except Exception as e:
            print(f"Error loading model: {e}")
            print("Please check the base model name and try again.")
            return

    def train_model(self, train_data, epochs=3, learning_rate=2e-5, batch_size=16):
        if self.model is None:
            print("Please create a model first!")
            return

        optimizer = torch.optim.AdamW(self.model.parameters(), lr=learning_rate)
        self.model.train()

        for epoch in range(epochs):
            total_loss = 0
            for i in range(0, len(train_data), batch_size):
                batch_inputs = self.tokenizer(train_data['text'][i:i+batch_size], return_tensors='pt', truncation=True, padding=True)
                batch_labels = torch.tensor(train_data['label'][i:i+batch_size])

                outputs = self.model(**batch_inputs, labels=batch_labels)
                loss = outputs.loss
                total_loss += loss.item()

                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

            print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_data)}")

    def evaluate_model(self, eval_data, batch_size=16):
        if self.model is None:
            print("Please create a model first!")
            return

        self.model.eval()
        total_correct = 0
        total_samples = 0

        with torch.no_grad():
            for i in range(0, len(eval_data), batch_size):
                batch_inputs = self.tokenizer(eval_data['text'][i:i+batch_size], return_tensors='pt', truncation=True, padding=True)
                batch_labels = torch.tensor(eval_data['label'][i:i+batch_size])

                outputs = self.model(**batch_inputs)
                predictions = torch.argmax(outputs.logits, dim=1)
                total_correct += (predictions == batch_labels).sum().item()
                total_samples += batch_labels.size(0)

        accuracy = total_correct / total_samples
        print(f"Evaluation Accuracy: {accuracy:.4f}")

    def predict(self, text):
        if self.model is None:
            print("Please create a model first!")
            return

        self.model.eval()
        inputs = self.tokenizer(text, return_tensors='pt')
        outputs = self.model(**inputs)
        probabilities = torch.softmax(outputs.logits, dim=1)
        predicted_class = torch.argmax(probabilities).item()
        print(f"Predicted Class: {predicted_class} (Probability: {probabilities[0][predicted_class]:.4f})")

    def save_model(self, path):
        if self.model is None:
            print("Please create a model first!")
            return

        try:
            os.makedirs(path, exist_ok=True)
            torch.save(self.model.state_dict(), os.path.join(path, f"{self.model_name}.pth"))
            self.tokenizer.save_pretrained(path)
            self.config.to_json_file(os.path.join(path, "config.json"))
            print(f"Model '{self.model_name}' saved successfully!")
        except Exception as e:
            print(f"An error occurred while saving the model: {str(e)}")

    def load_model(self, path):
        try:
            config_path = os.path.join(path, "config.json")
            self.config = AutoModelForSequenceClassification.from_pretrained(path).config
            self.model = AutoModelForSequenceClassification.from_pretrained(path, config=self.config)
            self.model.load_state_dict(torch.load(os.path.join(path, f"{self.model_name}.pth")))
            self.tokenizer = AutoTokenizer.from_pretrained(path)
            print(f"Model '{self.model_name}' loaded successfully!")
        except Exception as e:
            print(f"An error occurred while loading the model: {str(e)}")

def get_training_data_from_terminal():
    training_data = {'text': [], 'label': []}
    print("Enter text and label pairs for training. Type 'done' when finished.")
    while True:
        text = input("Text: ")
        if text.lower() == 'done':
            break
        while True:
            try:
                label = int(input("Label (enter a number): "))
                break
            except ValueError:
                print("Invalid input. Please enter a number for the label.")
        training_data['text'].append(text)
        training_data['label'].append(label)
    return training_data

def main():
    classifier_maker = TextClassifierMaker()

    while True:
        print("\nText Classifier Maker Menu:")
        print("1. Create/Load Model")
        print("2. Train Model")
        print("3. Evaluate Model")
        print("4. Predict")
        print("5. Save Model")
        print("6. Load Model")
        print("7. Exit")

        choice = input("Enter your choice (1-7): ")

        if choice == '1':
            model_name = input("Enter a name for your model: ")
            base_model = input("Enter the name of the pre-trained model to fine-tune (e.g., 'bert-base-uncased'): ")
            num_labels = int(input("Enter the number of classes for your classification task: "))
            classifier_maker.create_model(base_model=base_model, num_labels=num_labels, model_name=model_name)
        elif choice == '2':
            train_data = get_training_data_from_terminal()
            epochs = int(input("Enter number of epochs: "))
            learning_rate = float(input("Enter learning rate: "))
            batch_size = int(input("Enter batch size: "))
            classifier_maker.train_model(train_data, epochs, learning_rate, batch_size)
        elif choice == '3':
            eval_data = get_training_data_from_terminal()
            batch_size = int(input("Enter batch size: "))
            classifier_maker.evaluate_model(eval_data, batch_size)
        elif choice == '4':
            text = input("Enter the text for prediction: ")
            classifier_maker.predict(text)
        elif choice == '5':
            path = input("Enter path to save the model: ")
            classifier_maker.save_model(path)
        elif choice == '6':
            path = input("Enter path to load the model: ")
            model_name = input("Enter the name of the model to load: ")
            classifier_maker.model_name = model_name
            classifier_maker.load_model(path)
        elif choice == '7':
            print("Thank you for using Text Classifier Maker!")
            break
        else:
            print("Invalid choice. Please try again.")

if __name__ == "__main__":
    main()


Text Classifier Maker Menu:
1. Create/Load Model
2. Train Model
3. Evaluate Model
4. Predict
5. Save Model
6. Load Model
7. Exit
Enter your choice (1-7): 1
Enter a name for your model: Starlight
Enter the name of the pre-trained model to fine-tune (e.g., 'bert-base-uncased'): bert-base-uncased
Enter the number of classes for your classification task: 3


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Model 'Starlight' loaded from bert-base-uncased for fine-tuning!

Text Classifier Maker Menu:
1. Create/Load Model
2. Train Model
3. Evaluate Model
4. Predict
5. Save Model
6. Load Model
7. Exit
Enter your choice (1-7): 2
Enter text and label pairs for training. Type 'done' when finished.
Text: I hate you
Label (enter a number): 0
Text: I love you 
Label (enter a number): 1
Text: Done
Enter number of epochs: 5
Enter learning rate: 3
Enter batch size: 16
Epoch 1/5, Loss: 0.5753440260887146
Epoch 2/5, Loss: 15.71849536895752
Epoch 3/5, Loss: 747.3817749023438
Epoch 4/5, Loss: 647.7513427734375
Epoch 5/5, Loss: 215.11788940429688

Text Classifier Maker Menu:
1. Create/Load Model
2. Train Model
3. Evaluate Model
4. Predict
5. Save Model
6. Load Model
7. Exit
Enter your choice (1-7): 5
Enter path to save the model: /content/LLM
Model 'Starlight' saved successfully!

Text Classifier Maker Menu:
1. Create/Load Model
2. Train Model
3. Evaluate Model
4. Predict
5. Save Model
6. Load Model
7. Ex

KeyboardInterrupt: Interrupted by user