In [None]:
!pip install --quiet transformers datasets accelerate nvidia-ml-py3
!pip install -U adapter-transformers

In [None]:
from transformers import AutoTokenizer 
from transformers import TrainingArguments, AutoModelWithLMHead
from transformers import AutoModelForCausalLM
from transformers import AutoConfig

# One checkpoint identifier shared by the config, the weights and the
# tokenizer so the three can never drift apart.
MODEL_NAME = "gpt2-xl"

config = AutoConfig.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [2]:
# Text file used as the training corpus; replace the placeholder name with
# the path to your own dataset.
train_path = "./YourDatasetHere.txt"

In [None]:
# Cast the model to half precision via `.half()` before the dataset, adapter
# and trainer are set up.
# NOTE(review): presumably done to reduce GPU memory use — the adapter is added
# in a later cell, so confirm its weights end up in a dtype compatible with the
# half-precision base model.
model.half()

In [None]:
from transformers import TextDataset, DataCollatorForLanguageModeling

def load_dataset(train_path, tokenizer, block_size=100):
    """Build the training dataset and data collator for causal-LM training.

    Args:
        train_path: Path of the text file, forwarded to ``TextDataset`` as
            ``file_path``.
        tokenizer: Tokenizer handed to both the dataset and the collator.
        block_size: Forwarded to ``TextDataset`` as ``block_size``. Defaults
            to 100, the value that used to be hard-coded here, so existing
            two-argument calls behave exactly as before.

    Returns:
        A ``(train_dataset, data_collator)`` tuple.
    """
    train_dataset = TextDataset(
        tokenizer=tokenizer,
        file_path=train_path,
        block_size=block_size,
    )

    # mlm=False turns off masked-language-model collation; the model trained
    # in this notebook is a causal LM.
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,
    )
    return train_dataset, data_collator

# Build the dataset/collator pair that is later handed to the trainer.
train_dataset, data_collator = load_dataset(
    train_path=train_path,
    tokenizer=tokenizer,
)

In [None]:
from transformers import AdapterType, AdapterConfig, AdapterTrainer

# Adapter hyper-parameters, gathered in one place and then expanded into an
# AdapterConfig. The original author's intent is a configuration with an
# increased number of adapter parameters.
adapter_settings = dict(
    mh_adapter=True,
    output_adapter=True,
    # Author's note: any positive value gives more parameters than 0.
    # NOTE(review): confirm the exact meaning of reduction_factor in the
    # AdapterConfig documentation.
    reduction_factor=3,
    non_linearity="swish",
    phm_layer=False,
    phm_dim=100,
)
adapter_config = AdapterConfig(**adapter_settings)

# Register a new adapter called "Object-J" on the model with that configuration.
adapter = model.add_adapter("Object-J", config=adapter_config)

# Activate the adapter for training.
adapter = model.train_adapter("Object-J")

In [None]:
training_args = TrainingArguments(
    # --- output location and checkpointing ---
    output_dir="./Object-JClass",
    overwrite_output_dir=True,
    save_steps=30000,
    save_total_limit=40,
    # --- schedule and batching ---
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    # Set gradient_checkpointing=True to reduce VRAM usage.
    gradient_checkpointing=False,
    # --- optimizer ---
    optim="adafactor",
    # --- logging / reported outputs ---
    logging_steps=150,
    prediction_loss_only=True,
)

In [None]:
# Wire the model, tokenizer, data and training arguments into the
# adapter-aware trainer.
trainer = AdapterTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
)

# !nvidia-smi

In [None]:
# Run the training loop configured on `trainer`; as the last expression of the
# cell, its return value is displayed by the notebook.
trainer.train()

In [None]:
# Save through the trainer first; no path is passed, so the trainer decides
# where to write (presumably the `output_dir` from its TrainingArguments —
# confirm).
trainer.save_model()
# Additionally export the "Object-J" adapter on its own.
# NOTE(review): the arguments are assumed to be (save_directory, adapter_name),
# i.e. the adapter is written to "Object-J_adapter" — confirm against the
# adapter-transformers `save_adapter` documentation.
model.save_adapter("Object-J_adapter", "Object-J")

In [None]:
# Inference: sample one completion for a fixed prompt with the trained model.
model.to('cuda')
# The model is still in training mode after `train_adapter` / `trainer.train()`,
# which keeps dropout active; switch to eval mode so generation is not
# perturbed by dropout noise.
model.eval()
# NOTE(review): " >>>" is presumably the prompt/answer separator used in the
# training corpus — confirm against the dataset format.
prompt = tokenizer("How does gravity affect earth's shape?" + " >>>", return_tensors='pt')
# Move every encoded input tensor onto the same device as the model.
prompt = {key: value.to('cuda') for key, value in prompt.items()}
out = model.generate(
    **prompt,
    min_length=90,
    max_length=200,
    temperature=0.3,
    no_repeat_ngram_size=3,
    do_sample=True,
)
# Last expression of the cell: the decoded text of the first generated sequence.
tokenizer.decode(out[0])

The End