In [1]:
import torch
import os

# Point Hugging Face Hub downloads at the hf-mirror.com mirror.
# NOTE(review): keep this assignment above the `transformers` import below --
# the hub client is believed to read HF_ENDPOINT when it is first imported;
# confirm before reordering these lines.
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
# Device string used by later cells: 'cuda' when a GPU is visible, else 'cpu'.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('EleutherAI/pythia-160m')
# Register the literal string '[PAD]' as the tokenizer's pad token. The collate
# function in the next cell compares against tokenizer.pad_token_id to mask
# padded positions out of the labels.
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Bare expression: show the tokenizer repr as the cell output.
tokenizer

GPTNeoXTokenizerFast(name_or_path='EleutherAI/pythia-160m', vocab_size=50254, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', 'unk_token': '<|endoftext|>', 'pad_token': '[PAD]'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<|padding|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	50254: AddedToken("                        ", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),
	50255: AddedToken("                       ", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),
	50256: AddedToken("                      ", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),
	50257: AddedToken("  

In [2]:
from datasets import load_dataset, concatenate_datasets

# Build the training corpus: fetch every split of IMDB, stack the splits into
# a single dataset, and drop the 'label' column -- the collate function only
# reads the 'text' field.
dataset = (
    concatenate_datasets(list(load_dataset('imdb').values()))
    .remove_columns(['label'])
)


def f(data):
    """Collate a list of dataset rows into one tokenized batch on `device`.

    Args:
        data: iterable of rows, each a mapping with a 'text' entry.

    Returns:
        The tokenizer's batch object (tensors moved to `device`) with an
        extra 'labels' entry: a copy of 'input_ids' in which every position
        holding the pad token id is replaced by -100.
    """
    texts = [row['text'] for row in data]

    # Pad to the longest sequence in the batch, capped at 50 tokens.
    batch = tokenizer(texts,
                      padding=True,
                      truncation=True,
                      max_length=50,
                      return_tensors='pt')
    batch = batch.to(device)

    # masked_fill returns a new tensor, so 'input_ids' itself is untouched.
    token_ids = batch['input_ids']
    is_pad = token_ids == tokenizer.pad_token_id
    batch['labels'] = token_ids.masked_fill(is_pad, -100)

    return batch


# Shuffled batches of 16 rows, tokenized on the fly by `f`; a trailing
# incomplete batch is dropped.
loader = torch.utils.data.DataLoader(
    dataset,
    collate_fn=f,
    batch_size=16,
    drop_last=True,
    shuffle=True,
)

# Cell output: number of batches per epoch and one sample batch.
len(loader), next(iter(loader))

(6250,
 {'input_ids': tensor([[   42,   588,  3164,  1900,   564,   281,   923,   247,  9002,    90,
           1219,   282,    67,  6440,    13,   347,   581,  1620,  6057,   672,
            344,   816,  1537,  1056,   247,  1524,  1091,   281,   521,  2469,
          43703,    15,  3771,   581,   390,   767,  1270,  9506,   390,  3104,
            812,  1056,   352, 32811,    15, 30018,  2299,   436,  3085,   816],
         [ 1147,   434,   417,   247, 17325,   533,   604,   368,  1053,   626,
           9012,    13,   368,   588,   320, 12371,  2139,   368,  4821,   281,
           1790,   949,   436,  7357,   253,  1077,   990,    15,  8024,  1102,
          33149,  4824,   436,  3085,   954,   273,   253,  1039,   342,   521,
           7312, 22611,   533,  1014,   344,   476,   626,  1347,   826,    91],
         [ 1909,   247,  7989,   273,  7785,  4591,     8, 26385,   553,    13,
            309,  2427,   281,   923,  1457,  2816,    13,   309, 10540,  1422,
            342, 

In [3]:
from transformers import AutoModelForCausalLM

# Load the pretrained Pythia-160m causal language model, then place it on the
# device selected in the first cell.
model_actor = AutoModelForCausalLM.from_pretrained('EleutherAI/pythia-160m')
model_actor = model_actor.to(device)

# Cell output: the model configuration.
model_actor.config

GPTNeoXConfig {
  "_name_or_path": "EleutherAI/pythia-160m",
  "architectures": [
    "GPTNeoXForCausalLM"
  ],
  "attention_bias": true,
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": 0.1,
  "eos_token_id": 0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 2048,
  "model_type": "gpt_neox",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "rope_scaling": null,
  "rotary_emb_base": 10000,
  "rotary_pct": 0.25,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.43.3",
  "use_cache": true,
  "use_parallel_residual": true,
  "vocab_size": 50304
}

In [4]:
# Fine-tune the actor on the IMDB batches with a plain next-token loss, and
# periodically print the loss together with one sampled continuation so that
# progress can be judged by eye.
optimizer = torch.optim.Adam(model_actor.parameters(), lr=1e-5)

PROMPT_LEN = 5  # leading tokens of the batch's first row used as the prompt
LOG_EVERY = 1000  # optimisation steps between progress reports

# from_pretrained() returns the model in eval mode; enable train mode
# explicitly so that any dropout configured on the model is active while
# optimising.
model_actor.train()

for epoch in range(10):
    for i, data in enumerate(loader):
        out = model_actor(**data)
        out.loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if i % LOG_EVERY == 0:
            print(epoch, i, len(loader), out.loss.item())

            # Split the first row of the batch into a short prompt and the
            # reference continuation. The row comes from a padded batch, so
            # the continuation may end in pad tokens for short reviews.
            prompt = data['input_ids'][0]
            chosen = prompt[PROMPT_LEN:]
            prompt = prompt[:PROMPT_LEN]

            # Sample in eval mode, then return to train mode for the next step.
            # top_k=0 (an int, as the API expects) and top_p=1.0 leave the
            # sampling distribution unfiltered.
            model_actor.eval()
            gen = model_actor.generate(input_ids=prompt.unsqueeze(0),
                                       min_length=-1,
                                       max_length=32,
                                       pad_token_id=tokenizer.pad_token_id,
                                       eos_token_id=tokenizer.eos_token_id,
                                       top_k=0,
                                       top_p=1.0,
                                       do_sample=True)
            model_actor.train()

            # generate() returns prompt + continuation; keep the continuation.
            gen = gen[0, PROMPT_LEN:]

            print('prompt=', tokenizer.decode(prompt))
            print('chosen=', tokenizer.decode(chosen))
            print('gen=', tokenizer.decode(gen))

# Put the model back in eval mode (the mode it was in before this cell ran)
# so that later cells see the same state as before, then persist the weights.
model_actor.eval()
model_actor.save_pretrained('model/actor')

0 0 6250 4.111677646636963
prompt= Don't mistake "War
chosen=  Inc." for a sharply chiseled satire or a brainy comedy full of inside jokes for news buffs. It isn't.<br /><br />This is an old-fashioned screwball comedy, with ridiculously
gen=  In The Game" for "Public Opinion" even: a stock company." Therefore, printcuring experimentation is not the same as,
0 1000 6250 3.7925894260406494
prompt= How they could've ruined
chosen=  such a wonderful thing is beyond anyone's imagination. Season 1 was magnificent, with an addictive plot, great character development and a slow unveiling of what's to come.<br /><br />Season 2 was so so but slow
gen=  this Movie much would've seemed innocent and decent. Instead, they have turned a stupid comedy with the unwarranted accent against some unpleasant
0 2000 6250 4.0283660888671875
prompt= I am always at a
chosen=  loss of words why so many terrible films are made...this film is no exception. In every single way, this is an embarrassment. Don't get m

3 1000 6250 3.0222320556640625
prompt= Basically the exact same movie
chosen=  as "House of Wax" - Vincent Price's first genuine horror hit released the previous year - but seriously who cares, because "The Mad Magician" offers just as many sheer thrills, delightful period set-pieces
gen=  when it comes to Japanese brothel(Japan?) around the clock.<br /><br />Looks great but the type of movie doesn't
3 2000 6250 2.887450933456421
prompt= This is a nice movie
chosen=  with good performances by Paz Vega and Leonardo Sbaraglia. Of course Vicente Aranda is a legend in Spanish Cinema and surely one of the great directors in Spanish cinema but I don't think this is
gen=  for the whole family of anime fans. This movie is not only small budget but must be distributed among other pretty much everything for entertainment.
3 3000 6250 3.015592575073242
prompt= I was expecting a good
chosen=  story, good character development, excellent acting from Jolie & Banderas, and some great erotic scenes.<b

6 2000 6250 2.522771120071411
prompt= Reign Over Me is
chosen=  a success due to the powerful work by Adam Sandler and Don Cheadle. While comedic actors going dramatic has been seen as somewhat of a distraction, Sandler is no stranger to playing more serious roles. Most of
gen=  a plot short from director Barry Horwath that plays the role of Richard Widmark in the film version of Follow The Fleet. Brandon
6 3000 6250 2.6917171478271484
prompt= The base for this movie
chosen=  is not only its acting and directing but the dialogs are an intricate web of poetry and filosophy. According to a part of dialogue in the movie drugs open your mind so you can see haw everything is relative to each person
gen=  is the relatively astonishing Escape from L.A. from the insane first film "Friday the 13th" released in 1980. It
6 4000 6250 2.693819046020508
prompt= When it exploded onto screens
chosen=  in the mid 1980's, Oscar® nominated director<br /><br />Dominique Deruddere's film CRAZY LOVE divided

9 2000 6250 2.261312484741211
prompt= A powerful "real-
chosen= tv" movie. Very subversive and therefore remaining almost un-broadcasted! (almost...thanx 2 arte in France). After you've watched this manhunt all movies filmed with the same concept (a
gen= tv" movie from start to finish and a compelling and unique one! Directed by Maxwell Caulfield, co-written with Sidney
9 3000 6250 2.0817272663116455
prompt= Decent action scenes,
chosen=  but the movie is saddled with a slow, convoluted storyline, nearly non-existent dialogue that leads to minimal character development, and a seriously horrible storyline...<br /><br />Did I mention that the storyline made
gen=  but very poor execution. How anyone was paid to make this film is amazing. This is probably why directing is so often a instinct --
9 4000 6250 2.0197558403015137
prompt= This film is about C
chosen= indy Campell, who moves to a new house, to take care of an old woman. Her next door neighbor is Tom Ryan. Suddenly, alien tripods 