In [None]:
from disco.scorers import BooleanScorer
from disco.distributions import LMDistribution
from disco.distributions.context_distribution import ContextDistribution
from disco.samplers import AccumulationSampler
from disco.tuners import DPGTuner
from disco.tuners.loggers.console import ConsoleLogger
import torch
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    AutoModelForCausalLM,
)
import sympy as sp

# Basic experimental setup

Define the binary constraint b(y)

In [None]:
# Load the emotion classifier and its tokenizer used to score stories.
# The classifier is created with 7 output labels and moved to the GPU.
score_tokenizer = AutoTokenizer.from_pretrained("michellejieli/emotion_text_classifier")
score_model = AutoModelForSequenceClassification.from_pretrained("michellejieli/emotion_text_classifier", num_labels=7).to('cuda')

def sentiment_pipe(story): # joy_class is 4th class of output probabilities.
    """Return the classifier's probability of the joy class for ``story``.

    Args:
        story: Text to classify; it is tokenized with truncation at 512 tokens.

    Returns:
        A Python float: the softmax probability at class index 3 (joy).
    """
    inputs = score_tokenizer(story, return_tensors="pt", max_length=512, truncation=True).to('cuda')
    # Pure inference: no autograd graph is needed for a scalar score.
    with torch.no_grad():
        logits = score_model(**inputs).logits
    # torch.softmax replaces the bare `Softmax`, which was never imported.
    return torch.softmax(logits, dim=-1)[:, 3].item()


def is_positive(story="", t=0.98, prefix=""):
    """Tell whether the last sentence of ``prefix + story`` scores above ``t``.

    Args:
        story: Generated continuation.
        t: Threshold that the joy probability must strictly exceed.
        prefix: Text prepended to ``story`` before scoring.

    Returns:
        True if ``sentiment_pipe`` rates the final sentence above ``t``,
        otherwise False.
    """
    full_text = prefix + story
    # Drop everything after the first end-of-text marker.
    before_eos = full_text.split('<|endoftext|>')[0]
    # Only the final '. '-separated chunk is scored.
    final_sentence = before_eos.split('. ')[-1]
    return sentiment_pipe(final_sentence) > t

# Constraint b(s, c): scores s.text with is_positive; the second argument c is unused.
# `prefix` is looked up when b is called, not when the lambda is created, so
# `prefix` must be bound to a string before b is first used.
b = lambda s, c: is_positive(story=s.text, t=0.98, prefix=prefix) # hard constraint

Negative openings

In [None]:
# Load the contexts stored in datasets/ROC-negative.txt and display them.
# NOTE(review): `prefix` is rebound to a plain string in a later cell; while it
# holds a ContextDistribution, calling b would try `prefix + story` on it.
prefix = ContextDistribution('datasets/ROC-negative.txt')
prefix.contexts

# CAP experiments

In [None]:
# Base language model a.
# NOTE(review): `length = 80` is presumably the generation length — confirm.
a = LMDistribution("msintaha/gpt2-finetuned-rocstories", LLM=True, length = 80)
# Story opening used as the prompt in the cells that follow.
prefix = "My uncle couldn't afford health care. He got sick last year."

In [None]:
# Sample from a, conditioned on the plain story opening.
prompt = prefix
distr = AccumulationSampler(distribution=a, total_size=500000) # In this part, we define the sampling size.
# The sampler returns a pair, kept here as (samples_a, distr_a).
samples_a, distr_a = distr.sample(sampling_size=500, context=prompt)

In [None]:
# Steering prompt: a fixed lead-in followed by the story opening.
CAP = "This is my happy ending story, " + prefix
CAP

In [None]:
# Sample from the same model a, this time conditioned on the CAP prompt.
distr = AccumulationSampler(distribution=a, total_size=200000) # In this part, we define the sampling size.
samples_a2, distr_a2 = distr.sample(sampling_size=500, context=CAP)

In [None]:
# Build the accepted subsets here: this section never defined samples_g or
# samples_g2, so the cell used to depend on leftover kernel state.
# b ignores its second argument, so None is passed for it.
samples_g = [s for s in samples_a if b(s, None)]
samples_g2 = [s for s in samples_a2 if b(s, None)]

# Acceptance rates: fraction of samples that satisfy the constraint.
Z = len(samples_g) / len(samples_a)
Z2 = len(samples_g2) / len(samples_a2)
print('Z and log(Z):', Z, sp.log(Z))
print("Z' and log(Z'):", Z2, sp.log(Z2))

In [None]:
# Mean over accepted samples of a of: log a(y | prompt) - log a(y | CAP).
# The original referenced an undefined name `gop`; the only other context used
# in this section is CAP (the prompt samples_a2 was drawn with).
# NOTE(review): confirm that `gop` was meant to be CAP.
score = [
    a.log_score([y], context=prompt) - a.log_score([y], context=CAP)
    for y in samples_g
]
estimator_g = sum(score)/len(score)

print(estimator_g)

# Reverse direction, averaged over the accepted samples drawn with the CAP prompt.
score2 = [
    a.log_score([y], context=CAP) - a.log_score([y], context=prompt)
    for y in samples_g2
]
estimator_g2 = sum(score2)/len(score2)

print(estimator_g2)

In [None]:
# Report the divergence estimates and acceptance rates computed above.
log_Z = sp.log(Z)
log_Z2 = sp.log(Z2)
print("KL(g|a')", estimator_g - log_Z)
print("AR upgrade: ", Z2 / Z)
print("KL(g|g')", estimator_g - log_Z + log_Z2)
print("KL(g'|g)", estimator_g2 - log_Z2 + log_Z)
print("AR of a2: ", Z2)

# DPG Training

In [None]:
# The proposal and the model to be tuned both start from the same checkpoint.
proposal_model_name = "msintaha/gpt2-finetuned-rocstories"
model_name = "msintaha/gpt2-finetuned-rocstories"
# model_name = "models/joy/gop-dpg-400k"
# Story opening used as the prompt for this section.
prefix = "My uncle couldn't afford health care. He got sick last year."
prefix

In [None]:
# The sampling/tuning context is the story opening itself.
prompt = prefix
prompt

In [None]:
# NOTE(review): avoid committing a real token here; consider reading it from the environment.
token = "" # your own huggingface token

# `proposal` is the model used to build the target; a2 is created with
# freeze=False (presumably so that its weights can be updated — confirm).
proposal = LMDistribution(proposal_model_name, token=token, LLM=True, length=80)
a2 = LMDistribution(model_name, token=token, freeze=False, LLM=True, length=80)

In [None]:
# Constraint b(s, c): scores s.text with is_positive; c is unused and `prefix`
# is looked up at call time.
b = lambda s, c: is_positive(story=s.text, t=0.98, prefix=prefix) # hard constraint
scorer = BooleanScorer(b)
# Target g: the proposal combined with the boolean scorer.
g = proposal * scorer

In [None]:
# Tune a2 towards the target g with DPG, conditioning on `prompt`.
tuner = DPGTuner(a2, g,
        warmup_steps=1,
        context=prompt,
        n_gradient_steps=200,
        n_samples_per_step=10000,
        sampling_size=500,
        scoring_size=500,
        divergence_evaluation_interval=10)

# Presumably registers console reporting on the tuner — confirm.
ConsoleLogger(tuner)

tuner.tune()

In [None]:
# Persist the tuned model; it is reloaded from this path for evaluation below.
a2.save(path="models/positive/dpg-2m")

In [None]:
# Reload the base model a and draw evaluation samples conditioned on `prompt`.
model_name="msintaha/gpt2-finetuned-rocstories"
a = LMDistribution(model_name, token=token, LLM=True, length=80)

distr = AccumulationSampler(distribution=a, total_size=500000)
samples_a, distr_a = distr.sample(sampling_size=500, context=prompt)

In [None]:
# Load the DPG-tuned model saved above as a2 and draw evaluation samples from it.
model_name="models/positive/dpg-2m"
a2 = LMDistribution(model_name, token=token, LLM=True, length=80)

distr = AccumulationSampler(distribution=a2, total_size=200000)
samples_a2, distr_a2 = distr.sample(sampling_size=500, context=prompt)

In [None]:
# Keep the samples of a that satisfy the constraint b, together with the
# matching entries of distr_a.
# b ignores its second argument, so pass None explicitly instead of relying on
# IPython's `_` (the last cell output), which is not defined outside IPython.
samples_g = []
distr_g = []
for i, sample in enumerate(samples_a):
    if b(sample, None):
        samples_g.append(sample)
        distr_g.append(distr_a[i])

# Same filter for the samples of a2.
samples_g2 = []
distr_g2 = []
for i, sample in enumerate(samples_a2):
    if b(sample, None):
        samples_g2.append(sample)
        distr_g2.append(distr_a2[i])

# Show both acceptance counts; a bare expression in the middle of a cell is
# never displayed, so the first count used to be lost.
len(samples_g), len(samples_g2)

In [None]:
# Acceptance rates: fraction of samples from a (Z) and from a2 (Z2) that
# satisfy the constraint.
Z = len(samples_g) / len(samples_a)
Z2 = len(samples_g2) / len(samples_a2)
print('Z and log(Z):', Z, sp.log(Z))
print("Z' and log(Z'):", Z2, sp.log(Z2))

In [None]:
# Mean over accepted samples of a of: log a(y) - log a2(y), both under `prompt`.
score = [
    a.log_score([y], context=prompt) - a2.log_score([y], context=prompt)
    for y in samples_g
]
estimator_g = sum(score) / len(score)
print(estimator_g)

# Mean over accepted samples of a2 of: log a2(y) - log a(y).
score2 = [
    a2.log_score([y], context=prompt) - a.log_score([y], context=prompt)
    for y in samples_g2
]
estimator_g2 = sum(score2) / len(score2)
print(estimator_g2)

In [None]:
# Report the divergence estimates and acceptance rates for the DPG-tuned model.
log_Z = sp.log(Z)
log_Z2 = sp.log(Z2)
print("KL(g|a')", estimator_g - log_Z)
print("AR upgrade: ", Z2 / Z)
print("KL(g|g')", estimator_g - log_Z + log_Z2)
print("KL(g'|g)", estimator_g2 - log_Z2 + log_Z)
print("AR of a2: ", Z2)

# SFT Training

In [None]:
# Checkpoint used both for sampling the SFT data and as the model to fine-tune.
model_name = "msintaha/gpt2-finetuned-rocstories"

In [None]:
# Story opening used as the prompt for this section.
prefix = "My uncle couldn't afford health care. He got sick last year."
prefix

In [None]:
# The sampling context is the story opening itself.
prompt = prefix
prompt

In [None]:
# NOTE(review): avoid committing a real token here; consider reading it from the environment.
token = "" # your own huggingface token

# NOTE(review): this cell passes mb_mode=True where other cells pass LLM=True — confirm intended.
a = LMDistribution(model_name, token=token, mb_mode=True, length=80)
# Constraint b(s, c): scores s.text with is_positive; c is unused.
b = lambda s, c: is_positive(story=s.text, t=0.98, prefix=prefix) # hard constraint

In [None]:
# Draw the pool of samples from a that is filtered into the SFT training set.
distr = AccumulationSampler(distribution=a, total_size=2000000) # total_size here is 2,000,000 (an earlier note said 400,000)
samples_a, distr_a = distr.sample(sampling_size=500, context=prompt)

In [None]:
# Build the SFT training records: for each y ~ a with b(y) = 1, store the
# prompt followed by item[1] (presumably the generated text — confirm).
# b ignores its second argument, so pass None explicitly instead of relying on
# IPython's `_` (the last cell output), which is not defined outside IPython.
samples_g = [
    {'text': prompt + item[1]}
    for item in samples_a[:2000000]  # y ~ a
    if b(item, None)  # keep y only when b(y) = 1
]

print(len(samples_g))

In [None]:
from trl import SFTTrainer
from datasets import load_dataset, Dataset

# Training configuration for supervised fine-tuning on the accepted samples.
# NOTE(review): output_dir ('models/happy/sft') differs from the path passed to
# trainer.save_model later ('models/positive/sft-2m') — confirm intended.
training_args = TrainingArguments(
    output_dir='models/happy/sft',
    per_device_train_batch_size=64,
    num_train_epochs=1,
    learning_rate=5e-06,
    evaluation_strategy='steps',
    logging_steps=100,
    bf16=True,
    report_to="none",
)

# Train and validation sets are built from the same list, so evaluation runs
# on the training data; only the training set is shuffled.
ds_train = Dataset.from_list(samples_g)
ds_val = Dataset.from_list(samples_g)
ds_train = ds_train.shuffle()

In [None]:
# Load the tokenizer and the causal LM that will be fine-tuned.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
a0 = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", trust_remote_code=True)
# a.to('cuda')

# Reuse the end-of-sequence token for padding and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# NOTE(review): max_seq_length=1200 — the checkpoint name suggests GPT-2, whose
# context is typically 1024 tokens; confirm this limit is intended.
trainer = SFTTrainer(
    model=a0,
    args=training_args,
    train_dataset=ds_train,
    eval_dataset=ds_val,
    max_seq_length=1200,
    tokenizer=tokenizer,
    dataset_text_field='text',
)

In [None]:
# Run fine-tuning and save the result; it is reloaded from this path below.
trainer.train()
trainer.save_model("models/positive/sft-2m")

In [None]:
# Reload the base model a and draw evaluation samples conditioned on `prompt`.
a = LMDistribution("msintaha/gpt2-finetuned-rocstories", token=token, LLM=True, length=80)

distr = AccumulationSampler(distribution=a, total_size=500000)
samples_a, distr_a = distr.sample(sampling_size=500, context=prompt)

In [None]:
# Load the SFT model saved above as a2 and draw evaluation samples from it.
a2 = LMDistribution("models/positive/sft-2m", token=token, LLM=True, length=80)

distr = AccumulationSampler(distribution=a2, total_size=200000)
samples_a2, distr_a2 = distr.sample(sampling_size=500, context=prompt)

In [None]:
# Keep the samples of a that satisfy the constraint b, together with the
# matching entries of distr_a.
# b ignores its second argument, so pass None explicitly instead of relying on
# IPython's `_` (the last cell output), which is not defined outside IPython.
samples_g = []
distr_g = []
for i, sample in enumerate(samples_a):
    if b(sample, None):
        samples_g.append(sample)
        distr_g.append(distr_a[i])

# Same filter for the samples of a2.
samples_g2 = []
distr_g2 = []
for i, sample in enumerate(samples_a2):
    if b(sample, None):
        samples_g2.append(sample)
        distr_g2.append(distr_a2[i])

# Show both acceptance counts; a bare expression in the middle of a cell is
# never displayed, so the first count used to be lost.
len(samples_g), len(samples_g2)

In [None]:
# Acceptance rates: fraction of samples from a (Z) and from a2 (Z2) that
# satisfy the constraint.
Z = len(samples_g) / len(samples_a)
Z2 = len(samples_g2) / len(samples_a2)
print('Z and log(Z):', Z, sp.log(Z))
print("Z' and log(Z'):", Z2, sp.log(Z2))

In [None]:
# Mean over accepted samples of a of: log a(y) - log a2(y), both under `prompt`.
score = [
    a.log_score([y], context=prompt) - a2.log_score([y], context=prompt)
    for y in samples_g
]
estimator_g = sum(score) / len(score)
print(estimator_g)

# Mean over accepted samples of a2 of: log a2(y) - log a(y).
score2 = [
    a2.log_score([y], context=prompt) - a.log_score([y], context=prompt)
    for y in samples_g2
]
estimator_g2 = sum(score2) / len(score2)
print(estimator_g2)

In [None]:
# Report the divergence estimates and acceptance rate for the SFT model.
log_Z = sp.log(Z)
log_Z2 = sp.log(Z2)
print("Delta", -estimator_g)
print("KL(g|a')", estimator_g - log_Z)
print("AR of a2: ", Z2)
print("KL(g|g')", estimator_g - log_Z + log_Z2)
print("KL(g'|g)", estimator_g2 - log_Z2 + log_Z)

# Warm-start DPG

In [None]:
# Checkpoint and story opening used for the warm-start data collection.
model_name = "msintaha/gpt2-finetuned-rocstories"
prefix = "My uncle couldn't afford health care. He got sick last year."

# NOTE(review): avoid committing a real token here; consider reading it from the environment.
token = "" # your own huggingface token

# NOTE(review): this cell passes mb_mode=True where other cells pass LLM=True — confirm intended.
a = LMDistribution(model_name, token=token, mb_mode=True, length=80)
# Constraint b(s, c): scores s.text with is_positive; c is unused.
b = lambda s, c: is_positive(story=s.text, t=0.98, prefix=prefix) # hard constraint

In [None]:
# Steering prompt: a fixed lead-in followed by the story opening.
CAP = "This is my happy ending story, " + prefix
CAP

In [None]:
# Draw the warm-start pool from a, conditioned on the CAP prompt.
distr = AccumulationSampler(distribution=a, total_size=10000) # total_size here is 10,000 (an earlier note said 400,000)
samples_a, distr_a = distr.sample(sampling_size=500, context=CAP)

In [None]:
# Build the warm-start training records: for each sampled y with b(y) = 1,
# store the plain story opening followed by item[1] (presumably the generated
# text — confirm). The CAP lead-in used for sampling is not included.
# b ignores its second argument, so pass None explicitly instead of relying on
# IPython's `_` (the last cell output), which is not defined outside IPython.
samples_g = [
    {'text': prefix + item[1]}
    for item in samples_a[:10000]  # y ~ a
    if b(item, None)  # keep y only when b(y) = 1
]

print(len(samples_g))

In [None]:
from trl import SFTTrainer
from datasets import load_dataset, Dataset

# Training configuration for the warm-start fine-tuning step.
# NOTE(review): output_dir ('models/happy/sft') differs from the path passed to
# trainer.save_model later ('models/positive/ws') — confirm intended.
training_args = TrainingArguments(
    output_dir='models/happy/sft',
    per_device_train_batch_size=24,
    num_train_epochs=1,
    learning_rate=5e-06,
    evaluation_strategy='steps',
    logging_steps=100,
    bf16=True,
    report_to="none",
)

# Train and validation sets are built from the same list, so evaluation runs
# on the training data; only the training set is shuffled.
ds_train = Dataset.from_list(samples_g)
ds_val = Dataset.from_list(samples_g)
ds_train = ds_train.shuffle()

In [None]:
# Load the tokenizer and the causal LM that will be fine-tuned.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
a0 = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", trust_remote_code=True)
# a.to('cuda')

# Reuse the end-of-sequence token for padding and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# NOTE(review): max_seq_length=70 while samples were generated with length=80
# plus the opening; longer texts are presumably truncated — confirm intended.
trainer = SFTTrainer(
    model=a0,
    args=training_args,
    train_dataset=ds_train,
    eval_dataset=ds_val,
    max_seq_length=70,
    tokenizer=tokenizer,
    dataset_text_field='text',
)

In [None]:
# Run the warm-start fine-tuning and save it; DPG tuning below starts from this path.
trainer.train()
trainer.save_model("models/positive/ws")

In [None]:
# The proposal stays the base checkpoint; the model to tune is the warm-start
# model saved above.
proposal_model_name = "msintaha/gpt2-finetuned-rocstories"
model_name = "models/positive/ws"
prefix = "My uncle couldn't afford health care. He got sick last year."
prompt = prefix

In [None]:
# NOTE(review): avoid committing a real token here; consider reading it from the environment.
token = "" # your own huggingface token

# `proposal` is the model used to build the target; a2 is created with
# freeze=False (presumably so that its weights can be updated — confirm).
proposal = LMDistribution(proposal_model_name, token=token, LLM=True, length=80)
a2 = LMDistribution(model_name, token=token, freeze=False, LLM=True, length=80)

In [None]:
# Constraint b(s, c): scores s.text with is_positive; c is unused and `prefix`
# is looked up at call time.
b = lambda s, c: is_positive(story=s.text, t=0.98, prefix=prefix) # hard constraint
scorer = BooleanScorer(b)
# Target g: the proposal combined with the boolean scorer.
g = proposal * scorer

In [None]:
# Tune the warm-started a2 towards the target g with DPG, conditioning on `prompt`.
# NOTE(review): n_gradient_steps is 199 here but 200 in the plain DPG run — confirm intended.
tuner = DPGTuner(a2, g,
        warmup_steps=1,
        context=prompt,
        n_gradient_steps=199,
        n_samples_per_step=10000,
        sampling_size=500,
        scoring_size=500,
        divergence_evaluation_interval=10)

# Presumably registers console reporting on the tuner — confirm.
ConsoleLogger(tuner)

tuner.tune()

In [None]:
# Persist the tuned model; it is reloaded from this path for evaluation below.
a2.save(path="models/positive/wsdpg-2m")

In [None]:
# Reload the base model a and draw evaluation samples conditioned on `prompt`.
model_name="msintaha/gpt2-finetuned-rocstories"
a = LMDistribution(model_name, token=token, LLM=True, length=80)

distr = AccumulationSampler(distribution=a, total_size=500000)
samples_a, distr_a = distr.sample(sampling_size=500, context=prompt)

In [None]:
# Load the warm-start DPG model saved above as a2 and draw evaluation samples from it.
model_name="models/positive/wsdpg-2m"
a2 = LMDistribution(model_name, token=token, LLM=True, length=80)

distr = AccumulationSampler(distribution=a2, total_size=200000)
samples_a2, distr_a2 = distr.sample(sampling_size=500, context=prompt)

In [None]:
# Keep the samples of a that satisfy the constraint b, together with the
# matching entries of distr_a.
# b ignores its second argument, so pass None explicitly instead of relying on
# IPython's `_` (the last cell output), which is not defined outside IPython.
samples_g = []
distr_g = []
for i, sample in enumerate(samples_a):
    if b(sample, None):
        samples_g.append(sample)
        distr_g.append(distr_a[i])

# Same filter for the samples of a2.
samples_g2 = []
distr_g2 = []
for i, sample in enumerate(samples_a2):
    if b(sample, None):
        samples_g2.append(sample)
        distr_g2.append(distr_a2[i])

# Show both acceptance counts; a bare expression in the middle of a cell is
# never displayed, so the first count used to be lost.
len(samples_g), len(samples_g2)

In [None]:
# Acceptance rates: fraction of samples from a (Z) and from a2 (Z2) that
# satisfy the constraint.
Z = len(samples_g) / len(samples_a)
Z2 = len(samples_g2) / len(samples_a2)
print('Z and log(Z):', Z, sp.log(Z))
print("Z' and log(Z'):", Z2, sp.log(Z2))

In [None]:
# Mean over accepted samples of a of: log a(y) - log a2(y), both under `prompt`.
score = [
    a.log_score([y], context=prompt) - a2.log_score([y], context=prompt)
    for y in samples_g
]
estimator_g = sum(score) / len(score)
print(estimator_g)

# Mean over accepted samples of a2 of: log a2(y) - log a(y).
score2 = [
    a2.log_score([y], context=prompt) - a.log_score([y], context=prompt)
    for y in samples_g2
]
estimator_g2 = sum(score2) / len(score2)
print(estimator_g2)

In [None]:
# Report the divergence estimates and acceptance rate for the warm-start DPG model.
log_Z = sp.log(Z)
log_Z2 = sp.log(Z2)
print("Delta", -estimator_g)
print("KL(g|a')", estimator_g - log_Z)
print("AR of a2: ", Z2)
print("KL(g|g')", estimator_g - log_Z + log_Z2)
print("KL(g'|g)", estimator_g2 - log_Z2 + log_Z)