# **Deep Haiku Generator**
### Generating haikus with a large transformer fine-tuned on structured poems, their phonemes, and topics.

Fine-tuning the model requires substantial compute resources, so this notebook skips training and loads the already fine-tuned model instead.


## Acknowledgements

@software{DeepHaiku,
  author  = {Gonsalves, Robert A.},
  title   = {Deep Haiku: Teaching GPT-J to Compose with Syllable Patterns},
  url     = {https://github.com/robgon-art/DeepHaiku},
  year    = 2022,
  month   = February
}

# Initialisation and Setup

In [None]:
# %%capture
# !pip install --upgrade --no-cache-dir gdown
## !git clone https://github.com/unitaryai/detoxify
# !pip install transformers==4.16.2
# !pip install bitsandbytes-cuda111
## !git clone https://github.com/robgon-art/GRUEN
# !pip install wmd
# !pip install --upgrade --no-cache-dir gdown
# !gdown --id 1S-l0L_YOzn5KhYHdB8iS37qKwuUhHP0G
# !gdown --id 10LpkO5Vm_zOu723FVk6cCeRsv_qyYLdL
# !unzip cola_model.zip
# !pip install phonemizer
# !sudo apt-get install festival

import transformers
import torch
import torch.nn.functional as F
from torch import nn
from torch.cuda.amp import custom_fwd, custom_bwd
from bitsandbytes.functional import quantize_blockwise, dequantize_blockwise
from tqdm.auto import tqdm
from phonemizer import phonemize
from phonemizer.separator import Separator
import nltk
nltk.download('punkt')
import pandas as pd
import numpy as np

Required Classes and Functions 

In [None]:
def get_festival_phonemes(line):
  """Phonemize `line` with the festival backend for en-us, without stress marks.

  The separator uses a space between words, "|" between syllables and
  `None` for phones. Whatever `phonemize` returns is handed back unchanged.
  """
  syllable_separator = Separator(phone=None, word=' ', syllable="|")
  return phonemize(
      line,
      language='en-us',
      backend='festival',
      with_stress=False,
      separator=syllable_separator,
      strip=True,
  )

# Quick check of the phonemizer on a sample phrase; the call is the cell's
# last expression, so its return value is what the cell displays.
text = ["pet pug arthur"]
get_festival_phonemes(text)

In [None]:
class FrozenBNBLinear(nn.Module):
    """Linear layer backed by a frozen, blockwise-quantized weight.

    The quantized `weight` and its `absmax` / `code` quantization state are
    stored as buffers with gradients disabled. `bias` (an `nn.Parameter` or
    None) is kept as given. `adapter` starts as None; when it is set to a
    truthy module, its output for the same input is added to the layer output.
    """

    def __init__(self, weight, absmax, code, bias=None):
        assert bias is None or isinstance(bias, nn.Parameter)
        super().__init__()
        self.out_features, self.in_features = weight.shape
        # Quantization state lives in buffers (not parameters) and never needs grad.
        for buffer_name, tensor in (("weight", weight), ("absmax", absmax), ("code", code)):
            self.register_buffer(buffer_name, tensor.requires_grad_(False))
        self.adapter = None
        self.bias = bias

    def forward(self, input):
        """Apply the dequantize-and-linear op, then add the adapter output if one is set."""
        result = DequantizeAndLinear.apply(input, self.weight, self.absmax, self.code, self.bias)
        if self.adapter:
            result += self.adapter(input)
        return result

    @classmethod
    def from_linear(cls, linear: nn.Linear) -> "FrozenBNBLinear":
        """Build a frozen layer by quantizing the weight of an existing `nn.Linear`."""
        quantized, (absmax, code) = quantize_blockise_lowmemory(linear.weight)
        return cls(quantized, absmax, code, linear.bias)

    def __repr__(self):
        return f"{type(self).__name__}({self.in_features}, {self.out_features})"
 
 
class DequantizeAndLinear(torch.autograd.Function):
    """Autograd function that dequantizes a blockwise-quantized weight and applies `F.linear`.

    Gradients are produced for `input` and, when a bias was given, for `bias`.
    The quantized weight, `absmax` and `code` never receive gradients.
    """

    @staticmethod
    @custom_fwd
    def forward(ctx, input: torch.Tensor, weights_quantized: torch.ByteTensor,
                absmax: torch.FloatTensor, code: torch.FloatTensor, bias: torch.FloatTensor):
        dense_weight = dequantize_blockwise(weights_quantized, absmax=absmax, code=code)
        # Only the quantized tensors are saved; backward dequantizes again.
        ctx.save_for_backward(input, weights_quantized, absmax, code)
        ctx._has_bias = bias is not None
        return F.linear(input, dense_weight, bias)

    @staticmethod
    @custom_bwd
    def backward(ctx, grad_output: torch.Tensor):
        # Inputs 1..3 (quantized weight, absmax, code) must not require gradients.
        assert not any(ctx.needs_input_grad[1:4])
        _, packed_weight, scales, codebook = ctx.saved_tensors
        dense_weight = dequantize_blockwise(packed_weight, absmax=scales, code=codebook)
        # grad_output: [*batch, out_features]
        input_grad = grad_output @ dense_weight
        bias_grad = None
        if ctx._has_bias:
            # Collapse every leading (batch) dimension and sum over it.
            bias_grad = grad_output.flatten(0, -2).sum(dim=0)
        return input_grad, None, None, None, bias_grad
 
 
class FrozenBNBEmbedding(nn.Module):
    """Embedding table backed by a frozen, blockwise-quantized weight.

    The quantized `weight` and its `absmax` / `code` quantization state are
    stored as buffers with gradients disabled. `adapter` starts as None; when
    it is set to a truthy module, its output for the same indices is added to
    the looked-up embeddings.
    """

    def __init__(self, weight, absmax, code):
        super().__init__()
        self.num_embeddings, self.embedding_dim = weight.shape
        for buffer_name, tensor in (("weight", weight), ("absmax", absmax), ("code", code)):
            self.register_buffer(buffer_name, tensor.requires_grad_(False))
        self.adapter = None

    def forward(self, input, **kwargs):
        """Dequantize the table, look up `input`, then add the adapter output if one is set."""
        # Neither the quantized weights nor the input indices are differentiable,
        # so the lookup itself runs without gradient tracking.
        with torch.no_grad():
            table = dequantize_blockwise(self.weight, absmax=self.absmax, code=self.code)
            embedded = F.embedding(input, table, **kwargs)
        if self.adapter:
            embedded += self.adapter(input)
        return embedded

    @classmethod
    def from_embedding(cls, embedding: nn.Embedding) -> "FrozenBNBEmbedding":
        """Build a frozen embedding by quantizing the weight of an existing `nn.Embedding`."""
        quantized, (absmax, code) = quantize_blockise_lowmemory(embedding.weight)
        return cls(quantized, absmax, code)

    def __repr__(self):
        return f"{type(self).__name__}({self.num_embeddings}, {self.embedding_dim})"
 
def quantize_blockise_lowmemory(matrix: torch.Tensor, chunk_size: int = 2 ** 20):
    """Quantize `matrix` with `quantize_blockwise`, one flat chunk at a time.

    The matrix is viewed as a flat tensor and split into pieces of
    `chunk_size` elements (which must be a multiple of 4096). Each piece is
    cloned and quantized separately; the `code` returned by one call is passed
    to the next call.

    Returns:
        `(matrix_i8, (absmax, code))`, where `matrix_i8` is the concatenated
        quantized pieces reshaped like `matrix`, `absmax` is the concatenation
        of the per-piece absmax tensors, and `code` is the value returned by
        the last `quantize_blockwise` call.
    """
    assert chunk_size % 4096 == 0
    flat = matrix.view(-1)
    num_chunks = (matrix.numel() - 1) // chunk_size + 1

    code = None
    quantized_parts, absmax_parts = [], []
    for index in range(num_chunks):
        start, stop = index * chunk_size, (index + 1) * chunk_size
        piece = flat[start:stop].clone()
        quantized_piece, (absmax_piece, code) = quantize_blockwise(piece, code=code)
        quantized_parts.append(quantized_piece)
        absmax_parts.append(absmax_piece)

    matrix_i8 = torch.cat(quantized_parts).reshape_as(matrix)
    return matrix_i8, (torch.cat(absmax_parts), code)
 
 
def convert_to_int8(model):
    """Swap every `nn.Linear` / `nn.Embedding` child inside `model` for a frozen 8-bit module.

    Each replacement is built from zero-filled tensors: a uint8 weight with the
    child's shape, an `absmax` with one entry per 4096 weight elements (rounded
    up) and a 256-entry `code`. Linear replacements reuse the child's bias.
    Every replaced linear child is printed with its attribute name.
    The replacement happens in place; nothing is returned.
    """
    # Snapshot the module list first, since children are replaced while walking it.
    for parent in list(model.modules()):
        for child_name, child in parent.named_children():
            if isinstance(child, nn.Linear):
                print(child_name, child)
                replacement = FrozenBNBLinear(
                    weight=torch.zeros(child.out_features, child.in_features, dtype=torch.uint8),
                    absmax=torch.zeros((child.weight.numel() - 1) // 4096 + 1),
                    code=torch.zeros(256),
                    bias=child.bias,
                )
            elif isinstance(child, nn.Embedding):
                replacement = FrozenBNBEmbedding(
                    weight=torch.zeros(child.num_embeddings, child.embedding_dim, dtype=torch.uint8),
                    absmax=torch.zeros((child.weight.numel() - 1) // 4096 + 1),
                    code=torch.zeros(256),
                )
            else:
                # Any other kind of child is left untouched.
                continue
            setattr(parent, child_name, replacement)

class GPTJBlock(transformers.models.gptj.modeling_gptj.GPTJBlock):
    """GPT-J block whose attention and MLP sub-modules are passed through `convert_to_int8`."""

    def __init__(self, config):
        super().__init__(config)
        # Convert the attention sub-module first, then the MLP.
        for submodule in (self.attn, self.mlp):
            convert_to_int8(submodule)


class GPTJModel(transformers.models.gptj.modeling_gptj.GPTJModel):
    """GPT-J model that runs `convert_to_int8` on itself right after the stock constructor."""
    def __init__(self, config):
        super().__init__(config)
        # Replace the nn.Linear / nn.Embedding children with their frozen 8-bit versions.
        convert_to_int8(self)
        

class GPTJForCausalLM(transformers.models.gptj.modeling_gptj.GPTJForCausalLM):
    """GPT-J causal-LM wrapper that runs `convert_to_int8` on itself right after the stock constructor."""
    def __init__(self, config):
        super().__init__(config)
        # Replace the nn.Linear / nn.Embedding children with their frozen 8-bit versions.
        convert_to_int8(self)

# Marker printed once every class and helper in this cell has been defined.
print("Init Done!")

Load Model 

In [None]:
# Swap the library's GPTJBlock for the 8-bit subclass defined earlier in this
# notebook. This must run before the checkpoint below is loaded.
transformers.models.gptj.modeling_gptj.GPTJBlock = GPTJBlock  # monkey-patch GPT-J

# `transformers` is already imported in the setup cell; this re-import is a no-op.
import transformers
# NOTE(review): `config` is not referenced again in the visible cells.
config = transformers.GPTJConfig.from_pretrained("EleutherAI/gpt-j-6B")
tokenizer = transformers.AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")

# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source. The hard-coded /content path presumably targets Colab; adjust
# it when running elsewhere.
gpt = torch.load("/content/gpt-j-8bit_deep_haikul.pt",  map_location=torch.device('cuda'))
# Switch the loaded model to evaluation mode.
gpt.eval()

# Generator Function
Provide a topic, and the generator returns poems about it.

In [None]:
# Suppress every Python warning from here on to keep the generation output readable.
import warnings
warnings.filterwarnings("ignore")

In [49]:
def generator(topic:str ='nature',max_length=40,num_poems = 50):
  '''
  Generate haikus (structured three-line poems) about a topic.

  The prompt "(<topic> =" is sampled `num_poems` times with the module-level
  `gpt` model and `tokenizer`. Each decoded sample is expected to look like
  "(<topic> = line 1 / line 2 / line 3)".

  Inputs:
    topic: topic string; " =" is appended to the prompt (and the prompt is
      echoed) unless the topic already contains "="
    max_length: value passed as `max_length` to `gpt.generate`
    num_poems: number of sequences to sample
  Output:
    A pandas DataFrame with sent_1,sent_2,sent_3 as columns representing the
    3 lines of each poem. Samples with no text after " = " and poems with more
    than three "/"-separated parts are dropped; a missing line is left null.
    The frame is empty (same three columns) when no sample could be parsed.
  '''
  prompt = "(" + topic.strip()
  if "=" not in topic:
    prompt += " ="
    print("'" + prompt + "'")
  with torch.no_grad():
    input_tokens = tokenizer(prompt, return_tensors="pt").input_ids.cuda()
    sample_outputs = gpt.generate(input_tokens, max_length=max_length, do_sample=True, num_return_sequences=num_poems, temperature=0.8)

  haikus = []
  for sample_output in sample_outputs:
    doc = tokenizer.decode(sample_output, skip_special_tokens=True)
    # Keep the text between the leading "(" and the first ")", then take what
    # follows the " = " separator.
    pieces = doc.split(")")[0][1:].strip().split(" = ")
    if len(pieces) < 2:
      # Nothing was generated after the prompt: skip instead of raising IndexError.
      continue
    haikus.append(pieces[1].strip())

  print("Deep Haiku Generation for " + topic.upper() + " #Haikus generated: " + str(len(haikus)))

  columns = ['sent_1','sent_2','sent_3']
  if not haikus:
    return pd.DataFrame(columns=columns)

  # regex=False makes the "." literal. As a regex it would match any character
  # and chop the last letter off every line that does not end with a period.
  lines = pd.Series(haikus).str.replace(". / ", " / ", regex=False)
  parts = lines.str.split('/',expand=True)
  if 3 in parts.columns:
    # A non-null fourth part means the poem has more than three lines: drop it.
    parts = parts[parts[3].isna()]
  # reindex (rather than parts[[0,1,2]]) tolerates batches where no poem has
  # three lines; the missing columns are filled with nulls.
  poems = parts.reindex(columns=[0,1,2])
  poems.columns = columns

  # Toxicity scoring takes time, so it is left disabled:
  # poems['toxicity'] = lines.loc[poems.index].apply(lambda h: Detoxify('original').predict(h)["toxicity"])
  return poems


In [50]:
temp = generator() #default topic: nature
topics = ['autumn','machine learning', 'AI','spring','butterflies']

# Collect one frame per topic and concatenate once at the end, instead of
# re-copying the growing frame with pd.concat on every iteration.
frames = [temp]
for t in topics:
  print(t)
  frames.append(generator(t))
final = pd.concat(frames)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


'(nature ='
Deep Haiku Generation for NATURE #Haikus generated: 50


In [51]:
# Preview the first rows of the combined haiku table.
final.head()

Unnamed: 0,sent_1,sent_2,sent_3
0,"Oh, the beauty of","Unrestrained nature, please",Stay out of my house.
1,Live your purpose in,Balance with the nature,Spirit nature.
2,The natural world,No longer bothers to decor,Dress up for the cams.
3,The nature of man,Can't explain how I became,What I am now.
4,Take me back to nature,Take me back to the mountains,Take me back to pain.


In [47]:
# Save the combined haikus to CSV; index=False leaves the row index out of the file.
final.to_csv('deep_haiku_op.csv',index=False)