In [1]:
!pip install transformers
!pip install bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.41.0-py3-none-any.whl (92.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.41.0


In [2]:
import transformers

import torch
import torch.nn.functional as F
from torch import nn
from torch.cuda.amp import custom_fwd, custom_bwd 

from bitsandbytes.functional import quantize_blockwise, dequantize_blockwise

from tqdm.auto import tqdm



In [3]:
class FrozenBNBLinear(nn.Module):
    def __init__(self, weight, absmax, code, bias=None):
        assert isinstance(bias, nn.Parameter) or bias is None
        super().__init__()
        self.out_features, self.in_features = weight.shape
        self.register_buffer("weight", weight.requires_grad_(False))
        self.register_buffer("absmax", absmax.requires_grad_(False))
        self.register_buffer("code", code.requires_grad_(False))
        self.adapter = None
        self.bias = bias
 
    def forward(self, input):
        output = DequantizeAndLinear.apply(input, self.weight, self.absmax, self.code, self.bias)
        if self.adapter:
            output_cloned = torch.clone(output + self.adapter(input))
            return output_cloned
        else :
            return output
 
    @classmethod
    def from_linear(cls, linear: nn.Linear) -> "FrozenBNBLinear":
        weights_int8, state = quantize_blockise_lowmemory(linear.weight)
        return cls(weights_int8, *state, linear.bias)
 
    def __repr__(self):
        return f"{self.__class__.__name__}({self.in_features}, {self.out_features})"
 
 
class DequantizeAndLinear(torch.autograd.Function): 
    @staticmethod
    @custom_fwd
    def forward(ctx, input: torch.Tensor, weights_quantized: torch.ByteTensor,
                absmax: torch.FloatTensor, code: torch.FloatTensor, bias: torch.FloatTensor):
        weights_deq = dequantize_blockwise(weights_quantized, absmax=absmax, code=code)
        ctx.save_for_backward(input, weights_quantized, absmax, code)
        ctx._has_bias = bias is not None
        return F.linear(input, weights_deq, bias)
 
    @staticmethod
    @custom_bwd
    def backward(ctx, grad_output: torch.Tensor):
        assert not ctx.needs_input_grad[1] and not ctx.needs_input_grad[2] and not ctx.needs_input_grad[3]
        input, weights_quantized, absmax, code = ctx.saved_tensors
        # grad_output: [*batch, out_features]
        weights_deq = dequantize_blockwise(weights_quantized, absmax=absmax, code=code)
        grad_input = grad_output @ weights_deq
        grad_bias = grad_output.flatten(0, -2).sum(dim=0) if ctx._has_bias else None
        return grad_input, None, None, None, grad_bias
 
 
class FrozenBNBEmbedding(nn.Module):
    def __init__(self, weight, absmax, code):
        super().__init__()
        self.num_embeddings, self.embedding_dim = weight.shape
        self.register_buffer("weight", weight.requires_grad_(False))
        self.register_buffer("absmax", absmax.requires_grad_(False))
        self.register_buffer("code", code.requires_grad_(False))
        self.adapter = None
 
    def forward(self, input, **kwargs):
        with torch.no_grad():
            # note: both quantuized weights and input indices are *not* differentiable
            weight_deq = dequantize_blockwise(self.weight, absmax=self.absmax, code=self.code)
            output = F.embedding(input, weight_deq, **kwargs)
        if self.adapter:
            
            output_cloned = torch.clone(output + self.adapter(input))
            return output_cloned
        else :
            return output 
 
    @classmethod
    def from_embedding(cls, embedding: nn.Embedding) -> "FrozenBNBEmbedding":
        weights_int8, state = quantize_blockise_lowmemory(embedding.weight)
        return cls(weights_int8, *state)
 
    def __repr__(self):
        return f"{self.__class__.__name__}({self.num_embeddings}, {self.embedding_dim})"
 
 
def quantize_blockise_lowmemory(matrix: torch.Tensor, chunk_size: int = 2 ** 20):
    assert chunk_size % 4096 == 0
    code = None
    chunks = []
    absmaxes = []
    flat_tensor = matrix.view(-1)
    for i in range((matrix.numel() - 1) // chunk_size + 1):
        input_chunk = flat_tensor[i * chunk_size: (i + 1) * chunk_size].clone()
        quantized_chunk, (absmax_chunk, code) = quantize_blockwise(input_chunk, code=code)
        chunks.append(quantized_chunk)
        absmaxes.append(absmax_chunk)
 
    matrix_i8 = torch.cat(chunks).reshape_as(matrix)
    absmax = torch.cat(absmaxes)
    return matrix_i8, (absmax, code)
 
 
def convert_to_int8(model):
    """Convert linear and embedding modules to 8-bit with optional adapters"""
    for module in list(model.modules()):
        for name, child in module.named_children():
            if isinstance(child, nn.Linear):
                print(name, child)
                setattr( 
                    module,
                    name,
                    FrozenBNBLinear(
                        weight=torch.zeros(child.out_features, child.in_features, dtype=torch.uint8),
                        absmax=torch.zeros((child.weight.numel() - 1) // 4096 + 1),
                        code=torch.zeros(256),
                        bias=child.bias,
                    ),
                )
            elif isinstance(child, nn.Embedding):
                setattr(
                    module,
                    name,
                    FrozenBNBEmbedding(
                        weight=torch.zeros(child.num_embeddings, child.embedding_dim, dtype=torch.uint8),
                        absmax=torch.zeros((child.weight.numel() - 1) // 4096 + 1),
                        code=torch.zeros(256),
                    )
                )
     

In [4]:
class GPTJBlock(transformers.models.gptj.modeling_gptj.GPTJBlock):
    def __init__(self, config):
        super().__init__(config)

        convert_to_int8(self.attn)
        convert_to_int8(self.mlp)


class GPTJModel(transformers.models.gptj.modeling_gptj.GPTJModel):
    def __init__(self, config):
        super().__init__(config)
        convert_to_int8(self)
        

class GPTJForCausalLM(transformers.models.gptj.modeling_gptj.GPTJForCausalLM):
    def __init__(self, config):
        super().__init__(config)
        convert_to_int8(self)


transformers.models.gptj.modeling_gptj.GPTJBlock = GPTJBlock  # monkey-patch GPT-J

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [5]:
config = transformers.GPTJConfig.from_pretrained("EleutherAI/gpt-j-6B")
tokenizer = transformers.AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")

Downloading (…)lve/main/config.json:   0%|          | 0.00/930 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/619 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.37M [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/4.04k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/357 [00:00<?, ?B/s]

In [6]:
gpt = GPTJForCausalLM.from_pretrained("hivemind/gpt-j-6B-8bit", low_cpu_mem_usage=True)

if torch.cuda.is_available():  
    dev = "cuda:0" 
else:  
    dev = "cpu"  
device = torch.device(dev)  

gpt.to(device)

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/6.18G [00:00<?, ?B/s]

k_proj Linear(in_features=4096, out_features=4096, bias=False)
v_proj Linear(in_features=4096, out_features=4096, bias=False)
q_proj Linear(in_features=4096, out_features=4096, bias=False)
out_proj Linear(in_features=4096, out_features=4096, bias=False)
fc_in Linear(in_features=4096, out_features=16384, bias=True)
fc_out Linear(in_features=16384, out_features=4096, bias=True)
k_proj Linear(in_features=4096, out_features=4096, bias=False)
v_proj Linear(in_features=4096, out_features=4096, bias=False)
q_proj Linear(in_features=4096, out_features=4096, bias=False)
out_proj Linear(in_features=4096, out_features=4096, bias=False)
fc_in Linear(in_features=4096, out_features=16384, bias=True)
fc_out Linear(in_features=16384, out_features=4096, bias=True)
k_proj Linear(in_features=4096, out_features=4096, bias=False)
v_proj Linear(in_features=4096, out_features=4096, bias=False)
q_proj Linear(in_features=4096, out_features=4096, bias=False)
out_proj Linear(in_features=4096, out_features=4096, 

GPTJForCausalLM(
  (transformer): GPTJModel(
    (wte): FrozenBNBEmbedding(50400, 4096)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-27): 28 x GPTJBlock(
        (ln_1): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (attn): GPTJAttention(
          (attn_dropout): Dropout(p=0.0, inplace=False)
          (resid_dropout): Dropout(p=0.0, inplace=False)
          (k_proj): FrozenBNBLinear(4096, 4096)
          (v_proj): FrozenBNBLinear(4096, 4096)
          (q_proj): FrozenBNBLinear(4096, 4096)
          (out_proj): FrozenBNBLinear(4096, 4096)
        )
        (mlp): GPTJMLP(
          (fc_in): FrozenBNBLinear(4096, 16384)
          (fc_out): FrozenBNBLinear(16384, 4096)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): FrozenBNBLinear(4096, 50400)
)

In [7]:
def add_adapters(model, adapter_dim=4, p = 0.1):
    assert adapter_dim > 0

    for name, module in model.named_modules():
      if isinstance(module, FrozenBNBLinear):
          if "attn" in name or "mlp" in name or "head" in name:
              print("Adding adapter to", name)
              module.adapter = nn.Sequential(
                nn.Linear(module.in_features, adapter_dim, bias=False),
                nn.Dropout(p=p),
                nn.Linear(adapter_dim, module.out_features, bias=False),
            )
              print("Initializing", name)
              nn.init.zeros_(module.adapter[2].weight)

          else:
              print("Not adding adapter to", name)
      elif isinstance(module, FrozenBNBEmbedding):
          print("Adding adapter to", name)
          module.adapter = nn.Sequential(
                nn.Embedding(module.num_embeddings, adapter_dim),
                nn.Dropout(p=p),
                nn.Linear(adapter_dim, module.embedding_dim, bias=False),
            )
          print("Initializing", name)
          nn.init.zeros_(module.adapter[2].weight)

add_adapters(gpt)
gpt.to(device)

Adding adapter to transformer.wte
Initializing transformer.wte
Adding adapter to transformer.h.0.attn.k_proj
Initializing transformer.h.0.attn.k_proj
Adding adapter to transformer.h.0.attn.v_proj
Initializing transformer.h.0.attn.v_proj
Adding adapter to transformer.h.0.attn.q_proj
Initializing transformer.h.0.attn.q_proj
Adding adapter to transformer.h.0.attn.out_proj
Initializing transformer.h.0.attn.out_proj
Adding adapter to transformer.h.0.mlp.fc_in
Initializing transformer.h.0.mlp.fc_in
Adding adapter to transformer.h.0.mlp.fc_out
Initializing transformer.h.0.mlp.fc_out
Adding adapter to transformer.h.1.attn.k_proj
Initializing transformer.h.1.attn.k_proj
Adding adapter to transformer.h.1.attn.v_proj
Initializing transformer.h.1.attn.v_proj
Adding adapter to transformer.h.1.attn.q_proj
Initializing transformer.h.1.attn.q_proj
Adding adapter to transformer.h.1.attn.out_proj
Initializing transformer.h.1.attn.out_proj
Adding adapter to transformer.h.1.mlp.fc_in
Initializing transfor

GPTJForCausalLM(
  (transformer): GPTJModel(
    (wte): FrozenBNBEmbedding(50400, 4096)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-27): 28 x GPTJBlock(
        (ln_1): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (attn): GPTJAttention(
          (attn_dropout): Dropout(p=0.0, inplace=False)
          (resid_dropout): Dropout(p=0.0, inplace=False)
          (k_proj): FrozenBNBLinear(4096, 4096)
          (v_proj): FrozenBNBLinear(4096, 4096)
          (q_proj): FrozenBNBLinear(4096, 4096)
          (out_proj): FrozenBNBLinear(4096, 4096)
        )
        (mlp): GPTJMLP(
          (fc_in): FrozenBNBLinear(4096, 16384)
          (fc_out): FrozenBNBLinear(16384, 4096)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): FrozenBNBLinear(4096, 50400)
)

In [8]:
from sklearn.model_selection import train_test_split
import pandas as pd
# Load the data
data = pd.read_csv('/kaggle/input/quora-data/data_topic_blockchain.csv')
data 


Unnamed: 0,Question,Answer
0,Which branch of CS does Blockchain fall into?,Blockchain technology falls primarily within t...
1,Which branch of CS does Blockchain fall into?,Blockchain technology falls into the branch of...
2,Which branch of CS does Blockchain fall into?,It’s a whole new branch I can say…Many new thi...
3,Which branch of CS does Blockchain fall into?,Blockchain comprises of fundamentals of crypto...
4,What is a blockchain?,"I learned about blockchain in 2012, from the m..."
...,...,...
2854,What are some interesting research topics per...,There are a lot of interesting topics to look ...
2855,Why are companies interested in blockchain?,There came many new technologies in the past t...
2856,What are the implications of a world where ev...,A world where everything is connected on block...
2857,Why are people only interested in bitcoins an...,Bitcoin and blockchain are often used intercha...


In [9]:
#data.drop("Unnamed: 0" , axis = 1 , inplace = True)

In [10]:
data.head()

Unnamed: 0,Question,Answer
0,Which branch of CS does Blockchain fall into?,Blockchain technology falls primarily within t...
1,Which branch of CS does Blockchain fall into?,Blockchain technology falls into the branch of...
2,Which branch of CS does Blockchain fall into?,It’s a whole new branch I can say…Many new thi...
3,Which branch of CS does Blockchain fall into?,Blockchain comprises of fundamentals of crypto...
4,What is a blockchain?,"I learned about blockchain in 2012, from the m..."


In [11]:
prompt = []
for i in data.index:
    # Update the value in the "prompt" column by concatenating strings
    prompt.append(f"""[Question] : {data['Question'][i]} \n[Response]:{data['Answer'][i]}""")

# Access the updated value in the "prompt" column for a specific row
print(prompt[0])

[Question] : Which branch of CS does Blockchain fall into? 
[Response]:Blockchain technology falls primarily within the domain of Computer Science and its various subfields. Specifically, blockchain technology encompasses concepts and techniques related to distributed systems, cryptography, data structures, networking, and consensus algorithms. Here are a few specific branches of Computer Science that are relevant to blockchain: 1. Distributed Systems: Blockchain is fundamentally a decentralized distributed system. Research and concepts in distributed systems, including peer-to-peer networks, fault tolerance, consensus protocols, and data replication, are crucial Continue ReadingBlockchain technology falls primarily within the domain of Computer Science and its various subfields. Specifically, blockchain technology encompasses concepts and techniques related to distributed systems, cryptography, data structures, networking, and consensus algorithms. Here are a few specific branches of 

In [12]:
data["prompt"] = prompt
data = data["prompt"]

In [13]:
train, test = train_test_split(data, test_size=0.1) 
train.to_csv('/train.csv', index=False)
test.to_csv('/test.csv', index=False)

In [14]:
from datasets import load_dataset
dataset = load_dataset('csv', data_files={'train': '/train.csv',
                                              'test': '/test.csv'})

Downloading and preparing dataset csv/default to /root/.cache/huggingface/datasets/csv/default-1171c22cfca1d619/0.0.0/433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519...


Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/default-1171c22cfca1d619/0.0.0/433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519. Subsequent calls will reuse this data.


  csv_file_reader = pd.read_csv(file, iterator=True, dtype=dtype, **self.config.read_csv_kwargs)
  csv_file_reader = pd.read_csv(file, iterator=True, dtype=dtype, **self.config.read_csv_kwargs)


  0%|          | 0/2 [00:00<?, ?it/s]

In [15]:
tokenizer.pad_token = tokenizer.eos_token
def tokenize_function(examples):
    return tokenizer(examples["prompt"], padding=True, truncation=True, max_length= 512)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(["prompt"])
tokenized_datasets.set_format("torch")

  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [16]:
from torch.utils.data import DataLoader

full_train_dataset = tokenized_datasets["train"]
train_dataloader = DataLoader(full_train_dataset, shuffle=True, batch_size=8)

In [17]:
from bitsandbytes.optim import Adam8bit

gpt.gradient_checkpointing_enable()
optimizer = Adam8bit(gpt.parameters(), lr=1e-5, weight_decay=0.01)

In [18]:
num_epochs = 2
num_training_steps = num_epochs * len(train_dataloader)

In [19]:
lr_scheduler = transformers.get_linear_schedule_with_warmup(
    optimizer, int(num_training_steps*0.1), num_training_steps
)

In [20]:
from tqdm.auto import tqdm

scaler = torch.cuda.amp.GradScaler()
progress_bar = tqdm(range(num_training_steps))
gpt.train()
gpt.gradient_checkpointing_enable()
k = 0

for epoch in range(num_epochs):
    for batch in train_dataloader:
        k = k + 1
        if k % 500 == 0:
          
          #print(k)
          state = {'k' : k, 'epoch': num_epochs, 'lr_scheduler': lr_scheduler.state_dict(), 'state_dict': gpt.state_dict(), 'optimizer': optimizer.state_dict()}
          #torch.save(state, filepath)

        batch = {k: v.to(device) for k, v in batch.items()}

        optimizer.zero_grad()
        

        with torch.autograd.profiler.record_function("model_inference"):
            with torch.cuda.amp.autocast():
                
                out = gpt.forward(**batch,)
                
                loss = F.cross_entropy(out.logits[:, :-1, :].flatten(0, -2), batch['input_ids'][:, 1:].flatten(),
                                  reduction='mean', label_smoothing=0.1)

        #print(loss)

        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(gpt.parameters(), 1.0)
        scaler.step(optimizer)
        scaler.update()

        lr_scheduler.step()
        progress_bar.update(1)

  0%|          | 0/644 [00:00<?, ?it/s]

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


In [None]:
gpt.save_pretrained('FineTune_gptj')
tokenizer.save_pretrained('FineTune_gptj')
import shutil
shutil.make_archive("FineTune_gptj", 'zip', "/kaggle/working/FineTune_gptj")

In [None]:
%cd /kaggle/working
from IPython.display import FileLink
FileLink(r'FineTune_gptj.zip')

# **Evaluation**

In [21]:
gpt.eval()
with torch.no_grad():
  prompt = tokenizer("[Question]:Are blockchain transactions slow? \n [Response] : ", truncation=True, padding=True, max_length=256, return_tensors='pt')
  prompt = {key: value.to(device) for key, value in prompt.items()}
  out = gpt.generate(**prompt, max_length=512, top_k=50, top_p=0.9, temperature=1.0, do_sample=True, repetition_penalty = 1.2, num_beams=1)
  print(tokenizer.decode(out[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Question]:Are blockchain transactions slow? 
 [Response] : Blockchain technology is based on a distributed ledger. The network of computers participating in the decentralized system makes it more secure and efficient than traditional systems. However, this also means that each transaction can take some time to process. It's possible for a large number of people or companies to use an exchange at once, making transactions faster than they are with other types of exchanges (including online banks). In addition, Blockchain transactions usually take longer than normal bank transfers because of security regulations.<|endoftext|>


In [22]:
gpt.eval()
with torch.no_grad():
  prompt = tokenizer("[Question]: What’s the advantage of building games on a blockchain platform? \n [Response] : ", truncation=True, padding=True, max_length=256, return_tensors='pt')
  prompt = {key: value.to(device) for key, value in prompt.items()}
  out = gpt.generate(**prompt, max_length=512, top_k=50, top_p=0.9, temperature=1.0, do_sample=True, repetition_penalty = 1.2, num_beams=1)
  print(tokenizer.decode(out[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Question]: What’s the advantage of building games on a blockchain platform? 
 [Response] :  I believe that Blockchain can help game developers to do something we have not been able to before. And, in general, blockchain has the potential to improve everything: business, data storage and so forth. If you want to know more about how it could be used by gaming studios, my colleague at B2M will explain all in one day at Games Summit London. But for now, let me tell you about what blockchains are: they are distributed databases, but with three major features: decentralization, security and transparency. Decentralization means that nobody controls the entire database; only nodes ( computers ) can access and store information in it. Security is because blockchain is a digital ledger, which makes it tamper-resistant. Transparency ensures everyone sees and understands everything going into and out of an accounts<|endoftext|>


In [23]:
gpt.eval()
with torch.no_grad():
  prompt = tokenizer("[Question]: Why is blockchain required for NFT? \n [Response] : ", truncation=True, padding=True, max_length=256, return_tensors='pt')
  prompt = {key: value.to(device) for key, value in prompt.items()}
  out = gpt.generate(**prompt, max_length=512, top_k=50, top_p=0.9, temperature=1.0, do_sample=True, repetition_penalty = 1.2, num_beams=1)
  print(tokenizer.decode(out[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Question]: Why is blockchain required for NFT? 
 [Response] :  There are a few reasons why Blockchain has been touted as the “killer” technology of 2019. It's not just that it can help secure cryptocurrencies, or that it's more efficient and secure than other forms of distributed ledger technology. In fact, many aspects of blockchain and Cryptocurrency seem like they would be enough to satisfy the demand that is in front us; but there's one key benefit from using Blockchain as a platform for non-fungible tokens (NFT) and the reasons behind the importance of this particular feature. If you have an asset in your possession and its value diminishes over time, then this is when NFTs could really come in handy. The value associated with digital collectibles often changes over their lifetimes, depending on who's holding the data in different places, how it's stored, and other factors. But if you're having problems tracking what happens to your collection of digital goods, the only way forwa

In [24]:
gpt.eval()
with torch.no_grad():
  prompt = tokenizer("[Question]: What is a blockchain? \n [Response] : ", truncation=True, padding=True, max_length=256, return_tensors='pt')
  prompt = {key: value.to(device) for key, value in prompt.items()}
  out = gpt.generate(**prompt, max_length=512, top_k=50, top_p=0.9, temperature=1.0, do_sample=True, repetition_penalty = 1.2, num_beams=1)
  print(tokenizer.decode(out[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Question]: What is a blockchain? 
 [Response] : A Blockchain is a type of digital ledger system, that has no middlemen or centralized institutions involved. Here we will explain more about Blockchain in the easiest way possible: A Blockchain is a decentralized system where everyone has equal opportunity to join the network and create data/records, as well as to participate in transaction, mining, voting etc. Block chain consistsof blocks that store the information for every single record, transaction etc on the network. Each block contains a timestamp to verify its integrity, a setContinue Reading A Blockchain is a type ofdigital ledger system, that has no middlemen orcentralized institutions involved. Here we will explain moreabout blockchain in the easiest way possible: ABlockchain is a decentralized systemwhere everything is stored in its entirety across the network without anymiddlemen, such as banks or central servers, that may control the information. Therefore, there are no int

In [25]:
gpt.eval()
with torch.no_grad():
  prompt = tokenizer("[Question]: Do pending blockchain transactions expire? \n [Response] : ", truncation=True, padding=True, max_length=256, return_tensors='pt')
  prompt = {key: value.to(device) for key, value in prompt.items()}
  out = gpt.generate(**prompt, max_length=512, top_k=50, top_p=0.9, temperature=1.0, do_sample=True, repetition_penalty = 1.2, num_beams=1)
  print(tokenizer.decode(out[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Question]: Do pending blockchain transactions expire? 
 [Response] :  Blockchain is a public ledger that records the state of the Bitcoin network. There are no expiration dates on transactions and the time required to complete the transaction can vary depending upon the network condition, but most blocks take approximately 10 minutes to process. 

<|endoftext|>


In [26]:
gpt.eval()
with torch.no_grad():
  prompt = tokenizer("[Question]: What applications and uses will bring blockchain mainstream? \n [Response] : ", truncation=True, padding=True, max_length=256, return_tensors='pt')
  prompt = {key: value.to(device) for key, value in prompt.items()}
  out = gpt.generate(**prompt, max_length=512, top_k=50, top_p=0.9, temperature=1.0, do_sample=True, repetition_penalty = 1.2, num_beams=1)
  print(tokenizer.decode(out[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Question]: What applications and uses will bring blockchain mainstream? 
 [Response] :  Blockchain, as it’s now used today in the crypto space (Bitcoin/Ethereum), is an inefficient way of storing data. Its underlying premise that a large number of people are participating on an open distributed ledger, without trust between them, means that there must be some central authority who can verify your identity when making purchases, etc. Blockchain technology provides many other benefits beyond the scope this question is discussing. For a more detailed discussion of these technologies, you should check out the following resources:  • Bitcoin whitepaper—The initial blockchain paper by Satoshi Nakamoto • The Hyperledger project — A set of block-building protocols for secure networks• Ethereum white papers—A nonfiction summary of the basic science behind cryptocurrency<|endoftext|>


In [24]:
while True : 
    responses = []
    pred = []
    gpt.eval()
    for sentence in test.values:
        #print("**************************************************************")
        st = sentence.split('[Response]:')[0].strip()
        responses.append('[Response]:'+sentence.split('[Response]:')[1].strip())
        #print(st)
        with torch.no_grad():
            prompt = tokenizer(st, truncation=True, padding=True, max_length=256, return_tensors='pt')
            prompt = {key: value.to(device) for key, value in prompt.items()}
            out = gpt.generate(**prompt, max_length=512, top_k=50, top_p=0.9, temperature=1.0, do_sample=True, repetition_penalty = 1.2, num_beams=1)
            #print('\n')
            #print("GPT-J :" , tokenizer.decode(out[0]))
            pred.append(tokenizer.decode(out[0],skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

In [None]:
"""
eval_dataset_gptj = pd.DataFrame({'paraphrase':paraphrases , 'predicted':pred})
eval_dataset_gptj.to_excel('/kaggle/working/eval_dataset_gptj.xlsx',index=False)"""

In [None]:
# Save the model
#torch.save(gpt.state_dict(), '/kaggle/working/gpt-j-6B.pt')


In [None]:
"""print(eval_dataset_gptj['paraphrase'][0])
print(eval_dataset_gptj['predicted'][0])"""

In [None]:
"""predicted = []
for p in eval_dataset_gptj.predicted.values :
    #print(p)
    st = p.split('[Positive]:')[1].strip()
    predicted.append('[Positive]:'+st)
    """

In [None]:
#predicted[0]

In [None]:
#paraphrases[0]

In [None]:
"""!pip install rouge
!pip install evaluate
!pip install rouge_score"""

In [None]:
"""import nltk
from nltk.translate.bleu_score import corpus_bleu
from nltk.translate.bleu_score import SmoothingFunction
from nltk.translate import bleu_score
from rouge import Rouge
import math"""

In [None]:
"""# Calculate the BLEU score
references = [[nltk.word_tokenize(original)] for original in paraphrases]
hypotheses = [nltk.word_tokenize(predicted) for predicted in predicted]
bleu1 = corpus_bleu(references, hypotheses, weights=(1.0, 0.0, 0.0, 0.0), smoothing_function=SmoothingFunction().method1)
bleu2 = corpus_bleu(references, hypotheses, weights=(0.5, 0.5, 0.0, 0.0), smoothing_function=SmoothingFunction().method1)
bleu3 = corpus_bleu(references, hypotheses, weights=(0.33, 0.33, 0.33, 0.0), smoothing_function=SmoothingFunction().method1)
bleu4 = corpus_bleu(references, hypotheses, weights=(0.25, 0.25, 0.25, 0.25), smoothing_function=SmoothingFunction().method1)

print("BLEU-1 score:", bleu1)
print("BLEU-2 score:", bleu2)
print("BLEU-3 score:", bleu3)
print("BLEU-4 score:", bleu4)"""

In [None]:
"""# Calculate the Rouge score
rouge = Rouge()
scores = rouge.get_scores(predicted,paraphrases, avg=True)
rouge_l = scores['rouge-l']
print("Rouge-L score:", rouge_l)"""

In [None]:
"""import evaluate
rouge = evaluate.load('rouge')

results = rouge.compute(predictions=predicted,references=paraphrases)"""

In [None]:
"""results"""

In [None]:
"""bleu = evaluate.load("bleu")
results = bleu.compute(predictions=predicted,references=paraphrases)
print(results)"""

In [None]:
import pandas as pd 

In [None]:
data = pd.read_csv('/kaggle/input/nlp-getting-started/train.csv')

In [None]:
data = data[["text","target"]]

In [None]:
data.head()