In [1]:
import torch
import torch.nn as nn
import numpy as np
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
from transformers import BertModel, BertConfig, BertTokenizer
from transformers import Trainer, TrainingArguments
from transformers import BartForConditionalGeneration, BartTokenizerFast
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from transformers import AdamW
from datasets import load_dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from pathlib import Path
import logging
import sys
import os
import gc
from rouge import Rouge

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
log = logging.getLogger(__name__)

In [3]:
TRAIN_ARXIV_ENCODING_TENSOR_FILE = './train/train_encodings'
TRAIN_ARXIV_LABEL_TENSOR_FILE = './train/train_labels'

TEST_ARXIV_ENCODING_TENSOR_FILE = './test/test_encodings'
TEST_ARXIV_LABEL_TENSOR_FILE = './test/test_labels'

### BART

In [47]:
tokenizer = BartTokenizerFast.from_pretrained('facebook/bart-large-cnn')
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn').to(device)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /facebook/bart-large-cnn/resolve/main/vocab.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /facebook/bart-large-cnn/resolve/main/merges.txt HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /facebook/bart-large-cnn/resolve/main/tokenizer.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /facebook/bart-large-cnn/resolve/main/added_tokens.json HTTP/1.1" 404 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /fac

### Data processing

In [5]:
has_arxiv_tensor = True if len(os.listdir('./train')) != 0 else False

In [6]:
arxiv = None if has_arxiv_tensor else load_dataset('scientific_papers', 'arxiv')

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): s3.amazonaws.com:443
DEBUG:urllib3.connectionpool:https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/scientific_papers/scientific_papers.py HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): raw.githubusercontent.com:443
DEBUG:urllib3.connectionpool:https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/1.6.2/datasets/scientific_papers/scientific_papers.py HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): raw.githubusercontent.com:443
DEBUG:urllib3.connectionpool:https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/1.6.2/datasets/scientific_papers/dataset_infos.json HTTP/1.1" 200 0


In [7]:
def load_tensor(tensor_file, data, tokenizer):
    if not Path(f'{tensor_file}.pt').is_file():
        log.debug('-- No tensor file found, start tokenizing --')
        tensor = tokenizer.batch_encode_plus(data, truncation=True, padding=True, max_length=1024, return_tensors='pt')
        log.debug(f'-- Save tokenized tensor to {tensor_file} --')
        torch.save(tensor, f'{tensor_file}.pt')
    else:
        log.debug(f'-- Load tokenized tensor from {tensor_file} --')
        tensor = torch.load(f'{tensor_file}.pt')
    return tensor

In [8]:
class Summary_dataset(Dataset):
  def __init__(self, encodings, labels):
    self.encodings = encodings
    self.labels = labels


  def __getitem__(self, index):
    item = {key: torch.tensor(val[index]) for key, val in self.encodings.items()}
    item['labels'] = torch.tensor(self.labels['input_ids'][index])
    return item
  

  def __len__(self):
    return self.encodings['input_ids'].size(0)



In [12]:
test_encodings = load_tensor(
    TEST_ARXIV_ENCODING_TENSOR_FILE,
    None if has_arxiv_tensor else list(map(lambda x: x['article'], arxiv['test'])),
    tokenizer
)

test_labels = load_tensor(
    TEST_ARXIV_LABEL_TENSOR_FILE,
    None if has_arxiv_tensor else list(map(lambda x: x['abstract'], arxiv['test'])),
    tokenizer
)

test_dataset = Summary_dataset(test_encodings, test_labels)

DEBUG:__main__:-- Load tokenized tensor from ./test/test_encodings --
DEBUG:__main__:-- Load tokenized tensor from ./test/test_labels --


### Fine-tune

In [None]:
SPLITS = 20
EPOCH = 1
LR_RATE = 5e-5
TRAIN_BATCH_SIZE = 32

In [None]:
optimizer = AdamW(model.parameters(), lr=LR_RATE)

batch_size = 0 if has_arxiv_tensor else len(arxiv['train']) // SPLITS
articles = None if has_arxiv_tensor else list(map(lambda x: x['article'], arxiv['train']))
abstracts = None if has_arxiv_tensor else list(map(lambda x: x['abstract'], arxiv['train']))

for i in range(SPLITS):
    model.train()
    
    start = i * batch_size
    end = (i + 1) * batch_size if i != SPLITS - 1 else len(arxiv['train']) - 1
    
    train_encodings = load_tensor(
        f'{TRAIN_ARXIV_ENCODING_TENSOR_FILE}_{i}',
        None if has_arxiv_tensor else articles[i * batch_size : (i + 1) * batch_size],
        tokenizer
    )

    train_labels = load_tensor(
        f'{TRAIN_ARXIV_LABEL_TENSOR_FILE}_{i}',
        None if has_arxiv_tensor else abstracts[i * batch_size : (i + 1) * batch_size],
        tokenizer
    )
    
    train_dataset = Summary_dataset(train_encodings, train_labels)
    train_loader = DataLoader(train_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True, drop_last=True)

    for j in range(EPOCH):
        with tqdm(train_loader, unit="batch") as epoch:
            for batch in epoch:
                
                optimizer.zero_grad()
                total_loss = torch.tensor(0, dtype=torch.float, device=device)
                
                # gradient accumulation
                for idx in range(TRAIN_BATCH_SIZE):
                    input_ids = batch['input_ids'][idx].view(1, -1).to(device)
                    attention_mask = batch['attention_mask'][idx].view(1, -1).to(device)
                    labels = batch['labels'][idx].view(1, -1).to(device)

                    outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
                    loss = outputs[0] / TRAIN_BATCH_SIZE
                    loss.backward()
                    
                    total_loss += loss
                epoch.set_postfix(loss=total_loss)
                
                optimizer.step()
    del train_encodings
    del train_labels
    del train_dataset
    del train_loader
    gc.collect()
    log.info(f'-- Saving #{i} model checkpoint --')
    torch.save(model.state_dict(), f'bart_split_{i}.pt')
    
    model.eval()

## Evaluation
### BART Base

In [14]:
import psutil
CPU_COUNT = psutil.cpu_count(logical = True)
rouge = Rouge()

In [46]:
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn').to(device)
model.eval()

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /facebook/bart-large-cnn/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /facebook/bart-large-cnn/resolve/main/pytorch_model.bin HTTP/1.1" 302 0


BartForConditionalGeneration(
  (model): BartModel(
    (shared): Embedding(50264, 1024, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): Embedding(50264, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0): BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
          (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
   

In [16]:
predicts, labels = [], arxiv['test']['abstract'][:500]

for batch in DataLoader(test_dataset, batch_size=1, drop_last=True, num_workers=CPU_COUNT):
    predicts.append(
        tokenizer.decode(
            model.generate(batch['input_ids'].to(device).view(1, -1)).squeeze(0)
        )
    )
    if len(predicts) == 500:
        break

  
  
  
  if __name__ == '__main__':
  if __name__ == '__main__':
  if __name__ == '__main__':
  
  
  if __name__ == '__main__':
  
  if __name__ == '__main__':
  if __name__ == '__main__':
  
  
  if __name__ == '__main__':
  if __name__ == '__main__':


In [18]:
rouge.get_scores(predicts, labels, avg=True)

{'rouge-1': {'f': 0.25636286683601944,
  'p': 0.432389182280891,
  'r': 0.19238319264202147},
 'rouge-2': {'f': 0.0761504181802766,
  'p': 0.1288743815556388,
  'r': 0.056975345464353316},
 'rouge-l': {'f': 0.23734391309236852,
  'p': 0.33479190615244336,
  'r': 0.19148226445524558}}

#### Original articles

In [66]:
arxiv['test']['abstract'][0]

' the short - term periodicities of the daily sunspot area fluctuations from august 1923 to october 1933 are discussed . for these data \n the correlative analysis indicates negative correlation for the periodicity of about @xmath0 days , but the power spectrum analysis indicates a statistically significant peak in this time interval . \n a new method of the diagnosis of an echo - effect in spectrum is proposed and it is stated that the 155-day periodicity is a harmonic of the periodicities from the interval of @xmath1 $ ] days .    the autocorrelation functions for the daily sunspot area fluctuations and for the fluctuations of the one rotation time interval in the northern hemisphere , separately for the whole solar cycle 16 and for the maximum activity period of this cycle do not show differences , especially in the interval of @xmath2 $ ] days . \n it proves against the thesis of the existence of strong positive fluctuations of the about @xmath0-day interval in the maximum activity

In [45]:
arxiv['test']['article'][0]

'for about 20 years the problem of properties of short - term changes of solar activity has been considered extensively .\nmany investigators studied the short - term periodicities of the various indices of solar activity .\nseveral periodicities were detected , but the periodicities about 155 days and from the interval of @xmath3 $ ] days ( @xmath4 $ ] years ) are mentioned most often .\nfirst of them was discovered by @xcite in the occurence rate of gamma - ray flares detected by the gamma - ray spectrometer aboard the _ solar maximum mission ( smm ) .\nthis periodicity was confirmed for other solar flares data and for the same time period @xcite .\nit was also found in proton flares during solar cycles 19 and 20 @xcite , but it was not found in the solar flares data during solar cycles 22 @xcite .\n_    several autors confirmed above results for the daily sunspot area data . @xcite studied the sunspot data from 18741984 .\nshe found the 155-day periodicity in data records from 31 ye

In [67]:
arxiv['test']['abstract'][1]

' we study the detectability of circular polarization in a stochastic gravitational wave background from various sources such as supermassive black hole binaries , cosmic strings , and inflation in the early universe with pulsar timing arrays . \n we calculate generalized overlap reduction functions for the circularly polarized stochastic gravitational wave background . \n we find that the circular polarization can not be detected for an isotropic background . however , there is a chance to observe the circular polarization for an anisotropic gravitational wave background . \n we also show how to separate polarized gravitational waves from unpolarized gravitational waves . '

In [53]:
arxiv['test']['article'][1]

'it is believed that the direct detection of gravitational waves ( gws ) will bring the era of gravitational wave astronomy .\nthe interferometer detectors are now under operation and awaiting the first signal of gws  @xcite .\nit is also known that pulsar timing arrays ( ptas ) can be used as a detector for gws @xcite .\nthese detectors are used to search for very low frequency ( @xmath0 ) gravitational waves , where the lower limit of the observable frequencies is determined by the inverse of total observation time @xmath1 .\nindeed , the total observation time has a crucial role in ptas , because ptas are most sensitive near the lower edge of observable frequencies @xcite . taking into account its sensitivity ,\nthe first direct detection of the gravitational waves might be achieved by ptas .\nthe main target of ptas is the stochastic gravitational wave background ( sgwb ) generated by a large number of unresolved sources with the astrophysical origin or the cosmological origin in t

In [68]:
arxiv['test']['abstract'][2]

' starting from the wkb approximation , a new barrier penetration formula is proposed for potential barriers containing a long - range coulomb interaction . \n this formula is especially proper for the barrier penetration with penetration energy much lower than the coulomb barrier . \n the penetrabilities calculated from the new formula agree well with the results from the wkb method . as a first attempt , \n this new formula is used to evaluate @xmath0 decay half - lives of atomic nuclei and a good agreement with the experiment is obtained . '

In [54]:
arxiv['test']['article'][2]

'as a common quantum phenomenon , the tunneling through a potential barrier plays a very important role in the microscopic world and has been studied extensively since the birth of quantum mechanics .\none of the earliest applications of quantum tunneling is the explanation of @xmath0 decays in atomic nuclei .\nthe quantum tunneling effect governs also many other nuclear processes such as fission and fusion .\nin particular , a lot of new features are revealed in sub - barrier fusion reactions which are closely connected with the tunneling phenomena  @xcite .    for most of the potential barriers , the penetrability can not be calculated analytically  @xcite . among those potentials for which analytical solutions can be obtained ,\nthe parabolic potential  @xcite is the mostly used in the study of nuclear fusion . by approximating the coulomb barrier to a parabola\n, wong derived an analytic expression for the fusion cross section  @xcite which is widely adopted today in the study of h

In [69]:
arxiv['test']['abstract'][3]

' we study a novel class of numerical integrators , the adapted nested force - gradient schemes , used within the molecular dynamics step of the hybrid monte carlo ( hmc ) algorithm . \n we test these methods in the schwinger model on the lattice , a well known benchmark problem . \n we derive the analytical basis of nested force - gradient type methods and demonstrate the advantage of the proposed approach , namely reduced computational costs compared with other numerical integration schemes in hmc . '

In [55]:
arxiv['test']['article'][3]

'for the hybrid monte carlo algorithm ( hmc)@xcite , often used to study quantum chromodynamics ( qcd ) on the lattice , one is interested in efficient numerical time integration schemes which are optimal in terms of computational costs per trajectory for a given acceptance rate . high order\nnumerical methods allow the use of larger step sizes , but demand a larger computational effort per step ; low order schemes do not require such large computational costs per step , but need more steps per trajectory .\nso there is a need to balance these opposing effects .\nomelyan integration schemes @xcite of a force - gradient type have proved to be an efficient choice , since it is easy to obtain higher order schemes that demand a small additional computational effort .\nthese schemes use higher - order information from force - gradient terms to both increase the convergence of the method and decrease the size of the leading error coefficient . other ideas to achieve better efficiency for num

In [70]:
arxiv['test']['abstract'][4]

' new methods for obtaining functional equations for feynman integrals are presented . \n application of these methods for finding functional equations for various one- and two- loop integrals described in detail . \n it is shown that with the aid of functional equations feynman integrals in general kinematics can be expressed in terms of simpler integrals .    \n pacs numbers : 02.30.gp , 02.30.ks , 12.20.ds , 12.38.bx + keywords : feynman integrals , functional equations     +    derivation of functional equations for feynman integrals + from algebraic relations   +    * o.v .  \n tarasov * +   ii . \n institut fr theoretische physik , universitt hamburg , + luruper chaussee 149 , 22761 hamburg , germany + and + joint institute for nuclear research , + 141980 dubna , russian federation + : otarasov@jinr.ru + '

In [56]:
arxiv['test']['article'][4]

"recently it was discovered that feynman integrals obey functional equations @xcite , @xcite .\ndifferent examples of functional equations were presented in refs .\n@xcite , @xcite,@xcite . in these articles\nonly one - loop integrals were considered .    in the present paper\nwe propose essentially new methods for deriving functional equations .\nthese methods are based on algebraic relations between propagators and they are suitable for deriving functional equations for multi - loop integrals . also these methods can be used to derive functional equations for integrals with some propagators raised to non - integer powers .\nour paper is organized as follows . in sec .\n2 . the method proposed in ref .  @xcite is shortly reviewed .    in sec .\n3 . a method for finding algebraic relations between products of propagators is formulated .\nwe describe in detail derivation of explicit relations for products of two , three and four propagators .\nalso algebraic relation for products of arb

#### Sample summary output

In [48]:
predicts[0]

  
  if __name__ == '__main__':


'</s><s>For about 20 years the problem of properties of short - term changes of solar activity has been considered extensively. Several periodicities were detected, but the periodicities about 155 days and from the interval of @xmath3 $ ] days are mentioned most often. The periodicity between 150160 days is statistically significant during all cycles from 16 to 21. The power of this periodicity started growing at cycle 19, decreased in cycles 20 and 21 and disappered after cycle 21.</s>'

In [49]:
predicts[1]

  
  if __name__ == '__main__':


'</s><s>It is believed that the direct detection of gravitational waves ( gws) will bring the era of gravitational wave astronomy. Pulsar timing arrays ( ptas) can be used as a detector for gws. The main target of ptas is the stochastic gravitational wave background ( sgwb)</s>'

In [50]:
predicts[2]

  
  if __name__ == '__main__':


'</s><s>The tunneling through a potential barrier plays a very important role in the microscopic world and has been studied extensively since the birth of quantum mechanics. For most of the potential barriers, the penetrability can not be calculated analytically. In the present work, we derived a new barrier penetration formula based on the wkb approximation. We apply this new formula to evaluate the half - lives of atomic nuclei.</s>'

In [51]:
predicts[3]

  
  if __name__ == '__main__':


'</s><s>for the hybrid monte carlo algorithm ( hmc)@xcite, often used to study quantum chromodynamics ( qcd ) on the lattice, one is interested in efficient numerical time integration schemes. high ordernumerical methods allow the use of larger step sizes, but demand a larger computational effort per step. low order schemes do not require such large computational costs per step, but need more steps per trajectory. A natural way to inherit the advantages from force - gradient type schemes and multirate approaches would be to combine these two ideas.</s>'

In [52]:
predicts[4]

  
  if __name__ == '__main__':


'</s><s>recently it was discovered that feynman integrals obey functional equations. In the present paper we propose essentially new methods for derivingfunctional equations. These methods are based on algebraic relations between propagators. They can be used to derive functional equations for integrals with some propagators raised to non - integer powers. We formulate our vision of the future applications of the proposed methods.</s>'

### BART fine-tune on Arxiv scientific dataset after 1 epoch

In [57]:
rouge = Rouge()

model.load_state_dict(torch.load(f'bart_split_{15}.pt'))
model.eval()

BartForConditionalGeneration(
  (model): BartModel(
    (shared): Embedding(50264, 1024, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): Embedding(50264, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0): BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
          (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
   

In [21]:
predicts, labels = [], arxiv['test']['abstract'][:500]

for batch in DataLoader(test_dataset, batch_size=1, drop_last=True, num_workers=CPU_COUNT):
    predicts.append(
        tokenizer.decode(
            model.generate(batch['input_ids'].to(device).view(1, -1)).squeeze(0)
        )
    )
    if len(predicts) == 500:
        break

  
  
  
  if __name__ == '__main__':
  if __name__ == '__main__':
  
  
  if __name__ == '__main__':
  if __name__ == '__main__':
  if __name__ == '__main__':
  
  
  if __name__ == '__main__':
  
  if __name__ == '__main__':
  if __name__ == '__main__':


#### ROUGE Score

In [22]:
rouge.get_scores(predicts, labels, avg=True)

{'rouge-1': {'f': 0.37150027317634876,
  'p': 0.4994501188160528,
  'r': 0.3140735726099545},
 'rouge-2': {'f': 0.13533945533327682,
  'p': 0.18288610584670895,
  'r': 0.11403439901011984},
 'rouge-l': {'f': 0.31408308485193576,
  'p': 0.3838079853883263,
  'r': 0.27784543247788374}}

#### Sample summary output

In [61]:
predicts[0]

  
  if __name__ == '__main__':


'</s><s><s> the problem of the existence of a short - term periodicity in the sunspot data from cycle 16. \n the daily sunspot area, the mean sunspot areas per carrington rotation, the monthly sunspot numbers and their fluctuations are obtained after removing the 11-year cycle. the properties of the power spectrum methods \n are analysed and the periodicity is found to be statistically significant during all cycles from 16 to 21.    _ \n keywords _ : solar activity, sunspot number, solar wind plasma, interplanetary magnetic field </s>'

In [62]:
predicts[1]

  
  if __name__ == '__main__':


'</s><s> we investigate the detectability of circular polarization in the stochastic gravitational wave background ( sgwb ) generated by a large number of unresolved sources with the astrophysical origin or the cosmological origin in the early universe by pulsar timing arrays ( ptas ). \n we characterize the stokes parameters for monochromatic plane gravitational waves and calculate generalized overlap reduction functions ( orfs ) so that we can probe the circular polarization of the sgWB.    in this paper \n, we formulate the cross - correlation formalism for anisotropic circularly polarized sgswb with ptas. the basic framework is essentially a combination of the formalism of @xc</s>'

In [63]:
predicts[2]

  
  if __name__ == '__main__':


'</s><s> we derive a new barrier penetration formula based on the wkb approximation. \n the influence of the long - range coulomb tail in the barrier potential is taken into accout properly. as a first attempt and a test study \n, we apply this new formula to evaluate @xmath0 decay half - lives of atomic nuclei and show that the present analytical formula reproduces the experimental results very well. </s>'

In [64]:
predicts[3]

  
  if __name__ == '__main__':


'</s><s> the hybrid monte carlo algorithm ( hmc ) is often used to study quantum chromodynamics ( qcd ) on the lattice. \n we present a novel class of numerical time integration schemes for the hmc algorithm, based on the idea of force - gradient integrators, which use higher - order information from force gradients to both increase the convergence of the method and decrease the size of the leading error coefficient. in this paper \n, we study the computational costs needed to perform numerical calculations, as well as the effort required to achieve a satisfactory acceptance rate during the evolution of hmc. </s>'

In [65]:
predicts[4]

  
  if __name__ == '__main__':


'</s><s> we propose essentially new methods for deriving functional equations for multi - loop integrals. \n these methods are based on algebraic relations between products of propagators and they can be used to derive functional equation for integrals with some propagators raised to non - integer powers. in particular functional equation is presented for the massless one - loop vertex type integral with arbitrary masses. as a by product, from this functional equation \n we obtained new hypergeometric representation for the one-loop massless vertex integral. </s>'

### Pegasus fine-tune on Arxiv scientific dataset

In [23]:
rouge = Rouge()
model_name = 'google/pegasus-arxiv'
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name).to(device)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /google/pegasus-arxiv/resolve/main/spiece.model HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /google/pegasus-arxiv/resolve/main/added_tokens.json HTTP/1.1" 404 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /google/pegasus-arxiv/resolve/main/special_tokens_map.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /google/pegasus-arxiv/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 

In [30]:
predicts, labels = [], arxiv['test']['abstract'][:500]

articles = tokenizer.batch_encode_plus(arxiv['test']['article'][:500], max_length=1024, return_tensors='pt', truncation=True, padding="longest").to(device)
for batch in articles['input_ids']:
    predicts.append(
        tokenizer.decode(
            model.generate(batch.to(device).view(1, -1)).squeeze(0)
        )
    )
    if len(predicts) == 500:
        break

#### ROUGE Score

In [31]:
rouge.get_scores(predict, label, avg=True)

{'rouge-1': {'f': 0.30449826577267997,
  'p': 0.5238095238095238,
  'r': 0.2146341463414634},
 'rouge-2': {'f': 0.11149825372846596,
  'p': 0.1927710843373494,
  'r': 0.0784313725490196},
 'rouge-l': {'f': 0.324675319919042,
  'p': 0.4166666666666667,
  'r': 0.26595744680851063}}

#### Sample summary output

In [34]:
predicts[0]

'the daily sunspot areas, the mean sunspot areas per carrington rotation, the monthly sunspot numbers and their fluctuations, which are obtained after removing the 11-year cycle, are analysed. <n> a new approach to the problem of aliases is proposed. <n> the power spectrum method is applied for the analysis of the sunspot data. <n> it is shown that the sunspot data from cycle 16 present a short - term periodicity with the period of @xmath0 days ( @xmath1 $ ] years ). <n> this periodicity presents around the maximum activity period in cycles 16 to 21 and disappered after cycle 21. <n> the period of this periodicity is @xmath2 days for the mean sunspot areas per carrington rotation, @xmath2 days for the monthly sunspot numbers and @xmath2 days for the fluctuations of the sunspot numbers. <n> [ firstpage ] sun : activity sunspot : fluctuations'

In [35]:
predicts[1]

'we investigate the detectability of circular polarization in the stochastic gravitational wave background ( sgwb ) by pulsar timing arrays ( ptas ). <n> we characterize sgwb by the so called stokes parameters and calculate generalized overlap reduction functions ( orfs ) so that we can probe the circular polarization of the sgwb. <n> we also discuss a method to separate the intensity ( @xmath0 mode ) and circular polarization ( @xmath1 mode ) of the sgwb.'

In [36]:
predicts[2]

'a new analytical formula for the barrier penetrability is derived based on the wkb approximation. <n> the influence of the long coulomb tail in the barrier potential is taken into account properly. <n> this formula is especially applicable to the barrier penetration with penetration energy much lower than the coulomb barrier. as a first attempt and a test study, we apply this new formula to evaluate @xmath0 decay half - lives of atomic nuclei. <n> we show that the present analytical formula reproduces the experimental results very well, especially for spherical nuclei.'

In [37]:
predicts[3]

'we introduce a new numerical time integration scheme for the hybrid monte carlo algorithm ( hmc ), which combines the advantages of force - gradient type integrators and multirate approaches. <n> the new scheme is tested on the two - dimensional schwinger model and is found to provide a sufficiently high acceptance rate while not significantly increasing the simulation time.'

In [38]:
predicts[4]

'new methods for deriving functional equations for feynman integrals are proposed. <n> these methods are based on algebraic relations between propagators and they are suitable for deriving functional equations for multi - loop integrals. <n> also these methods can be used to derive functional equations for integrals with some propagators raised to non - integer powers.'

In [39]:
predicts[5]

'in this paper we study in details the problem of sensitivity loss due to discretization of parameters and to the needs to limit the computing cost, with hough procedures. in particular, we propose and study the characteristics of a frequency hough procedure, designed mainly to reduce the discretization problem, and we compare it with the sky hough procedure, which is actually used in the virgo collaboration.'

In [40]:
predicts[6]

'this review focuses specifically on what we have learned about the progenitors of core - collapse supernovae ( cc sne ) by examining images of the supernova ( sn ) sites taken prior to the explosion. by registering pre - sn and post - sn images, usually taken at high resolution using either space - based optical detectors, or ground - based infrared detectors equipped with laser guide star adaptive optics systems ( lgs - ao ), about one dozen cc sne have now been directly detected ( i.e., shown to be spatially coincident with the sn ) in pre - sn images, with roughly two dozen upper limits derived from non - detections. <n> one example from each of the following three categories of progenitor studies is provided : ( 1 ) no progenitor star detected in pre - sn image(s ) ; ( 2 ) likely progenitor star identified via spatial coincidence in pre - sn image(s ) ; ( 3 ) progenitor star detected in pre - sn image(s ) and subsequently confirmed by demonstrating its absence in images taken afte

In [41]:
predicts[7]

'single - transverse spin asymmetries ( ssas ) play a fundamental role for our understanding of qcd in high - energy hadronic scattering. <n> they may be obtained for reactions in, for example, lepton - proton or proton - proton scattering with one transversely polarized initial proton, by dividing the difference of the cross sections for the two settings of the transverse polarization by their sum. <n> a crucial feature is that the distribution functions and the soft factor in this factorization are not integrated over the transverse momenta of partons, because these in fact generate the observed transverse momentum @xmath0. <n> a particularly interesting feature is that the sivers effect is not universal in the usual sense, that is, it is not represented by universal probability functions convoluted with partonic hard - scattering cross sections. <n> however, the non - universality has in fact a clear physical origin, and its closer investigation has turned out to be an extremely imp

In [42]:
predicts[8]

'the kingman coalescent is a random tree with infinitely many leaves arising in large population genetic models. at time @xmath0 it comes down from infinitely many lines to @xmath1 lines. <n> it is known that for large @xmath1 a randomly chosen @xmath2 is approximately exponentially distributed with mean @xmath3. in this paper <n> we prove a large deviation principle for the distributions of @xmath4. as a byproduct <n> we derive a large deviation principle for the distributions of @xmath5, where @xmath6 is the time to the most recent common ancestor of the infinite population of leaves.'

In [43]:
predicts[9]

'the main purpose of this paper is to overview several different physical examples of multi - mode and/or multi - frequency solitary waves that occur for the pulse or beam propagation in nonlinear optical fibers and waveguides. for these purposes, we select three different cases : multi - wavelength solitary waves in bit - parallel - wavelength optical fiber links, multi - colour spatial solitons due to multistep cascading in optical waveguides with quadratic nonlinearities, and quasiperiodic solitons in the fibonacci superlattices. <n> we believe these examples display both the diversity and richness of the multi - mode soliton systems, and they will allow further progress to be made in the study of nonlinear waves in multi - component nonintegrable physical models.'