# LLM Text Summarization

Concepts:
- HuggingFace and Azure OpenAI APIs
- Text summarization of 10-K Disclosures About Market Risk
- Role prompting


In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import textwrap
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from transformers.utils import logging
from openai import AzureOpenAI
from rouge_score import rouge_scorer
from tqdm import tqdm
from finds.database import SQL, RedisDB
from finds.unstructured import Edgar
from finds.structured import BusDay, CRSP, PSTAT
from finds.readers import Sectoring
from secret import paths, credentials
logging.set_verbosity_error() # logging.set_verbosity_info() #logging.set_verbosity_warning()
VERBOSE = 0
SAVED = False

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Open the data sources used by the rest of the notebook. Connection settings
# come from the `credentials` and `paths` mappings imported from `secret`.
# Two SQL connections, built from the 'sql' and 'user' credential entries
sql = SQL(**credentials['sql'], verbose=VERBOSE)
user = SQL(**credentials['user'], verbose=VERBOSE)
# Business-day helper built on the `sql` connection
bd = BusDay(sql)
# Redis connection -- presumably used as a cache by CRSP; confirm in finds
rdb = RedisDB(**credentials['redis'])
# CRSP and PSTAT accessors (used below for the universe and SIC lookups)
crsp = CRSP(sql, bd, rdb, verbose=VERBOSE)
pstat = PSTAT(sql, bd, verbose=VERBOSE)
# Edgar text store rooted at paths['10X'], opened with zipped=False
ed = Edgar(paths['10X'], zipped=False, verbose=VERBOSE)

Last FamaFrench Date 2024-04-30 00:00:00


In [3]:
# Stock universe as of the date that bd.endmo resolves for 20231231
universe_date = bd.endmo(20231231)
univ = crsp.get_universe(universe_date)

In [4]:
# Attach a company name to every permno in the universe
comnam = crsp.build_lookup(
    source='permno',
    target='comnam',
    fillna="",
)
univ['comnam'] = comnam(univ.index)

In [5]:
# Industry code per stock: take the Compustat SIC code when it is positive,
# otherwise fall back to the `siccd` column already present in the universe
sic = pstat.build_lookup(source='lpermno', target='sic', fillna=0)
compustat_sic = Series(sic[univ.index], index=univ.index)
industry = compustat_sic.where(compustat_sic > 0, univ['siccd'])

# Map industry codes to the FF 10-sector scheme
sectors = Sectoring(sql, scheme='codes10', fillna='')
univ['sector'] = sectors[industry]

Load Disclosure about Market Risk text from 10-K's

In [6]:
# 10-K filings dated in 1Q 2024; when a permno has several rows, the last
# one is kept, and the result is indexed by permno
form = '10-K'
item = 'qqr10K'
rows = DataFrame(ed.open(form=form, item=item))
in_quarter = rows['date'].between(20240101, 20240331)
found = (
    rows.loc[in_quarter]
    .drop_duplicates(subset=['permno'], keep='last')
    .set_index('permno')
)

In [7]:
# Restrict the filings to stocks in size decile 1 (the largest stocks)
largest_permnos = univ.index[univ['decile'] == 1]
found = found.loc[found.index.intersection(largest_permnos)]

In [8]:
# Read each filing's text in lower case, keyed by permno
docs = {
    permno: ed[found.loc[permno, 'pathname']].lower()
    for permno in found.index
}

# Keep only documents longer than 2000 characters, and attach their text
# to `found` as the 'item' column
permnos = [permno for permno in docs if len(docs[permno]) > 2000]
item_text = Series(docs, name='item').reindex(permnos)
found = found.join(item_text, how='inner')

## HuggingFace APIs

Command line interface
   
`pip install "huggingface_hub[cli]"`

- To empty cache (in ~/.cache/huggingface/)
    
  `huggingface-cli delete-cache`




### Transformers modules

- AutoTokenizers
- AutoModels
  - `generate` method: 


  



In [9]:
# Hub identifier of the model, and the local folder holding a saved copy
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
save_name = paths['scratch'] / "Llama-3-8B-Instruct"

# Load from the local folder once the model has been saved there
if SAVED:
    model_name = save_name
else:
    model_name = model_id

### Quantization

https://medium.com/@manuelescobar-dev/implementing-and-running-llama-3-with-hugging-faces-transformers-library-40e9754d8c80

Quantization reduces the hardware requirements by loading the model weights with lower precision. Instead of being loaded in 16 bits (float16), the weights are loaded in 4 bits, significantly reducing memory usage from ~20GB to ~8GB.

In [10]:
# 4-bit NF4 quantization with float16 compute and double quantization off
compute_dtype = torch.float16
quant_options = dict(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=compute_dtype,
)
bnb_config = BitsAndBytesConfig(**quant_options)
bnb_config

BitsAndBytesConfig {
  "_load_in_4bit": true,
  "_load_in_8bit": false,
  "bnb_4bit_compute_dtype": "float16",
  "bnb_4bit_quant_storage": "uint8",
  "bnb_4bit_quant_type": "nf4",
  "bnb_4bit_use_double_quant": false,
  "llm_int8_enable_fp32_cpu_offload": false,
  "llm_int8_has_fp16_weight": false,
  "llm_int8_skip_modules": null,
  "llm_int8_threshold": 6.0,
  "load_in_4bit": true,
  "load_in_8bit": false,
  "quant_method": "bitsandbytes"
}

Load model and tokenizer from Huggingface

In [11]:
# The tokenizer is always fetched by hub id; the weights come from
# `model_name` (hub id or local folder) and are quantized on load
tokenizer = AutoTokenizer.from_pretrained(model_id)
load_options = dict(
    quantization_config=bnb_config,
    device_map="cuda",   # alternative: "auto"
)
model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)
model

Loading checkpoint shards: 100%|██████████| 4/4 [00:08<00:00,  2.01s/it]


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): Ll

Show model's context length

In [12]:
# Report the maximum number of positions recorded in the model config
max_context = model.config.max_position_embeddings
print('max context length', max_context)

max context length 8192


In [13]:
# Write the model to the local folder unless it was loaded from there,
# then report the CUDA memory currently allocated
if not SAVED:
    model.save_pretrained(save_name)
allocated_gb = torch.cuda.memory_allocated() / 1e9
print(f"CUDA memory: {allocated_gb:.2f} GB")

CUDA memory: 6.06 GB


### Temperature

Temperature is a parameter used to control the randomness of the model's output during text generation. Low Temperature (value close to 0) decreases randomness, leading to more focused and predictable outputs. High Temperature increases randomness: the model is more likely to choose less probable words, leading to more diverse and creative outputs.



In [14]:
# Prompt template and generation helper for the local chat model
MAX_CHAR = 20000
def generate_response(text, max_char=MAX_CHAR,
                      role="You are a helpful AI assistant",
                      prompt="Write a concise summary of the text."):
    """Generate a reply from the local chat model for one document.

    A space is appended to `text`, which is then cut to at most `max_char`
    characters and quoted in a user message after `prompt`. The system
    message is `role`.

    Args:
        text: document text to place in the prompt
        max_char: maximum number of characters of `text` to include
        role: content of the system message
        prompt: instruction written ahead of the quoted text

    Returns:
        The newly generated tokens decoded to a string, with special tokens
        skipped. At most 256 new tokens are generated.
    """
    content = f"""
{prompt}

Text in triple quotes: '''{(text+' ')[:max_char]}'''

Summary:""".strip()

    chat = [
        dict(role="system", content=role),
        dict(role="user", content=content),
    ]

    # Render the chat with the tokenizer's template, on the model's device
    input_ids = tokenizer.apply_chat_template(
        chat, add_generation_prompt=True, return_tensors="pt")
    input_ids = input_ids.to(model.device)
    prompt_length = input_ids.shape[-1]

    if VERBOSE:
        print('tokens:', np.prod(input_ids.shape),
              model.config.max_position_embeddings)

    # Stop on either the tokenizer's EOS token or the "<|eot_id|>" token
    eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
    generated = model.generate(
        input_ids,
        max_new_tokens=256,
        eos_token_id=[tokenizer.eos_token_id, eot_id],
        temperature=0.01,   # low temperature to reduce randomness
    )

    # Drop the echoed prompt tokens and decode only the continuation
    new_tokens = generated[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

## Azure AI API

Azure, Microsoft's cloud-based platform for AI and other subscription services, provides APIs and endpoints for OpenAI's models.

- https://oai.azure.com/portal
- https://openai.com/api/


### OpenAI API

Alternatively, GPT and other AI models, such as Whisper for automatic speech recognition (ASR), can be accessed directly from OpenAI:

- https://platform.openai.com/docs/overview


In [15]:
# Azure OpenAI client, and the name of the model deployment to query
LLM_MODEL = "gpt-35-turbo-16k"
client = AzureOpenAI(
    api_version="2024-02-01",
    **credentials['azure'],
)

In [16]:
# template to prompt GPT and generate response
def get_gpt_response(prompt: str, system_prompt: str,
                     model: str = LLM_MODEL) -> "str | None":
    """Send a system message and a user message to the chat-completions API.

    Args:
        prompt: user message, i.e. the query sent to GPT
        system_prompt: system message sent ahead of the query
        model: model name; must match the custom deployment name

    Returns:
        The message content of the first returned choice, or None when the
        request or the response handling raised an exception. In that case
        the error is printed rather than re-raised, so callers must be
        prepared to receive None.
    """
    messages = [{"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt}]
    try:
        completion = client.chat.completions.create(
            model=model,  # must match the custom deployment name you chose
            messages=messages,
            temperature=0.0,
        )
        return completion.choices[0].message.content
    except Exception as e:
        # Deliberate best effort: report the failure and let the caller decide
        print(f"ERROR!\n\n {e}")
        return None

## Text summarization

Text summarization is the task of condensing a piece of text into a shorter version while retaining its key information and meaning. It aims to generate a concise summary that captures the main points of the original document or passage.

Sample one company from each industry, and generate summary of risk disclosure text with quantized Llama-3-8B model.

In [17]:
# Sample one company from each industry sector. The seed is fixed so that the
# selected companies, and the summaries and ROUGE scores computed from them
# below, are the same on every Restart & Run All.
docs = univ.loc[found.index].groupby('sector').sample(1, random_state=42)

In [18]:
# Summarize each sampled company's disclosure text with the local model,
# printing the result wrapped paragraph by paragraph
summary = {}
for permno in docs.index:
    company = univ.loc[permno, 'comnam']
    print('=====', company, '=====')
    summary[permno] = generate_response(found.loc[permno, 'item'])
    wrapped = [textwrap.fill(line) for line in summary[permno].split('\n')]
    print("\n".join(wrapped))
    print()

===== BOOKING HOLDINGS INC =====
Here is a concise summary of the text:

The company is exposed to market risk due to changes in interest
rates, foreign currency exchange rates, and equity prices. To manage
this risk, the company uses internal policies and procedures, as well
as derivative financial instruments, such as foreign currency exchange
contracts. The company evaluates its exposure to market risk by
assessing near-term and long-term fluctuations in interest rates and
foreign currency exchange rates. The company also faces exposure to
movements in foreign currency exchange rates due to its businesses
outside the US, which are translated from local currencies into US
dollars. Additionally, the company is exposed to equity price risk due
to changes in the fair values of its investments in equity securities
of publicly-traded companies and private companies.

===== PIONEER NATURAL RESOURCES CO =====
Here is a concise summary of the text:

The company, Pioneer Natural Resources Com

### Evaluation

__ROUGE__

Recall-Oriented Understudy for Gisting Evaluation (ROUGE) is a set of metrics used to evaluate the quality of summaries by comparing them to reference summaries or human-generated summaries.

- ROUGE-N measures the overlap of n-grams (contiguous sequences of n words) between the system-generated and the reference summaries
- ROUGE-L measures the longest common subsequence (LCS).

Recall, precision and F1-measure versions of these metrics can be computed.

__BLEU__

Bilingual Evaluation Understudy (BLEU) is a metric originally designed for evaluating machine translation, but it is also adapted for text summarization evaluation.
- N-gram Precision measures the overlap of n-grams (typically up to 4-grams) between the system-generated summary and the reference summary.
- Brevity Penalty penalizes overly short summaries that do not capture enough information from the reference summaries.
- Cumulative BLEU takes the geometric mean of the n-gram precisions for 1-grams up to N-grams (typically N = 4), rewarding systems that produce more accurate translations across longer phrases.




In [19]:
# ROUGE-1 and ROUGE-2 scorer with stemming enabled, plus one empty list per
# metric to collect the per-company scores
rouge_types = ['rouge1', 'rouge2']
scorer = rouge_scorer.RougeScorer(rouge_types, use_stemmer=True)
scores = dict((name, []) for name in rouge_types)

Generate reference summaries from GPT

In [20]:
# Generate a GPT reference summary for each sampled company and score the
# local model's summary against it.
# The score lists are reset here so that re-running this cell does not append
# a second copy of every company's row.
scores = {rouge_type: [] for rouge_type in rouge_types}
system_prompt = "You are an AI assistant."
reference = {}
for permno in docs.index:
    company = univ.loc[permno, 'comnam']
    print('=====', company, '=====')
    prompt_template = f"""
Write a concise summary of the text.

Text in triple quotes: '''{(found.loc[permno, 'item'] + ' ')[:MAX_CHAR]}'''

Summary:""".strip()
    reference[permno] = get_gpt_response(prompt_template, system_prompt)
    if reference[permno] is None:
        # get_gpt_response returns None (after printing the error) when the
        # request fails; there is nothing to print or score for this company
        print('(no reference summary returned; skipping ROUGE scores)')
        print()
        continue
    print("\n".join([textwrap.fill(s) for s in reference[permno].split('\n')]))
    print()
    score = scorer.score(target=reference[permno], prediction=summary[permno])
    for rouge_type in rouge_types:
        # one Series of (precision, recall, fmeasure) per company and metric
        scores[rouge_type].append(Series(score[rouge_type]._asdict(),
                                         name=company))

===== BOOKING HOLDINGS INC =====
The text discusses the market risks that a company is exposed to,
including changes in interest rates, foreign currency exchange rates,
and equity prices. The company manages these risks through established
policies and procedures, as well as the use of derivative financial
instruments. The objective is to mitigate potential adverse
fluctuations in income statement, cash flow, and fair value. The
company evaluates its exposure by assessing near-term and long-term
fluctuations, utilizing market indicators and projections. The company
does not engage in speculative trading or leveraged derivatives. The
text also mentions the impact of interest rate changes on the fair
value of debt and the sensitivity of convertible senior notes to
equity market price volatility. Additionally, the company faces
exposure to foreign currency exchange rates due to the translation of
financial results from local currencies to US dollars. The impact of
currency exchange rates 

Compare rouge-1 metric


In [21]:
# ROUGE-1 table: one row per company, followed by the average across companies
rouge_type = rouge_types[0]
df = pd.concat(scores[rouge_type], axis=1)
print(f"{rouge_type.upper()} metric:")
average = df.mean(axis=1).rename('  average')
pd.concat([df, average], axis=1).T

ROUGE1 metric:


Unnamed: 0,precision,recall,fmeasure
BOOKING HOLDINGS INC,0.825758,0.586022,0.685535
PIONEER NATURAL RESOURCES CO,0.51145,0.544715,0.527559
PAYPAL HOLDINGS INC,0.5,0.764286,0.60452
BOSTON SCIENTIFIC CORP,0.488372,0.456522,0.47191
R T X CORP,0.675,0.646707,0.66055
MONSTER BEVERAGE CORP NEW,0.826667,0.382716,0.523207
HILTON WORLDWIDE HOLDINGS INC,0.726316,0.594828,0.654028
TARGET CORP,0.190217,0.583333,0.286885
A T & T INC,0.473684,0.578947,0.521053
WASTE MANAGEMENT INC DEL,0.638095,0.485507,0.55144


Compare rouge-2 metric

In [22]:
# ROUGE-2 table: one row per company, followed by the average across companies
rouge_type = rouge_types[1]
df = pd.concat(scores[rouge_type], axis=1)
print(f"{rouge_type.upper()} metric:")
average = df.mean(axis=1).rename('  average')
pd.concat([df, average], axis=1).T

ROUGE2 metric:


Unnamed: 0,precision,recall,fmeasure
BOOKING HOLDINGS INC,0.496183,0.351351,0.411392
PIONEER NATURAL RESOURCES CO,0.215385,0.229508,0.222222
PAYPAL HOLDINGS INC,0.29108,0.446043,0.352273
BOSTON SCIENTIFIC CORP,0.203125,0.189781,0.196226
R T X CORP,0.383648,0.36747,0.375385
MONSTER BEVERAGE CORP NEW,0.540541,0.248447,0.340426
HILTON WORLDWIDE HOLDINGS INC,0.521277,0.426087,0.4689
TARGET CORP,0.060109,0.186441,0.090909
A T & T INC,0.269231,0.329412,0.296296
WASTE MANAGEMENT INC DEL,0.355769,0.270073,0.307054


## Role prompting

Creating prompts based on the role or perspective of the person can be a useful technique for generating more relevant and engaging responses from language models.

The __system prompt__ is an initial set of instructions that serves as the starting point of a new chat session. It sets the context for the model and helps to focus its capabilities.
The model will assume that role or persona, including its style.


For example, we can generate a more user-friendly summary to help explain the risk disclosures in 10-K's.


In [23]:
# Role prompting: summarize the same documents with a first-grade-teacher
# persona as the system message.
# `role` must be a plain string; a trailing comma after the literal would turn
# it into a one-element tuple and corrupt the system message content.
role = "You are a helpful first-grade teacher."
prompt = "Write a simple summary of the text for a first-grader."
simple = {}
for permno in docs.index:
    print('=====', univ.loc[permno, 'comnam'], '=====')
    simple[permno] = generate_response(found.loc[permno, 'item'],
                                       role=role, prompt=prompt)
    # wrap each paragraph separately so blank lines are preserved
    print("\n".join([textwrap.fill(s) for s in simple[permno].split('\n')]))
    print()

===== BOOKING HOLDINGS INC =====
Here's a summary of the text for a first-grader:

Imagine you have some money in a special account. Sometimes, the value
of that money can go up or down. This can happen because of things
like changes in interest rates, like when you borrow money from a
bank, or changes in the value of other countries' money, like when you
buy something from a friend who lives in another country.

We have to be careful with our money, so we make plans to keep it
safe. We use special tools, like special contracts, to help keep our
money safe. We also look at what's happening in the world, like what's
happening with interest rates and other countries' money, to make sure
we're making good choices.

Sometimes, we might have to make some changes to our plans, like if
the value of our money goes up or down. But we're always trying to
keep our money safe and make good choices.

===== PIONEER NATURAL RESOURCES CO =====
Here's a summary of the text for a first-grader:

The comp