In [1]:
## INSTALLING DEPENDENCIES TO USE LLAMA-2-13b-CHAT for inference
# NOTE(review): none of the installs below pin a version, so a fresh run may
# resolve different releases than the ones this notebook was executed with.
# llama-cpp-python is installed with CMAKE_ARGS enabling LLAMA_CUBLAS and
# FORCE_CMAKE=1 set in the environment of the pip command.
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip3 install llama-cpp-python
!pip3 install huggingface-hub
!pip3 install sentence-transformers langchain langchain-experimental
# Download the Q4_K_M GGUF file of Llama-2-13b-Chat into /content, with
# symlinks disabled for the local directory.
!huggingface-cli download TheBloke/Llama-2-13b-Chat-GGUF llama-2-13b-chat.Q4_K_M.gguf --local-dir /content --local-dir-use-symlinks False

Consider using `hf_transfer` for faster downloads. This solution comes with some limitations. See https://huggingface.co/docs/huggingface_hub/hf_transfer for more details.
downloading https://huggingface.co/TheBloke/Llama-2-13b-Chat-GGUF/resolve/main/llama-2-13b-chat.Q4_K_M.gguf to /root/.cache/huggingface/hub/tmp_ndqm8nm
llama-2-13b-chat.Q4_K_M.gguf: 100% 7.87G/7.87G [01:12<00:00, 109MB/s]
/content/llama-2-13b-chat.Q4_K_M.gguf


In [31]:
from typing import List, Optional

from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from langchain_community.document_loaders import AsyncHtmlLoader
from langchain_community.document_loaders import BSHTMLLoader
from langchain_text_splitters import HTMLHeaderTextSplitter
from langchain.schema.output_parser import StrOutputParser
from bs4 import BeautifulSoup
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain.callbacks.tracers import ConsoleCallbackHandler
import pprint
import re
import json

In [3]:
# Inference settings forwarded to the LlamaCpp wrapper below.
n_gpu_layers = 40
n_batch = 512

## Initialize the Llama model from the GGUF file - use GPU for quicker inference.
# A grammar file could additionally be supplied with
# grammar_path="/content/json.gbnf"; it is left disabled here.
_llama_settings = dict(
    model_path="/content/llama-2-13b-chat.Q4_K_M.gguf",
    temperature=0,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    n_ctx=2048,
    verbose=True,
)
llm = LlamaCpp(**_llama_settings)

llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from /content/llama-2-13b-chat.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 5120
llama_model_loader: - kv   4:                          llama.block_count u32              = 40
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 13824
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32      

In [4]:
###Create Pydantic BaseModel for better structured output
# Company: one company entry of the structured result.
#   company_name   - optional string, described to the parser as "Company Name".
#   company_domain - optional string, described to the parser as "Company Domain".
class Company(BaseModel):
  company_name: Optional[str] = Field(description="Company Name")
  company_domain: Optional[str] = Field(description="Company Domain")

# ArticleDetails: top-level shape of the final answer; this is the model handed
# to PydanticOutputParser further down in this cell.
#   related_companies - list of Company entries.
#   topic             - optional string, described to the parser as "Topic".
class ArticleDetails(BaseModel):
  related_companies: List[Company] = Field(description="Relevent Companies")
  topic: Optional[str] = Field(description="Topic")

##Get contents from HTML file
# The path is defined once and reused, so the text loader and the link scraper
# below are guaranteed to read the same file.
file_path = "/content/X is launching two new subscription tiers, including a ‘Premium+’ ad-free plan _ TechCrunch.html"
# Decode explicitly as UTF-8 instead of relying on the platform default encoding.
loader = BSHTMLLoader(file_path, open_encoding="utf-8")
data = loader.load()[0]
# Remove newlines so the article text is injected into the prompts as one line.
data.page_content = data.page_content.replace("\n", "")

###Get URLs from HTML file
with open(file_path, "r", encoding="utf-8") as file:
    html_as_string = file.read()

soup = BeautifulSoup(html_as_string, 'html.parser')
# href=True keeps only anchors that actually carry an href attribute; without
# the filter, anchors lacking one contribute None entries to the URL list that
# is later pasted into the prompt.
a_tags = soup.find_all('a', href=True)
urls = [element['href'] for element in a_tags]

##Create Parser from JSON output
output_parser = PydanticOutputParser(pydantic_object=ArticleDetails)
format_instructions = output_parser.get_format_instructions()

In [43]:
# Prompt asking the model to list every company mentioned in the article.
# Template variable: {html} (the article text).
# NOTE(review): unlike the other templates in this cell, this string starts
# with a newline before "<s>" - confirm whether that is intentional.
prompt_company_template = """
<s>[INST] <<SYS>>
You are a helpful, respectful and honest business analyst.
Always answer as helpfully as possible, while being safe.
Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.
Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct.
If you don't know the answer to a question, please don't share false information.
<</SYS>>
Output a list of all the companies mentioned in the article.
Article: {html}
[/INST]
"""

# Prompt asking the model to match each company to a website taken from the
# list of URLs scraped out of the article.
# Template variables: {companies} and {url_list}.
# Doubled braces are literal braces after formatting; the format example must
# render as well-formed JSON (a single closing "]" for the list), since it is
# the shape the model is asked to reproduce.
prompt_url_template = """<s>[INST] <<SYS>>
You are a helpful, respectful and honest data analyst.
Always answer as helpfully as possible, while being safe.
Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.
Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct.
If you don't know the answer to a question, please don't share false information.

Must only output JSON
<</SYS>>

Given the list of companies, find their websites from this list of URLs
Companies: {companies}
URL:{url_list}
Output the answer in JSON in the following format {{"related_companies": [{{"company_name":company_name,"company_domain":domain}}]}}.
[/INST]
"""

# Prompt asking the model for the article headline, returned as JSON of the
# form {"topic": ...}. Template variable: {html} (the article text).
# Doubled braces are literal braces after formatting.
# NOTE(review): the line "Only must only output JSON" reads as garbled; it is
# left untouched here because it is part of the text sent to the model.
prompt_topic_template = """<s>[INST] <<SYS>>
You are a helpful, respectful and honest data analyst.
Always answer as helpfully as possible, while being safe.
Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.
Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct.
If you don't know the answer to a question, please don't share false information.

Must only output JSON
<</SYS>>

Given the article, give me the headline of the article.
Output the answer in JSON in the following format {{"topic": "topic of the article"}}.
Only must only output JSON
Article: {html}
[/INST]
"""

# Prompt asking the model to merge two JSON objects by key into a single one.
# Template variables: {json_1} and {json_2}.
# Doubled braces are literal braces after formatting; the in-prompt example
# shows the expected merge of {"a": ...} and {"b": ...}.
prompt_output_template = """<s>[INST] <<SYS>>
You are a helpful, respectful and honest data analyst.
Always answer as helpfully as possible, while being safe.
Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.
Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct.
If you don't know the answer to a question, please don't share false information.

Must only output JSON
<</SYS>>

Join JSONs by key. Example: Json_1 = {{"a":[1,2,3]}} and Json_2 = {{"b":"text"}} output = {{"a":[1,2,3],"b":"text"}}

Inputs
JSON 1: {json_1}
JSON 2: {json_2}

Only output the result.
[/INST]
"""
### Get the Topic
# Every stage is a pipeline of prompt -> llm -> parser. To trace a call, pass
# config={"callbacks": [ConsoleCallbackHandler()]} to the invoke() call.
prompt_topic = PromptTemplate(input_variables=['html'], template=prompt_topic_template)
chain_topic = prompt_topic | llm | StrOutputParser()
output_topic = chain_topic.invoke(input={'html': data.page_content})

### Get Companies
# The company template only contains {html}, so that is the only variable
# declared and supplied. The article is passed as plain text
# (data.page_content), the same way the topic chain receives it, rather than
# as the Document object itself.
prompt_company = PromptTemplate(input_variables=['html'], template=prompt_company_template)
chain_company = prompt_company | llm | StrOutputParser()
output_companies = chain_company.invoke(input={'html': data.page_content})
print(output_companies)

### Get the URLs by passing in companies as list and find the domains from hrefs extracted
# The mapping step runs chain_company on the incoming input (which therefore
# must contain 'html') to fill {companies}, and forwards 'url_list' unchanged.
prompt_url = PromptTemplate(input_variables=['companies', 'url_list'], template=prompt_url_template)
chain_url = (
    {"companies": chain_company, "url_list": RunnableLambda(lambda x: x['url_list'])}
    | prompt_url
    | llm
    | StrOutputParser()
)
output_url = chain_url.invoke({'html': data.page_content, 'url_list': urls})


### Combine the JSONs
# The model merges the two JSON answers; output_parser then parses the merged
# text into an ArticleDetails instance.
prompt_output = PromptTemplate(input_variables=['json_1', 'json_2'], template=prompt_output_template)
chain_output = prompt_output | llm | output_parser
output = chain_output.invoke(input={'json_1': output_url, 'json_2': output_topic})
print(json.dumps(output.dict(), indent=2))

Llama.generate: prefix-match hit

llama_print_timings:        load time =     452.18 ms
llama_print_timings:      sample time =      18.55 ms /    31 runs   (    0.60 ms per token,  1671.07 tokens per second)
llama_print_timings: prompt eval time =     659.57 ms /   695 tokens (    0.95 ms per token,  1053.72 tokens per second)
llama_print_timings:        eval time =    1070.25 ms /    30 runs   (   35.68 ms per token,    28.03 tokens per second)
llama_print_timings:       total time =    1857.05 ms /   725 tokens
Llama.generate: prefix-match hit

llama_print_timings:        load time =     452.18 ms
llama_print_timings:      sample time =      21.93 ms /    36 runs   (    0.61 ms per token,  1641.51 tokens per second)
llama_print_timings: prompt eval time =     681.55 ms /   896 tokens (    0.76 ms per token,  1314.66 tokens per second)
llama_print_timings:        eval time =    1252.57 ms /    35 runs   (   35.79 ms per token,    27.94 tokens per second)
llama_print_timings:       to

Here is the list of companies mentioned in the article:

1. X (formerly known as Twitter)
2. Bloomberg
3. Reuters



llama_print_timings:        load time =     452.18 ms
llama_print_timings:      sample time =      21.66 ms /    36 runs   (    0.60 ms per token,  1662.43 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    1295.40 ms /    36 runs   (   35.98 ms per token,    27.79 tokens per second)
llama_print_timings:       total time =    1448.34 ms /    37 tokens
Llama.generate: prefix-match hit

llama_print_timings:        load time =     452.18 ms
llama_print_timings:      sample time =      76.51 ms /   115 runs   (    0.67 ms per token,  1503.11 tokens per second)
llama_print_timings: prompt eval time =     628.54 ms /   716 tokens (    0.88 ms per token,  1139.14 tokens per second)
llama_print_timings:        eval time =    4441.19 ms /   114 runs   (   38.96 ms per token,    25.67 tokens per second)
llama_print_timings:       total time =    5642.70 ms /   830 

{
  "related_companies": [
    {
      "company_name": "X (formerly known as Twitter)",
      "company_domain": "https://www.twitter.com"
    },
    {
      "company_name": "Bloomberg",
      "company_domain": "https://www.bloomberg.com"
    },
    {
      "company_name": "Reuters",
      "company_domain": "https://www.reuters.com"
    }
  ],
  "topic": "X is launching two new subscription tiers, including a 'Premium+' ad-free plan"
}
