In [67]:
import sys
sys.path.append('..')

import torch
from transformers import AutoTokenizer, pipeline
from langchain import LLMChain, HuggingFacePipeline, PromptTemplate
from utils.external_retrieval import get_matching_urls, get_webpage_text

In [68]:
MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [69]:
pipeline = pipeline("text-generation",
                        model=MODEL_NAME,
                        tokenizer=tokenizer,
                        torch_dtype=torch.bfloat16,
                        trust_remote_code=True,
                        device_map="auto",
                        truncation=True,
                        max_length=5000,
                        do_sample=True,
                        top_k=10,
                        num_return_sequences=1,
                        eos_token_id=tokenizer.eos_token_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [70]:
llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={"temperature":0})

In [71]:
prompt_template = """
                Write a combined summary of the following 3 articles delimited by triple backticks.
                Ignore any text that is not from the English language.
                Your response should cover all keypoints in the text.
                ```{text}```
                SUMMARY:
                """

In [72]:
prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
llm_chain = LLMChain(prompt=prompt, llm=llm)

In [85]:
matching_urls = get_matching_urls(1)
texts = []
for i in matching_urls:
    texts.append(get_webpage_text(matching_url=i))


In [86]:
matching_urls

[]

In [81]:
text = ""
for i in range(3):
    text += "\n\nARTICLE-"+str(i+1)+": "
    texts[i] = ''.join(texts[i].split('\n\n'))
    text += texts[i]   


In [82]:
print(text)



ARTICLE-1: Dan Milmo | Page 65 of 311 | The GuardianSkip to main contentSkip to navigationPrint subscriptions Sign inSearch jobsSearchUK editionUK editionUS editionAustralia editionEurope editionInternational editionThe Guardian - Back to homeNewsOpinionSportCultureLifestyleShowMoreShow MoreNewsView all NewsUK newsWorld newsClimate crisisUkraineFootballNewslettersBusinessEnvironmentUK politicsEducationSocietyScienceTechGlobal developmentObituariesOpinionView all OpinionThe Guardian viewColumnistsCartoonsOpinion videosLettersSportView all SportFootballCricketRugby unionTennisCyclingF1GolfBoxingRugby leagueRacingUS sportsCultureView all CultureFilmMusicTV & radioBooksArt & designStageGamesClassicalLifestyleView all LifestyleFashionFoodRecipesTravelHealth & fitnessWomenMenLove & sexBeautyHome & gardenMoneyCarsSearch input google-search SearchSupport usPrint subscriptionsUK editionUS editionAustralia editionEurope editionInternational editionSearch jobsHire with Guardian JobsHolidaysLive

In [73]:
output = llm_chain.run(text)

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (4096). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


In [45]:
print(output[output.find("SUMMARY"):])

SUMMARY:
                    * The article discusses the legacy of the Commonwealth Games in Glasgow, Scotland, 12 years after the event.
                    * Manchester, England, served as the precedent for the Commonwealth Games' legacy, with a focus on regeneration and investment in the area.
                    * While Manchester's east end has seen some regeneration, the area still faces issues with poverty and unemployment.
                    * Locals in Glasgow express concerns about the lack of investment in jobs and the benefits of the Games for the community.
                    * The article highlights the importance of consulting with the local community in ensuring that the Commonwealth Games lead to lasting change.
                    * The BBC's Ros Atkins discusses the potential
