In [1]:
import pkg_resources
from subprocess import call

import sys

# Distributions this notebook depends on. Each one is installed only when its
# metadata cannot be found in the current environment.
packages = ['langchain', 'langchain-core', 'langchain-community', 'ollama', 'colab-xterm', 'newsapi-python', 'jq', 'chromadb']
for package in packages:
    try:
        pkg_resources.get_distribution(package)
        print(f"{package} is already installed.")
    except pkg_resources.DistributionNotFound:
        print(f"{package} not found, installing...")
        # Invoke pip through the interpreter running this kernel so the package
        # lands in the kernel's environment, and pass an argument list so no
        # shell is involved.
        exit_code = call([sys.executable, "-m", "pip", "install", package])
        if exit_code != 0:
            # Surface the failure here instead of as an ImportError in a later cell.
            print(f"Failed to install {package} (pip exited with code {exit_code}).")

langchain is already installed.
langchain-core is already installed.
langchain-community is already installed.
ollama is already installed.
colab-xterm is already installed.
newsapi-python is already installed.
jq is already installed.
chromadb is already installed.


In [2]:
from newsapi import NewsApiClient
import json

import os

# The NewsAPI key is read from the NEWS_API_KEY environment variable so that it
# is not saved inside the notebook. The '' fallback keeps the previous behavior
# when the variable is unset; paste a key there only for throwaway local use.
newsapi = NewsApiClient(api_key=os.environ.get('NEWS_API_KEY', ''))

def latest_news(data, max_articles=9):
    """
    Fetches recent English-language articles for a query and saves them to 'news.json'.

    The file is overwritten with a JSON list of objects, each holding the
    'description' and 'url' of one article, in the order returned by the API
    (sorted by 'publishedAt').

    Parameters:
    data (str): The search query passed to the news API.
    max_articles (int): Maximum number of articles to store. Defaults to 9.

    Returns:
    None: The result is written to 'news.json'. Any error is printed and
    swallowed, in which case the file may be left unchanged.
    """
    try:
        all_articles = newsapi.get_everything(q=data, language='en', sort_by='publishedAt')
        # Slice instead of counting by hand so the cap is always enforced.
        selected_articles = all_articles['articles'][:max(max_articles, 0)]
        extracted_data = [
            {
                # `or` also covers fields that are present but null/empty, which
                # dict.get's default argument alone would not replace.
                'description': article.get('description') or 'No description available',
                'url': article.get('url') or 'No Url',
            }
            for article in selected_articles
        ]
        with open('news.json', 'w') as p:
            json.dump(extracted_data, p)
    except Exception as e:
        # Best effort: report the problem and let the caller continue.
        print(f"Failed to fetch news articles: {e}")
        return None

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import JSONLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
import json
import ollama

# Load documents from a JSON file
def load_documents(file_path):
    """
    Reads the JSON file at the given path into a list of documents.

    Every element of the file's top-level array is reduced to its
    'description' and 'url' fields before being loaded.

    Parameters:
    file_path (str): Location of the JSON file to read.

    Returns:
    list: The documents produced by the loader.
    """
    # jq expression: iterate the array and keep only the two fields of interest.
    record_schema = '.[] | { description: .description, url: .url}'
    json_loader = JSONLoader(
        file_path=file_path,
        jq_schema=record_schema,
        text_content=False,
    )
    loaded_documents = json_loader.load()
    return loaded_documents

# Split documents into manageable chunks
def split_documents(documents):
    """
    Breaks documents into overlapping chunks of bounded size.

    Parameters:
    documents (list): The documents to break up.

    Returns:
    list: The resulting document chunks.
    """
    # Chunking parameters passed to the splitter.
    splitter_options = {'chunk_size': 1000, 'chunk_overlap': 200}
    chunker = RecursiveCharacterTextSplitter(**splitter_options)
    chunks = chunker.split_documents(documents)
    return chunks

# Create embeddings and vector store
def create_vector_store(documents):
    """
    Embeds the given documents into a Chroma vector store and exposes it as a retriever.

    Parameters:
    documents (list): Documents or text chunks to embed.

    Returns:
    The retriever obtained from the vector store via `as_retriever()`.
    """
    # Embeddings are computed with the "llama3" model through Ollama.
    llama_embeddings = OllamaEmbeddings(model="llama3")
    store = Chroma.from_documents(
        documents=documents,
        embedding=llama_embeddings,
    )
    retriever = store.as_retriever()
    return retriever


In [4]:
from datetime import date
def generate_newsletter(topic):
    """
    Builds a newsletter about a topic from freshly fetched news articles.

    The function refreshes 'news.json' for the topic, indexes its contents,
    retrieves the entries most relevant to the topic, and asks the 'llama3'
    chat model to fill in a Markdown newsletter template using them as context.

    Parameters:
    topic (str): The subject to search news for and to write about.

    Returns:
    str: The text content of the model's reply.
    """
    # Rewrite news.json with articles matching the topic before reading it back.
    latest_news(topic)
    newsletter_template = f"""
        # Your Daily Digest: {date.today()}
    
        Welcome to your curated news update, bringing you the latest and most relevant headlines directly to your inbox.
    
        ## Today's Top Story
        ### [Title of the Main News Article](URL_to_article)
        Provide a brief introduction to the top story of the day, emphasizing the main points succinctly.
    
        ---
    
        ## More News
    
        ### [Second News Article Title](URL_to_second_article)
        **Summary**: Offer a concise summary of the second most important news of the day.
    
        ### [Third News Article Title](URL_to_third_article)
        **Summary**: Summarize this article, highlighting key details that inform the reader effectively.
    
        ### [Fourth News Article Title](URL_to_fourth_article)
        **Summary**: Briefly cover the fourth article, focusing on crucial points.
    
        ### [Fifth News Article Title](URL_to_fifth_article)
        **Summary**: Sum up the fifth article, ensuring to pinpoint essential information.
    
        ---
    
        **Instructions**:
        - Write a news summary for the topic: '{topic}'.
        - Ensure the news summaries do not repeat information.
        - Follow the structure provided above as a template for the news summary.
        """
    # Load -> split -> index, producing a retriever over the saved articles.
    news_retriever = create_vector_store(split_documents(load_documents('news.json')))

    # Concatenate the retrieved passages, separated by blank lines, as context.
    retrieved_texts = [hit.page_content for hit in news_retriever.invoke(topic)]
    context_block = "\n\n".join(retrieved_texts)
    full_prompt = f"Question: {newsletter_template}\n\nContext: {context_block}"

    user_message = {'role': 'user', 'content': full_prompt}
    reply = ollama.chat(model='llama3', messages=[user_message])
    return reply['message']['content']

In [5]:
# Generate the newsletter for the 'World News' topic; this fetches articles,
# rewrites news.json, and queries the llama3 model through Ollama.
newsletter = generate_newsletter('World News')

In [6]:
from IPython.display import display, Markdown
# Render the generated newsletter text as Markdown in the cell output.
display(Markdown(newsletter))

Here is the daily digest with the provided news articles:

**Your Daily Digest: 2024-05-06**

Welcome to your curated news update, bringing you the latest and most relevant headlines directly to your inbox.

## Today's Top Story
### Ahead of the 50th anniversary of Mama Cass Elliot's death, her daughter's memoir hopes to help people know the truth (https://www.bbc.co.uk/news/entertainment-arts-68949240)

A new memoir by Mama Cass Elliot's daughter aims to shed light on the truth behind the singer's life and death. The book is set to be released ahead of the 50th anniversary of her passing, and it promises to give readers a more accurate understanding of Elliot's life and legacy.

---

## More News

### Some of the strides taken by our repositories of artworks and other important artifacts will be accessible – and free of charge – on May 9, when this year’s Museum Day event takes place. (https://www.jpost.com/must/must-visit/article-799976)

This year's Museum Day event is just around the corner, and some of Israel's most esteemed museums are offering special deals for visitors. On May 9, museum-goers can enjoy free admission to a selection of institutions, providing an opportunity for art lovers to explore new exhibits and collections.

### Commemorating six million victims of the Holocaust on Sunday night and Monday, let us remember that the main difference between then and today is the fact that today we have an independent state. (https://www.jpost.com/opinion/article-799973)

As the world marks Holocaust Remembrance Day, a columnist reflects on the significance of the event in modern times. The piece highlights the importance of remembering the past while also acknowledging the progress made since then.

### Database Marketing, Data Mining, Analytics, Big Data, Forecasting, Segmentation, Predictive Modeling, Measurement, Catalog, Business (https://blog.minethatdata.com/2024/05/the-catalog-industry-thirty-year-view.html)

In a fascinating look back at the past three decades, an industry expert examines the evolution of database marketing and its related fields. The article provides insights into how the industry has grown and adapted over time, highlighting key trends and developments.

---

*Instructions*: Write a news summary for the topic: 'World News'. Ensure the news summaries do not repeat information. Follow the structure provided above as a template for the news summary.