Tree of Thought implementation


In [39]:
import requests
import os
import openai
import pandas as pd
import numpy as np
from transformers import pipeline

In [40]:
# Build the Hugging Face summarization pipeline once; later cells reuse it.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)

In [41]:
# Never store API keys in the notebook: anyone with access to the file (or its
# version history) can read them. The key is taken from the NEWS_API_KEY
# environment variable; if it is not set, it is requested interactively without
# being echoed or saved into the notebook.
# NOTE(review): the key that was previously committed here should be revoked.
if not os.environ.get('NEWS_API_KEY'):
    from getpass import getpass

    os.environ['NEWS_API_KEY'] = getpass('News API key: ')

In [42]:
# Query News API
def call_with_search_parameters(options, timeout=10):
    """Query the News API top-headlines endpoint and return the articles.

    Parameters
    ----------
    options : dict or None
        Search options. The keys 'category', 'sources', 'q', 'pageSize'
        (default 10) and 'page' (default 1) are forwarded as query
        parameters; entries whose value is None are not sent.
    timeout : float, optional
        Seconds to wait for the HTTP response before `requests` gives up.
        Without a timeout a stalled connection would block the notebook.

    Returns
    -------
    pandas.DataFrame
        One row per article with the columns 'source', 'author', 'title',
        'description', 'url', 'urlToImage', 'publishedAt' and 'content'.
        An empty DataFrame is returned (after printing a diagnostic) when the
        status code is not 200, the body is not JSON, or the payload has no
        'articles' entry.
    """
    options = options or {}
    api_key = os.environ.get('NEWS_API_KEY')
    url = 'https://newsapi.org/v2/top-headlines'

    call_parameters = {
        # 'country' is currently disabled:
        #'country': options.get('country'),
        'category': options.get('category'),
        'sources': options.get('sources'),
        'q': options.get('q'),
        'pageSize': options.get('pageSize', 10),
        'page': options.get('page', 1)
    }
    # Filter out None values to prevent sending empty parameters
    call_parameters = {k: v for k, v in call_parameters.items() if v is not None}

    response = requests.get(
        url,
        params=call_parameters,
        headers={'Authorization': f'Bearer {api_key}'},
        timeout=timeout,
    )

    if response.status_code != 200:
        print("Unexpected API response:", response.status_code, response.text)
        return pd.DataFrame()

    try:
        json_data = response.json()
    except ValueError:
        # A 200 response whose body is not valid JSON is treated like any
        # other unexpected response instead of crashing the cell.
        print("Unexpected API response:", response.status_code, response.text)
        return pd.DataFrame()

    if not isinstance(json_data, dict) or 'articles' not in json_data:
        print("Unexpected API response:", json_data)
        return pd.DataFrame()

    articles = json_data['articles'] or []
    df = pd.DataFrame(articles, columns=['source', 'author', 'title', 'description', 'url', 'urlToImage', 'publishedAt', 'content'])

    # Report incomplete articles. `.get` is used in the message as well,
    # because the title is exactly what may be missing here.
    for article in articles:
        if not article.get('title') or not article.get('content'):
            print(f"Warning: Missing title or content in article {article.get('title')}")
    return df
    
# Fetch up to 20 top headlines matching "israel" and preview the first rows.
df = call_with_search_parameters(dict(q='israel', pageSize=20))
df.head(n=5)



Unnamed: 0,source,author,title,description,url,urlToImage,publishedAt,content
0,"{'id': 'news24', 'name': 'News24'}",Khaya Koko,WATCH | Pandor says Israel's occupation of Pal...,In an assertive address to the United Nations ...,https://www.news24.com/news24/politics/watch-m...,https://cdn.24.co.za/files/Cms/General/d/9362/...,2023-10-25T13:47:12,In an assertive address to the United Nations ...
1,"{'id': 'il-sole-24-ore', 'name': 'Il Sole 24 O...",,"Israele, si tratta per gli ostaggi - Il Sole ...","Israele, si tratta per gli ostaggi - Il Sole ...",https://stream24.ilsole24ore.com/gallery/mondo...,https://i2.res.24o.it/stream/assets/img/galler...,2023-10-25T12:30:43Z,Members of the Palestine Red Crescent Society ...
2,"{'id': 'cbs-news', 'name': 'CBS News'}",CBS News,10/24: Prime Time with John Dickerson,John Dickerson reports on Rep. Tom Emmer dropp...,https://www.cbsnews.com/video/102423-cbs-news-...,https://assets2.cbsnewsstatic.com/hub/i/r/2023...,2023-10-25T12:21:02+00:00,Watch CBS News\r\nCopyright ©2023 CBS Interact...
3,"{'id': 'der-tagesspiegel', 'name': 'Der Tagess...",Sandra Calvez,Aktion für israelische Geiseln: Plakate in der...,Rot-weiße Ausdrucke mit der Aufschrift „Entfüh...,https://www.tagesspiegel.de/potsdam/landeshaup...,https://www.tagesspiegel.de/images/entfuhrt-pl...,2023-10-25T12:16:02+00:00,Nur einige Tage hingen die auffälligen rot-wei...
4,"{'id': 'die-zeit', 'name': 'Die Zeit'}",Johanna Roth,"USA und Israel: Solidarisch, aber mit ganz eig...",Zwischen Geiseldiplomatie und Waffenlieferunge...,https://www.zeit.de/politik/ausland/2023-10/us...,https://img.zeit.de/politik/ausland/2023-10/us...,2023-10-25T12:14:49+00:00,An der Grenze zum Gazastreifen stehen die Panz...


In [45]:
# Generate summaries with a Hugging Face model

def add_sums_to_df(df, summarize=None, min_length=5, max_length=100):
    """Add a 'summary' column holding a generated summary of each row's 'content'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'content' column. It is modified in place (the
        'summary' column is added or overwritten) and also returned.
    summarize : callable, optional
        Called as ``summarize(text, min_length=..., max_length=...)`` and
        expected to return a list whose first item is a dict with a
        'summary_text' key. Defaults to the notebook-level ``summarizer``.
    min_length, max_length : int, optional
        Length limits forwarded to ``summarize``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object. Rows whose content is missing (None / NaN),
        not a string, or blank get ``None`` as their summary.
    """
    if summarize is None:
        # Fall back to the pipeline created earlier in the notebook.
        summarize = summarizer

    if df.empty:
        # Nothing to summarise (e.g. the API call failed); just add the column.
        df['summary'] = None
        return df

    def _summary_for(text):
        # A plain truthiness test is not enough: NaN is truthy and would be
        # passed to the model, so require an actual non-blank string.
        if not isinstance(text, str) or not text.strip():
            return None
        result = summarize(text, min_length=min_length, max_length=max_length)
        return result[0]['summary_text']

    # Build the column in one go, aligned on the existing index, so duplicate
    # index labels cannot make a per-row assignment ambiguous.
    df['summary'] = pd.Series(
        [_summary_for(text) for text in df['content']],
        index=df.index,
        dtype=object,
    )
    return df

# Attach the generated summaries and preview the first rows.
df = add_sums_to_df(df=df)
df.head(n=5)


Your max_length is set to 100, but your input_length is only 47. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=23)
Your max_length is set to 100, but your input_length is only 47. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=23)
Your max_length is set to 100, but your input_length is only 41. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=20)
Your max_length is set to 100, but your input_length is only 78. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=39)
Your

Unnamed: 0,source,author,title,description,url,urlToImage,publishedAt,content,summary
0,"{'id': 'news24', 'name': 'News24'}",Khaya Koko,WATCH | Pandor says Israel's occupation of Pal...,In an assertive address to the United Nations ...,https://www.news24.com/news24/politics/watch-m...,https://cdn.24.co.za/files/Cms/General/d/9362/...,2023-10-25T13:47:12,In an assertive address to the United Nations ...,In an assertive address to the United Nations ...
1,"{'id': 'il-sole-24-ore', 'name': 'Il Sole 24 O...",,"Israele, si tratta per gli ostaggi - Il Sole ...","Israele, si tratta per gli ostaggi - Il Sole ...",https://stream24.ilsole24ore.com/gallery/mondo...,https://i2.res.24o.it/stream/assets/img/galler...,2023-10-25T12:30:43Z,Members of the Palestine Red Crescent Society ...,Members of the Palestine Red Crescent Society ...
2,"{'id': 'cbs-news', 'name': 'CBS News'}",CBS News,10/24: Prime Time with John Dickerson,John Dickerson reports on Rep. Tom Emmer dropp...,https://www.cbsnews.com/video/102423-cbs-news-...,https://assets2.cbsnewsstatic.com/hub/i/r/2023...,2023-10-25T12:21:02+00:00,Watch CBS News\r\nCopyright ©2023 CBS Interact...,"Get browser notifications for breaking news, l..."
3,"{'id': 'der-tagesspiegel', 'name': 'Der Tagess...",Sandra Calvez,Aktion für israelische Geiseln: Plakate in der...,Rot-weiße Ausdrucke mit der Aufschrift „Entfüh...,https://www.tagesspiegel.de/potsdam/landeshaup...,https://www.tagesspiegel.de/images/entfuhrt-pl...,2023-10-25T12:16:02+00:00,Nur einige Tage hingen die auffälligen rot-wei...,Nur einige Tage hingen die auffälligen rot-wei...
4,"{'id': 'die-zeit', 'name': 'Die Zeit'}",Johanna Roth,"USA und Israel: Solidarisch, aber mit ganz eig...",Zwischen Geiseldiplomatie und Waffenlieferunge...,https://www.zeit.de/politik/ausland/2023-10/us...,https://img.zeit.de/politik/ausland/2023-10/us...,2023-10-25T12:14:49+00:00,An der Grenze zum Gazastreifen stehen die Panz...,An der Grenze zum Gazastreifen stehen die Panz...


In [44]:
# Expose the News API key from the environment as a module-level name
# (None when the variable is not set).
NEWS_API_KEY = os.getenv('NEWS_API_KEY')



# Write the article table to news.csv, leaving out the index column.
df.to_csv(path_or_buf='news.csv', index=False)