<a href="https://colab.research.google.com/github/m4rk-lewis/GPT-3_breaking_news_parse_with_sentiment_classify/blob/main/GPT_3_breaking_financial_news_summary_and_sentiment_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Install required packages
!pip install feedparser beautifulsoup4 openai

In [2]:
# Step 2: Download and parse the RSS feed
import feedparser
def get_feed(url):
    return feedparser.parse(url)

In [3]:
# Step 3: Extract HTML from the description
from bs4 import BeautifulSoup

def extract_text(html):
    soup = BeautifulSoup(html, "html.parser")
    return soup.get_text()

In [4]:
import openai
openai.api_key = "insert-openAI-API-here"


# Step 4: Summarize using GPT-3
def summarize_text(text):
    response = openai.Completion.create(
        engine="text-davinci-003", 
        prompt=f"remove all unnecesary whitespace and formatting, then provide a short summary of less than 30 words of the following text: {text}",
        temperature=0.7,
        max_tokens=50,
        top_p=1,
        frequency_penalty=0.5,
        presence_penalty=0,
    )
    return response.choices[0].text.strip()

# Step 5: Sentiment classification
def sentiment_classification(text):
    response = openai.Completion.create(
        engine="text-davinci-003", 
        prompt=f"classify the sentiment of this breaking financial news article as a numerical float, with a range of -1 to 1 with 0.1 granularity, where -1 is maximum bearishness and 1 is maximum bullishness in relation to equities: {text}",
        temperature=0.7,
        max_tokens=50,
        top_p=1,
        frequency_penalty=0.5,
        presence_penalty=0,
    )
    return response.choices[0].text.strip()

In [27]:
# Step 6: Store the summarized information in a SQLite database
import sqlite3

def create_db():
    conn = sqlite3.connect('news.db')
    cursor = conn.cursor()
    cursor.execute('''CREATE TABLE IF NOT EXISTS news (published TEXT, title TEXT, summary TEXT, sentiment TEXT)''')
    return conn, cursor

def insert_news(cursor, published, title, summary, sentiment):
    cursor.execute("INSERT INTO news (published, title, summary, sentiment) VALUES (?, ?, ?, ?)", (published, title, summary, sentiment))
    # Commit the changes to the database
    conn.commit()

In [28]:
# Step 7: Continuously monitor the RSS feed for updates
import time

def monitor_feed(url, conn, cursor, interval=60):
    seen_titles = set()
    while True:
        feed = get_feed(url)
        # print(feed.entries[0])
        for entry in feed.entries:
            title = entry.title
            if title not in seen_titles:
                if entry.category == "News" or entry.category =="Central Banks" or entry.category =="Technical Analysis":
                    seen_titles.add(title)
                    category = extract_text(entry.category)
                    published = extract_text(entry.published)
                    description = extract_text(entry.description)
                    summary = summarize_text(description[:4000])
                    sentiment = sentiment_classification(description[:4000])  
                    insert_news(cursor, published, title, summary, sentiment)  
                    print(f"{published} >>> {category} >>> {title} >>> Summary: {summary} >>> Sentiment: {sentiment}")
        # Close the database connection and sleep
        conn.close()
        time.sleep(interval)


In [29]:
'''
# This code will download the RSS feed and process new entries every 60 seconds. 
Adjust the interval parameter in monitor_feed() to control how often the feed is checked.
'''
if __name__ == "__main__":
    url = "https://www.forexlive.com/feed"
    conn, cursor = create_db()
    monitor_feed(url, conn, cursor)


Tue, 18 Apr 2023 10:30:56 GMT >>> News >>> Stocks in a better mood so far today >>> Summary: Stocks up on optimistic tone; Eurostoxx, DAX, CAC 40 and FTSE all rising. >>> Sentiment: 0.7
Tue, 18 Apr 2023 09:40:04 GMT >>> Technical Analysis >>> GBPUSD Technical Analysis - Rangebound >>> Summary: GBPUSD has failed to break above range top, boosted lower by US data. Now bouncing near prior swing, possible rally towards 1.2444. Sellers await 1.2344 break for bearish setup. >>> Sentiment: 0.0
Tue, 18 Apr 2023 09:36:26 GMT >>> Technical Analysis >>> USDCAD Technical Analysis - Bearish Trend Returns >>> Summary: Bearish trend for USDCAD intact, Retail Sales and hawkish Fed remarks boost USD, buyers need break of trendline to gain conviction. >>> Sentiment: -0.2
Tue, 18 Apr 2023 09:32:03 GMT >>> Technical Analysis >>> EURUSD Technical Analysis >>> Summary: EURUSD rejected at February high, double top likely; buyers may lean on trendline for support. >>> Sentiment: 0.3
Tue, 18 Apr 2023 09:00:13 

ProgrammingError: ignored