In [15]:
import yaml

# Load the ticker -> Google News search URL mapping from the project's YAML file.
# The handle gets its own name so that `file` is only ever bound to the parsed
# mapping, which later cells index by ticker name.
with open('src/modules/news_summary/tickers.yaml', 'r', encoding='utf-8') as tickers_yaml:
    file = yaml.safe_load(tickers_yaml)

# Sanity check: list every configured ticker together with its search URL.
for ticker_name, search_url in file.items():
    print(ticker_name, search_url)


Ethereum_(ETH) https://news.google.com/search?q=etherium%20(ETH)&hl=en-US&gl=US&ceid=US%3Aen
Tether_(USDT) https://news.google.com/search?q=Tether%20(USDT)&hl=en-US&gl=US&ceid=US%3Aen
Binance_Coin_(BNB) https://news.google.com/search?q=Binance%20Coin%20(BNB)&hl=en-US&gl=US&ceid=US%3Aen
USD_Coin_(USDC) https://news.google.com/search?q=USD%20Coin%20(USDC)&hl=en-US&gl=US&ceid=US%3Aen
Solana_(SOL) https://news.google.com/search?q=Solana%20(SOL)&hl=en-US&gl=US&ceid=US%3Aen
XRP_(XRP) https://news.google.com/search?q=XRP%20(XRP)&hl=en-US&gl=US&ceid=US%3Aen
Cardano_(ADA) https://news.google.com/search?q=Cardano%20(ADA)&hl=en-US&gl=US&ceid=US%3Aen
Dogecoin_(DOGE) https://news.google.com/search?q=Dogecoin%20(DOGE)&hl=en-US&gl=US&ceid=US%3Aen
TRON_(TRX) https://news.google.com/search?q=TRON%20(TRX)&hl=en-US&gl=US&ceid=US%3Aen
Toncoin_(TON) https://news.google.com/search?q=Toncoin%20(TON)&hl=en-US&gl=US&ceid=US%3Aen
Polygon_(MATIC) https://news.google.com/search?q=Polygon%20(MATIC)&hl=en-US&gl=US&

In [16]:
import requests
from bs4 import BeautifulSoup

def scrape(
        url: str,
        number_headlines: int,
        printi: bool,
        timeout: float = 10.0,
) -> list:
    """
    Scrape headline texts from a Google News search-results page.

    Args:
        url: Address of the Google News search page to download.
        number_headlines: Maximum number of headlines to return.
        printi: When True, print every headline found (before truncation).
        timeout: Seconds to wait for the HTTP response before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A list with at most ``number_headlines`` headline strings, in page order.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status code.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of parsing an error page into an
    # empty headline list.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    # NOTE(review): 'JtKRv' looks like a generated CSS class name on the
    # headline anchors; if the page markup changes this returns nothing — confirm.
    headlines = soup.find_all('a', class_='JtKRv')
    all_headlines = [anchor.text for anchor in headlines]

    if printi:
        print(all_headlines)

    return all_headlines[:number_headlines]

# Smoke test: fetch up to 30 Solana headlines and echo everything that was scraped.
scrape_data = scrape(url=file["Solana_(SOL)"], number_headlines=30, printi=True)

["Solana (SOL) Reaches Key Level: What's Next? XRP Finds New Skyrocket Fuel? Pepe (PEPE) Becomes Meme Coin Leader", 'How high Solana can rally in December, and can SOL beat XRP?', 'Solana (SOL) vs. Toncoin (TON): Which Is the Smarter Investment for December?', 'Go Beyond Solana (SOL) This Cycle: $750 in These 4 Coins Could Make $1,500,000 By Early 2026', 'US SEC Swatting Down Solana (SOL) ETFs Prior to Administration Change: Report', 'Trader Who Tweeted Solana (SOL) Would Hit $250 While It Was at $1.50 Makes a Daring Call for Another', 'Ethereum ($ETH), Solana ($SOL) Binance Coin ($BNB) about to bounce big?', 'Can Bitcoin, Ethereum, Dogecoin, Shiba Inu or Solana Beat XRP In 2025? Poll Says Yes, One Coin Stands Out', 'Solana and Ethereum could soar but this meme coin has the market buzzing with 2024 hype', 'SOL Global Boosts Investment in Solana Ecosystem', 'Analyst Whales HQ: Shiba Inu Price Breakout Imminent, Solana Indicators Go Bullish While A New Presale Star Surges 610% - More Gai

In [17]:
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from datetime import datetime

import os
from dotenv import load_dotenv
# Load variables from the project's dotenv file so the environment lookup below can see them.
load_dotenv('src/.env')

# Timestamp string with hour resolution (YYYY-MM-DD-HH), taken once for the whole run.
current_date = datetime.now().strftime("%Y-%m-%d-%H")

# Chat model instance shared by the summarizer calls in this notebook.
llm = ChatOpenAI(
    model="gpt-4-turbo",
    api_key=os.environ.get("OPENAI_API_KEY"),
)

def _strip_code_fence(text: str) -> str:
    """Return ``text`` without a surrounding Markdown code fence (``` or ```json)."""
    import re

    cleaned = text.strip()
    # str.strip("```json") would treat its argument as a *set of characters*
    # and could eat legitimate leading/trailing letters; match the fence
    # markers explicitly instead.
    cleaned = re.sub(r"^```(?:json)?", "", cleaned, count=1, flags=re.IGNORECASE)
    cleaned = re.sub(r"```$", "", cleaned, count=1)
    return cleaned.strip()


def summarizer(
        prompt_path: str,
        llm: ChatOpenAI,
        headlines: list,
        ticker: str,
        current_date: str,
        print_out: bool,
        output_dir: str = "all_crypto_sentiment",
    ) -> str:
    """
    Summarize Google News headlines with an LLM and save the result as JSON.

    Args:
        prompt_path: Path to the prompt template file; the template is filled
            with the ``headlines``, ``ticker`` and ``current_date`` variables.
        llm: Chat model the rendered prompt is sent to.
        headlines: Headline strings to summarize.
        ticker: Ticker label; also used in the output file name.
        current_date: Timestamp string; also used in the output file name.
        print_out: When True, print the raw model output before it is cleaned.
        output_dir: Directory the JSON file is written to (created if missing).

    Returns:
        The model output as a string, with any surrounding code fence removed.

    Raises:
        json.JSONDecodeError: If the cleaned model output is not valid JSON.
        OSError: If the prompt cannot be read or the JSON file cannot be written.
    """
    # Imported locally so the function does not depend on the order in which
    # notebook cells happened to import these modules.
    import json
    import os

    with open(prompt_path, 'r', encoding='utf-8') as prompt_file:
        template_text = prompt_file.read()

    prompt_template = PromptTemplate(
        template=template_text,
        input_variables=["headlines", "ticker", "current_date"],
    )
    chain = prompt_template | llm | StrOutputParser()
    llm_output = chain.invoke({"headlines": headlines, "ticker": ticker, "current_date": current_date})

    if print_out:
        print(llm_output)

    # Models often wrap JSON answers in a ```json ... ``` fence; remove it.
    llm_output = _strip_code_fence(llm_output)

    # Parse before touching the disk so invalid output never leaves a partial file.
    parsed_json = json.loads(llm_output)

    # Persist the parsed result as <output_dir>/<ticker>-<current_date>.json.
    os.makedirs(output_dir, exist_ok=True)
    file_path = f"{output_dir}/{ticker}-{current_date}.json"
    with open(file_path, "w", encoding="utf-8") as json_file:
        json.dump(parsed_json, json_file, indent=4)

    return llm_output

# Test function

import json
# Smoke test: summarize the headlines scraped above for Solana.
# The ticker is named explicitly (same key that was used for scraping) instead
# of being looked up by position, so reordering the YAML file cannot pair the
# Solana headlines with a different ticker label.
out = summarizer(
    prompt_path="src/prompts/news_summarizer.md",
    llm=llm,
    headlines=scrape_data,
    ticker="Solana_(SOL)",
    current_date=current_date,
    print_out=True
)

{
  "Date": "2024-12-10-19",
  "Ticker": "Solana_(SOL)",
  "Key_Insights": "The headlines suggest a diverse perspective on Solana's performance and potential. There are optimistic price predictions with figures reaching up to $1,000 by 2025, alongside significant interest in its ecosystem from investors. However, there are also concerns regarding regulatory issues with ETFs and market competition from new and existing cryptocurrencies.",
  "Financial_Health": "Based on the headlines, Solana appears to be experiencing volatile but generally positive financial performance. Predictions of substantial price increases indicate investor confidence and potential underpinned growth. However, the mention of long liquidations suggests some market skepticism and potential instability.",
  "Market_and_Industry_Trends": "The cryptocurrency sector is showing signs of both growth and volatility. Developments like Ethereum retesting significant price points and new coins gaining traction indicate a hi

In [18]:
import sys

# Run the pipeline (scrape headlines, then summarize them) for every configured ticker.
for ticker, url in file.items():

    print(ticker, url)

    scrape_data = scrape(
        url=url,
        number_headlines=30,
        printi=False
    )

    print(scrape_data)

    # Nothing was scraped for this ticker: skip the LLM call rather than
    # asking it to summarize an empty list and saving that result.
    if not scrape_data:
        print(f"No headlines found for {ticker}; skipping summary.")
        continue

    out = summarizer(
        prompt_path="src/prompts/news_summarizer.md",
        llm=llm,
        headlines=scrape_data,
        ticker=ticker,
        current_date=current_date,
        print_out=False
    )




Ethereum_(ETH) https://news.google.com/search?q=etherium%20(ETH)&hl=en-US&gl=US&ceid=US%3Aen
Tether_(USDT) https://news.google.com/search?q=Tether%20(USDT)&hl=en-US&gl=US&ceid=US%3Aen
['Tether’s USDT approved as accepted virtual asset in Abu Dhabi', '109 Million Wallets Hold USDT: Tether Reports', 'Tether USDT wallets surge to 109 million, challenge Bitcoin and Ethereum user base', 'Tether: 109 million on-chain wallets hold USDT', 'Tether’s USDt Stablecoin Gets Green Light For Regulated Services In Abu Dhabi', 'Bitcoin or stablecoins: Where are investors placing their bets?', 'Stablecoins on shaky ground? US council calls on Congress to enact crypto oversight', 'Lutnick’s Cantor in Talks With Tether About $2 Billion Bitcoin Lending Project', 'Tether mints an additional $3B in USDt stablecoins', 'Stablecoin Issuer Tether Details ‘Extraordinary’ Growth of USDT Wallets Driven by Small Holders', 'Tether’s USDT Hits Milestone Amid Surge In Stablecoin Adoption', 'Tether’s USDT stablecoin int

In [19]:
import sys
# Rebind sys.stdout to the interpreter's original stdout object.
# NOTE(review): no visible cell redirects sys.stdout, and inside a Jupyter
# kernel sys.__stdout__ is presumably the process-level stream rather than the
# notebook's output stream, so later print() output may stop appearing in the
# notebook after this runs — confirm this cell is still needed.
sys.stdout = sys.__stdout__  # Restore the default standard output