In [8]:
import yaml

with open('src/modules/news_summary/tickers.yaml', 'r') as file:
    file = yaml.safe_load(file)

# Test function
#for a, b in file.items():
#    print(a, b)


In [9]:
import requests
from bs4 import BeautifulSoup

def scrape(
        url: str,
        number_headlines: int,
        printi: bool
) -> list:
    """
    Scrape data from Google News
    """
    html_content = requests.get(url).content
    soup = BeautifulSoup(html_content, 'html.parser')
    headlines = soup.find_all('a', class_='JtKRv')
    all_headlines = [i.text for i in headlines]

    if printi:
        print(all_headlines)

    return all_headlines[:number_headlines]

# Test function
#scrape_data = scrape(
#    url=file["Solana_(SOL)"],
#    number_headlines=30,
#    printi=True
#)

In [6]:
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from datetime import datetime

import os
from dotenv import load_dotenv
load_dotenv('src/.env')


llm = ChatOpenAI(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4-turbo")
current_date = datetime.now().strftime("%Y-%m-%d-%H")

def summarizer(
        prompt_path: str,
        llm: ChatOpenAI,
        headlines: list,
        ticker: str,
        current_date: datetime,
        print_out: bool,
    ):
    "Summarize Google News data"
    with open(prompt_path, 'r', encoding='utf-8') as file:
        markdown_string = file.read()
    prompt_template = PromptTemplate(template=markdown_string, input_variables=["headlines", "ticker", "current_date"])
    chain = prompt_template | llm | StrOutputParser()
    llm_output = chain.invoke({"headlines": headlines, "ticker": ticker, "current_date": current_date})

    if print_out:
        print(llm_output)

    # Removing the backticks and "json" tag
    llm_output = llm_output.strip("```json").strip()

    # Save json file
    parsed_json = json.loads(llm_output)

    # Step 3: Save the dictionary as a JSON file
    file_path = f"all_crypto_sentiment/{ticker}-{current_date}.json"
    with open(file_path, "w") as json_file:
        json.dump(parsed_json, json_file, indent=4)

    return llm_output

# Test function

import json
out = summarizer(
    prompt_path="src/prompts/news_summarizer.md",
    llm=llm,
    headlines=scrape_data,
    ticker=list(file.keys())[4],
    current_date=current_date,
    print_out=True
)

```json
{
  "Date": "2024-12-12-09",
  "Ticker": "Solana_(SOL)",
  "Key_Insights": "The headlines indicate a vibrant interest in meme coins like Dogecoin, but also highlight significant attention towards Solana (SOL) in the context of competing with Ethereum and potentially leading the altcoin race. There is also mention of Solana rebounding alongside other major cryptocurrencies.",
  "Financial_Health": "Based on the headlines, Solana appears to be maintaining a resilient market position, especially noted during rebounds in cryptocurrency market values. The comparison with Ethereum suggests a robust technological framework that may be attracting investor confidence.",
  "Market_and_Industry_Trends": "The broader cryptocurrency market is showing signs of recovery and resilience, with specific attention to technological advancements and market competition. Solana's mention alongside Ethereum suggests it is part of significant industry discussions about scalable blockchain solutions.",
 

In [10]:
for key, value in file.items():

    print(key, value)

    scrape_data = scrape(
        url=file[key],
        number_headlines=30,
        printi=True
    )

    print(scrape_data)

    out = summarizer(
        prompt_path="src/prompts/news_summarizer.md",
        llm=llm,
        headlines=scrape_data,
        ticker=key,
        current_date=current_date,
        print_out=True
    )




Ethereum_(ETH) https://news.google.com/search?q=etherium%20(ETH)&hl=en-US&gl=US&ceid=US%3Aen
['Ethereum Price Forecast: ETH could see new all-time high above $5,000, on-chain data signals bullish momentum', 'Ethereum ETFs See $500 Million Boost in Just Two Days', 'BlackRock And Fidelity Spark Ethereum (ETH) Bullish Surge With $500M Purchase', 'Ethereum Price Prediction Suggests ETH Could Surge to $20,000 — What Are Whales Doing?', 'Ethereum Price Could Hit $5,000 Due to Institutional Demand: CryptoQuant', "Microsoft 'Open' To Ethereum ETF If This Happens: VanEck Exec", '$4.26 Billion in Bitcoin and Ethereum Options Expires Today: What’s Next for Crypto?', 'Ethereum supply squeeze and ETF demand set stage for $5,000 breakout', 'Market Awaits Impact of Nearly $3 Billion Bitcoin and Ethereum Options Expiring Today', 'Ethereum Expected to Hit $5,000 Amid Rising Demand: CryptoQuant', 'How To Stake Ethereum And Earn Rewards', 'BlackRock and Fidelity Purchase $500 Million in Ethereum ETFs Fol

In [11]:
import os
import pandas as pd
import json


data = {
    "Date": [],
    "Ticker": [],
    "Numerical_Score": []
}

directory = os.fsencode("all_crypto_sentiment")

for file in os.listdir(directory):
    filename = os.fsdecode(file)
    if filename.endswith(".json"):
        filepath = os.path.join("all_crypto_sentiment", filename)
        with open(filepath, 'r') as json_file:
            data_dict = json.load(json_file)
        data["Date"].append(data_dict["Date"])
        data["Ticker"].append(data_dict["Ticker"])
        data["Numerical_Score"].append(data_dict["Recommendation"]["Numerical_Score"])

    else:
        print(f"Skipping this file: {filename}")

In [19]:
df = pd.DataFrame(data)




for i in df["Ticker"].unique():
    print(i)
    df_test = df[df["Ticker"] == i]
    print(df_test)

Toncoin_(TON)
             Date         Ticker  Numerical_Score
0   2024-12-10-19  Toncoin_(TON)                7
13  2024-12-12-09  Toncoin_(TON)                5
22  2024-12-09-13  Toncoin_(TON)                8
26  2024-12-11-09  Toncoin_(TON)                5
TRON_(TRX)
             Date      Ticker  Numerical_Score
1   2024-12-12-09  TRON_(TRX)                8
12     2024-12-10  TRON_(TRX)                7
23  2024-12-11-09  TRON_(TRX)                5
27  2024-12-09-13  TRON_(TRX)                8
XRP_(XRP)
             Date     Ticker  Numerical_Score
2   2024-12-10-19  XRP_(XRP)                5
14  2024-12-12-09  XRP_(XRP)                8
20  2024-12-09-13  XRP_(XRP)                5
25  2024-12-11-09  XRP_(XRP)                5
Dogecoin_(DOGE)
             Date           Ticker  Numerical_Score
3   2024-12-09-13  Dogecoin_(DOGE)                5
15  2024-12-11-09  Dogecoin_(DOGE)                3
21  2024-12-10-19  Dogecoin_(DOGE)                2
24  2024-12-12-09  Dogecoi