In [38]:
import yaml

with open('src/modules/news_summary/tickers.yaml', 'r') as file:
    file = yaml.safe_load(file)

# Test function
#for a, b in file.items():
#    print(a, b)


In [39]:
import requests
from bs4 import BeautifulSoup

def scrape(
        url: str,
        number_headlines: int,
        printi: bool
) -> list:
    """
    Scrape data from Google News
    """
    html_content = requests.get(url).content
    soup = BeautifulSoup(html_content, 'html.parser')
    headlines = soup.find_all('a', class_='JtKRv')
    all_headlines = [i.text for i in headlines]

    if printi:
        print(all_headlines)

    return all_headlines[:number_headlines]

# Test function
#scrape_data = scrape(
#    url=file["Solana_(SOL)"],
#    number_headlines=30,
#    printi=True
#)

In [40]:
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from datetime import datetime

import os
from dotenv import load_dotenv
load_dotenv('src/.env')


llm = ChatOpenAI(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4-turbo")
current_date = datetime.now().strftime("%Y-%m-%d-%H")

def summarizer(
        prompt_path: str,
        llm: ChatOpenAI,
        headlines: list,
        ticker: str,
        current_date: datetime,
        print_out: bool,
    ):
    "Summarize Google News data"
    with open(prompt_path, 'r', encoding='utf-8') as file:
        markdown_string = file.read()
    prompt_template = PromptTemplate(template=markdown_string, input_variables=["headlines", "ticker", "current_date"])
    chain = prompt_template | llm | StrOutputParser()
    llm_output = chain.invoke({"headlines": headlines, "ticker": ticker, "current_date": current_date})

    if print_out:
        print(llm_output)

    # Removing the backticks and "json" tag
    llm_output = llm_output.strip("```json").strip()

    # Save json file
    parsed_json = json.loads(llm_output)

    # Step 3: Save the dictionary as a JSON file
    file_path = f"all_crypto_sentiment/{ticker}-{current_date}.json"
    with open(file_path, "w") as json_file:
        json.dump(parsed_json, json_file, indent=4)

    return llm_output

# Test function

import json
out = summarizer(
    prompt_path="src/prompts/news_summarizer.md",
    llm=llm,
    headlines=scrape_data,
    ticker=list(file.keys())[4],
    current_date=current_date,
    print_out=True
)

{
  "Date": "2024-12-18-12",
  "Ticker": "Solana_(SOL)",
  "Key_Insights": "The provided headlines largely focus on Bitcoin Cash (BCH) and its recent performance, upgrades, and predictions. They indicate a bullish sentiment on BCH with expected price increases and strategic upgrades. There is also mention of other cryptocurrencies like Ethereum, Cardano, and a comparison with Bitcoin. However, there is no direct mention of Solana (SOL) in the headlines.",
  "Financial_Health": "The financial health of Solana (SOL) cannot be directly assessed from these headlines as they do not provide information specific to Solana. However, the bullish sentiment in the broader crypto market, as indicated by Bitcoin's push towards $100,000 and rallies in other coins, could suggest a favorable environment for cryptocurrencies in general, including Solana.",
  "Market_and_Industry_Trends": "The cryptocurrency market is showing signs of bullish behavior and interest in technological enhancements, such as 

In [42]:
for key, value in file.items():

    print(key, value)

    scrape_data = scrape(
        url=file[key],
        number_headlines=30,
        printi=False
    )

    print(scrape_data)

    out = summarizer(
        prompt_path="src/prompts/news_summarizer.md",
        llm=llm,
        headlines=scrape_data,
        ticker=key,
        current_date=current_date,
        print_out=True
    )




Ethereum_(ETH) https://news.google.com/search?q=etherium%20(ETH)&hl=en-US&gl=US&ceid=US%3Aen
['Ethereum Poised for a Major Comeback in 2025, Says Senior Investment Strategist at Bitwise', 'Analyst Says It’s Time for New Ethereum All-Time High, Sees Bitcoin Going ‘Full Santa Claus Mode’', 'Blackrock Continues Ethereum Buying Spree: Experts Think $5000 ETH Price Could Ignite Historic Rally for', 'Ethereum Price Surges Past $4000, ETH Whales and On-Chain Data Hint at a New ATH Soon', 'Ethereum spot ETF net inflow reaches $145m', 'Top 3 Price Prediction Bitcoin, Ethereum, Ripple: BTC, ETH and XRP show signs of short-term correction', 'Omni Network Launches ‘Core’ Mainnet to Unify Ethereum Rollups', 'Ethereum (ETH) Poised For $6K Surge As Institutional Inflows Signal Major Breakout', 'Should You Forget Bitcoin and Buy Ethereum Instead in 2025?', '"Ethereum (ETH) to Reach $5,000 by Year-End if It Surpasses $4,100"', 'Ethereum’s Future: Will ETH Grow 300% by 2025?', "Shiba Inu's Bullish Patte

In [26]:
import os
import pandas as pd
import json


data = {
    "Date": [],
    "Ticker": [],
    "Numerical_Score": []
}

directory = os.fsencode("all_crypto_sentiment")

for file in os.listdir(directory):
    filename = os.fsdecode(file)
    if filename.endswith(".json"):
        filepath = os.path.join("all_crypto_sentiment", filename)
        with open(filepath, 'r') as json_file:
            data_dict = json.load(json_file)
        data["Date"].append(data_dict["Date"])
        data["Ticker"].append(data_dict["Ticker"])
        data["Numerical_Score"].append(data_dict["Recommendation"]["Numerical_Score"])

    else:
        print(f"Skipping this file: {filename}")

In [29]:
data

{'Date': ['2024-12-10-19',
  '2024-12-12-09',
  '2024-12-10-19',
  '2024-12-09-13',
  '2024-12-09-13',
  '2024-12-09-13',
  '2024-12-13-18',
  '2024-12-11-09',
  '2024-12-12-09',
  '2024-12-11-09',
  '2024-12-11-09',
  '2024-12-13-18',
  '2024-12-09-13',
  '2024-12-13-18',
  '2024-12-10-19',
  '2024-12-10',
  '2024-12-12-09',
  '2024-12-12-09',
  '2024-12-11-09',
  '2024-12-13-18',
  '2024-12-10-19',
  '2024-12-10-19',
  '2024-12-13-18',
  '2024-12-11-09',
  '2024-12-12-09',
  '2024-12-09-13',
  '2024-12-10-19',
  '2024-12-09-13',
  '2024-12-11-09',
  '2024-12-13-18',
  '2024-12-12-09',
  '2024-12-11-09',
  '2024-12-11-09',
  '2024-12-09-13',
  '2024-12-13-18',
  '2024-12-13-18',
  '2024-12-12-09',
  '2024-12-12-09',
  '2024-12-09-13',
  '2024-12-10-19',
  '2024-12-13-18',
  '2024-12-13-18',
  '2024-12-09-13',
  '2024-12-11-09',
  '2024-12-10-19',
  '2024-12-11-09',
  '2024-12-13-18',
  '2024-12-09-13',
  '2024-12-12-09',
  '2024-12-13-18',
  '2024-12-10-19',
  '2024-12-10-19',
  '2024

In [31]:
df = pd.DataFrame(data)
df.head()

Unnamed: 0,Date,Ticker,Numerical_Score
0,2024-12-10-19,Toncoin_(TON),7
1,2024-12-12-09,TRON_(TRX),8
2,2024-12-10-19,XRP_(XRP),5
3,2024-12-09-13,Dogecoin_(DOGE),5
4,2024-12-09-13,Ethereum_(ETH),8


In [None]:
test = df[df['Ticker'] == 'Solana_(SOL)']
test.dtypes


Date               object
Ticker             object
Numerical_Score     int64
dtype: object

In [None]:
import datetime

date_str = test['Date'].iloc[0]
date_obj = datetime.datetime.strptime(date_str, "%Y%m%d%H").date()
print(date_obj)



date_obj = datetime.datetime.strptime(clean_date_str, "%Y%m%d%H").date()

ValueError: time data '2024-12-10-19' does not match format '%Y%m%d%H'

In [None]:

for i in df["Ticker"].unique():
    print(i)
    df_test = df[df["Ticker"] == i]
    print(df_test)