In [38]:
pip install beautifulsoup4 requests transformers



In [37]:
import requests
from bs4 import BeautifulSoup
from transformers import BertTokenizer, BertForSequenceClassification
from torch.nn.functional import softmax
import torch

# BERT model: identifier of the pretrained sentiment checkpoint that is passed
# to both from_pretrained() calls below, so tokenizer and classifier match.
bert = 'nlptown/bert-base-multilingual-uncased-sentiment'
# Module-level tokenizer and classifier; analyze_sentiment() reads these globals.
tokenizer = BertTokenizer.from_pretrained(bert)
model = BertForSequenceClassification.from_pretrained(bert)

# News pages whose link texts are scraped as headlines. Each page is listed
# exactly once so that no single source is crawled and counted twice in the
# positive-headline percentage.
urls = [
    'https://www.investing.com/news/stock-market-news',
    'https://www.cnbc.com/finance/',
    'https://finance.yahoo.com/',
]

def crawl_news(urls):
    """Collect the non-empty text of every ``<a>`` tag on each page in *urls*.

    Pages that cannot be fetched (connection error, timeout, or an HTTP error
    status) are reported and skipped, so one failing site neither aborts the
    crawl nor contributes the links of an error page.

    Args:
        urls: Iterable of page URLs to download.

    Returns:
        A list of link texts (str) in page order, across all reachable pages.
    """
    # Browser-like User-Agent, identical to the one the tag-inspection helper
    # in this file sends. NOTE(review): presumably some of these sites refuse
    # the default client User-Agent -- confirm per site.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    headlines = []
    for url in urls:
        try:
            # Without a timeout requests may wait on a stalled server forever.
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"Skipping {url}: {exc}")
            continue

        soup = BeautifulSoup(response.text, 'html.parser')
        for item in soup.find_all('a'):
            headline_text = item.get_text(strip=True)
            if headline_text:
                headlines.append(headline_text)
    return headlines

def analyze_sentiment(headlines):
    """Return the percentage (0-100) of *headlines* classified as positive.

    Each headline is run through the module-level ``tokenizer`` and ``model``.
    A headline counts as positive when the predicted rating is 4 or 5 stars
    on the model's 5-point scale.

    Args:
        headlines: Sequence of headline strings.

    Returns:
        The share of positive headlines as a percentage, or 0 when
        *headlines* is empty.
    """
    total_headlines = len(headlines)
    if total_headlines == 0:
        return 0

    positive_count = 0
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        for headline in headlines:
            inputs = tokenizer(headline, return_tensors='pt', max_length=512, truncation=True)
            outputs = model(**inputs)
            # softmax is monotonic, so the argmax of the raw logits is the
            # same class the softmax probabilities would select.
            class_index = torch.argmax(outputs.logits, dim=-1).item()

            # argmax yields a zero-based class index (0..4); per the model
            # card, index i corresponds to a rating of i + 1 stars.
            stars = class_index + 1

            # A rating of 4 or 5 stars is treated as positive (5-point scale).
            if stars >= 4:
                positive_count += 1

    return (positive_count / total_headlines) * 100

def classify_market(positive_percentage):
    """Map a positive-headline percentage to a market label.

    Returns "Bull" when the percentage is strictly above 70, "Bear" when it
    is strictly below 30, and "Neutral" for everything in between (both
    boundary values included).
    """
    label = "Neutral"
    if positive_percentage > 70:
        label = "Bull"
    elif positive_percentage < 30:
        label = "Bear"
    return label

# Run the main pipeline: crawl the pages, score the headlines, then report.
headlines = crawl_news(urls)
if not headlines:
    # Nothing was scraped, so there is nothing to classify.
    print("none")
else:
    positive_percentage = analyze_sentiment(headlines)
    market_status = classify_market(positive_percentage)
    print(f"Market Status: {market_status}")


Market Status: Neutral


In [None]:
'''#헤드라인 태그 찾기
import requests
from bs4 import BeautifulSoup

def fetch_and_display_h1_tags(url):
    """Fetch *url* and print the stripped text of every ``<a>`` tag it contains.

    Despite the name and the "H1 Tag" label in the output, the tags that are
    searched for are anchors (``a``), not ``h1`` elements.
    """
    # Send a browser-like User-Agent header with the request.
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    page = requests.get(url, headers={'User-Agent': user_agent})

    # Parse the raw response bytes and walk all anchor tags in document order.
    parsed = BeautifulSoup(page.content, 'html.parser')
    for index, h1 in enumerate(parsed.find_all('a')):
        print(f"H1 Tag {index+1}: {h1.text.strip()}")

# Web page URL to inspect
url = 'https://finance.yahoo.com/'
# Call the helper by the name it is actually defined under in this cell.
fetch_and_display_h1_tags(url)
