In [38]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import os
import re
import time
import random

class FinancialCrawler:
    """Resumable crawler that stores one Finviz snapshot row per ticker.

    The ticker universe is read from ``tickers_file`` (a CSV with a
    ``Tickers`` column) and sorted. Previously crawled rows are read from
    ``csv_file`` when it exists. Resuming is positional: the number of rows
    already stored is taken as the index of the next ticker to crawl, so the
    results file is expected to contain exactly one row per ticker, in sorted
    ticker order.

    Attributes:
        tickers_file: Path of the CSV holding the ticker list.
        csv_file: Path of the CSV the results are loaded from and saved to.
        tickers: Sorted list of ticker symbols (strings).
        results_df: DataFrame of crawled values, indexed by ticker.
        start_ticker: First ticker still to crawl, or ``None`` when every
            ticker already has a row.
        start_ticker_index: Position of ``start_ticker`` in ``tickers``
            (equal to the number of rows already stored).
    """

    # Seconds to wait on a single HTTP request before requests raises a
    # timeout instead of blocking the whole crawl forever.
    REQUEST_TIMEOUT = 30

    # Browser-like headers sent with every request (identical for all tickers).
    REQUEST_HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
        'Referer': 'https://example.com',
        'Accept-Language': 'ko-KR,ko;q=0.9',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Connection': 'keep-alive'
    }

    # Column labels of a fresh results file, in the order the values are
    # extracted from the page (value column by value column, top to bottom).
    # NOTE(review): 'EPS next Y' appears twice; kept as-is because it is part
    # of the existing CSV schema.
    RESULT_COLUMNS = ['Index', 'Market Cap', 'Income', 'Sales', 'Book/sh', 'Cash/sh', 'Dividend Est.', 'Dividend TTM', 'Dividend Ex-Date', 'Employees', 'Option/Short', 'Sales Surprise', 'SMA20', 'P/E', 'Forward P/E', 'PEG', 'P/S', 'P/B', 'P/C', 'P/FCF', 'Quick Ratio', 'Current Ratio', 'Debt/Eq', 'LT Debt/Eq', 'EPS Surprise', 'SMA50', 'EPS (ttm)', 'EPS next Y', 'EPS next Q', 'EPS this Y', 'EPS next Y', 'EPS next 5Y', 'EPS past 5Y', 'Sales past 5Y', 'EPS Y/Y TTM', 'Sales Y/Y TTM', 'EPS Q/Q', 'Sales Q/Q', 'SMA200', 'Insider Own', 'Insider Trans', 'Inst Own', 'Inst Trans', 'ROA', 'ROE', 'ROI', 'Gross Margin', 'Oper. Margin', 'Profit Margin', 'Payout', 'Earnings', 'Trades', 'Shs Outstand', 'Shs Float', 'Short Float', 'Short Ratio', 'Short Interest', '52W Range', '52W High', '52W Low', 'RSI (14)', 'Recom', 'Rel Volume', 'Avg Volume', 'Volume', 'Perf Week', 'Perf Month', 'Perf Quarter', 'Perf Half Y', 'Perf Year', 'Perf YTD', 'Beta', 'ATR (14)', 'Volatility', 'Target Price', 'Prev Close', 'Price', 'Change']

    def __init__(self, tickers_file='tickers.csv', csv_file='financial_results.csv'):
        """Load the ticker list and any earlier results.

        Args:
            tickers_file: CSV file with a ``Tickers`` column.
            csv_file: Results CSV; read if present and written by
                ``fetch_all_data``.

        Raises:
            FileNotFoundError: If ``tickers_file`` does not exist.
        """
        self.tickers_file = tickers_file
        self.csv_file = csv_file

        self.tickers = []
        self.results_df = None
        self.start_ticker = None
        self.start_ticker_index = 0

        self.load_tickers()
        self.load_past_data()

    def load_tickers(self):
        """Read the ``Tickers`` column into ``self.tickers`` as sorted strings.

        Raises:
            FileNotFoundError: If ``self.tickers_file`` does not exist.
        """
        if not os.path.exists(self.tickers_file):
            # Fail loudly when the ticker file is missing.
            raise FileNotFoundError(f"{self.tickers_file} not found.")

        # Read everything as plain text with NA detection disabled; otherwise
        # symbols such as "NA" are parsed as missing values and end up as the
        # string 'nan'.
        df = pd.read_csv(self.tickers_file, dtype=str, keep_default_na=False)
        self.tickers = sorted(str(symbol) for symbol in df["Tickers"])

    def load_past_data(self):
        """Load earlier results (or create an empty table) and set the resume point.

        ``start_ticker_index`` is the number of rows already stored.
        ``start_ticker`` is the ticker at that position, or ``None`` when the
        stored rows already cover the whole ticker list.
        """
        if os.path.exists(self.csv_file):
            self.results_df = pd.read_csv(self.csv_file, index_col=0)
        else:
            # No results file yet: start from an empty frame.
            self.results_df = pd.DataFrame(columns=self.RESULT_COLUMNS)

        self.start_ticker_index = len(self.results_df)
        if self.start_ticker_index < len(self.tickers):
            self.start_ticker = self.tickers[self.start_ticker_index]
        else:
            # Everything is already crawled; indexing here used to raise
            # IndexError and made a finished crawl impossible to reopen.
            self.start_ticker = None

    def _fetch_page(self, url):
        """Return the HTML of ``url``, retrying for as long as the server answers 429."""
        res = requests.get(url, headers=self.REQUEST_HEADERS, timeout=self.REQUEST_TIMEOUT)
        while res.status_code == 429:
            print("너무 많은 요청.. 재시도")
            # Random pause so retries are not sent at a fixed rhythm.
            time.sleep(random.uniform(3, 5))
            res = requests.get(url, headers=self.REQUEST_HEADERS, timeout=self.REQUEST_TIMEOUT)
        return res.text

    def _extract_snapshot_values(self, html, ticker):
        """Return the snapshot-table values of one quote page as a flat list.

        The odd-numbered cells (1, 3, ..., 11) of every table row are read as
        values and emitted column by column, which is the order of the result
        columns. A row that is too short contributes ``None`` for the cells
        it lacks.

        Raises:
            RuntimeError: If the page has no ``snapshot-table2`` table.
            ValueError: If the number of values differs from the number of
                result columns.
        """
        soup = BeautifulSoup(html, "html.parser")
        table = soup.find("table", {"class": "snapshot-table2"})
        if table is None:
            raise RuntimeError(
                f"snapshot table not found for ticker {ticker!r}"
            )

        rows = [
            [cell.get_text(strip=True) for cell in row.find_all("td")]
            for row in table.find_all("tr")
        ]
        values = [
            row[col] if col < len(row) else None
            for col in range(1, 12, 2)
            for row in rows
        ]

        expected = len(self.results_df.columns)
        if len(values) != expected:
            raise ValueError(
                f"ticker {ticker!r}: extracted {len(values)} values, "
                f"expected {expected}"
            )
        return values

    def fetch_all_data(self):
        """Crawl every ticker from the resume point on and save after each one.

        The results are written to ``self.csv_file`` after every ticker so an
        interrupted run can continue from where it stopped.

        Raises:
            RuntimeError: If a quote page lacks the snapshot table.
            ValueError: If a page yields an unexpected number of values.
        """
        for ticker in self.tickers[self.start_ticker_index:]:
            print(f"현재 탐색 티커 : {ticker}")
            url = f'https://finviz.com/quote.ashx?t={ticker}&p=d'
            html = self._fetch_page(url)

            self.results_df.loc[ticker] = self._extract_snapshot_values(html, ticker)
            # Save to the configured path (previously hard-coded to
            # 'financial_results.csv', which ignored the csv_file argument).
            self.results_df.to_csv(self.csv_file, index=True)
        

        
# Build the crawler with its default file names; the constructor loads the
# ticker list and any previously saved results.
financial_crawler = FinancialCrawler()
# Crawl every ticker from the resume position onward.
financial_crawler.fetch_all_data()
# Report the resume position. Both attributes are set while the crawler is
# constructed and are not updated by fetch_all_data, so they describe where
# this run started rather than where it ended.
print(f"financial_crawler.start_ticker : {financial_crawler.start_ticker}")
print(f"self.start_ticker_index : {financial_crawler.start_ticker_index}")

현재 탐색 티커 : CCTS
현재 탐색 티커 : CCU
현재 탐색 티커 : CDE
현재 탐색 티커 : CDIO
현재 탐색 티커 : CDLR
현재 탐색 티커 : CDLX


KeyboardInterrupt: 