Skip to content

Commit

Permalink
Add ticker crawler
Browse files Browse the repository at this point in the history
  • Loading branch information
kujyp committed Jan 3, 2021
1 parent c3cea20 commit 509b731
Show file tree
Hide file tree
Showing 6 changed files with 178 additions and 5 deletions.
5 changes: 4 additions & 1 deletion dcbot/crawler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,11 @@ def summary_content(self) -> str:
def summary_child_content(self) -> str:
    """Return a short summary of the child content.

    Abstract hook: concrete CrawledContent subclasses must override this;
    the base implementation always raises.

    Raises:
        NotImplementedError: always, in this base class.
    """
    raise NotImplementedError()

def stack_crawled_time(self) -> "CrawledContent":
    """Stamp this content with the current time and return self for chaining.

    Bug fix: the return annotation promises a ``CrawledContent``, but the
    original body fell off the end and returned ``None``, so chained calls
    (e.g. ``crawler.crawl().stack_crawled_time().summary_content()``) would
    break. Now returns ``self``.

    Returns:
        This instance, so the call can be used fluently.
    """
    # Timezone is hard-coded to UTC+9 (Korea Standard Time), matching the
    # offset used elsewhere in this project.
    self.set_crawled_time(
        datetime.datetime.now(tz=datetime.timezone(datetime.timedelta(hours=9)))
    )
    return self

class Crawler:

class SeleniumCrawler:
def __init__(self, web_driver_container: WebDriverContainer) -> None:
    """Store the injected WebDriver wrapper for use by crawler subclasses.

    Args:
        web_driver_container: Wrapper around a Selenium WebDriver.
            NOTE(review): assumed to expose at least ``get()`` and
            ``find_element_by_id()`` — confirmed only by subclass usage.
    """
    self.web_driver_container = web_driver_container

Expand Down
6 changes: 3 additions & 3 deletions dcbot/crawler/feargreed.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime
from typing import Tuple, Optional, Dict, Any

from dcbot.crawler import Crawler, CrawledContent
from dcbot.crawler import SeleniumCrawler, CrawledContent


class FearGreedCrawledContent(CrawledContent):
Expand Down Expand Up @@ -37,14 +37,14 @@ def summary_child_content(self) -> str:
return ret


class FearGreedCrawler(Crawler):
class FearGreedCrawler(SeleniumCrawler):
def crawl(self) -> FearGreedCrawledContent:
ret = FearGreedCrawledContent()

url = "https://money.cnn.com/data/fear-and-greed/"
self.web_driver_container.get(url)
ret.append_reference(url)
ret.set_crawled_time(datetime.datetime.now(tz=datetime.timezone(datetime.timedelta(hours=9))))
ret.stack_crawled_time()

needle_div = self.web_driver_container.find_element_by_id("needleChart")
for each in needle_div.find_elements_by_xpath("ul/li"):
Expand Down
89 changes: 89 additions & 0 deletions dcbot/crawler/tickers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from typing import Tuple, List

import requests
from dataclasses import dataclass


@dataclass
class Ticker:
    """A single listed security from the NASDAQ Trader symbol directory.

    Bug fix: the original combined ``@dataclass`` with a hand-written
    ``__init__`` and no field annotations. With zero declared fields, the
    dataclass-generated ``__eq__`` compared nothing, so EVERY pair of
    Ticker instances was equal — membership checks like
    ``Ticker(...) in crawled`` passed vacuously. Declaring the fields
    restores the constructor signature AND a field-wise ``__eq__``/``__repr__``.
    """

    # Exchange ticker symbol, e.g. "AAPL".
    symbol: str
    # Full security name as listed in the directory file.
    security_name: str
    # True when the directory's ETF column is "Y".
    is_etf: bool


class TickerCrawler:
    """Downloads the NASDAQ Trader symbol directory files and parses them
    into ``Ticker`` records.

    Fixes vs. original: removed a redundant second ``split("|")`` per data
    row in both parsers, added a request timeout so a stalled connection
    cannot hang the crawler forever, and added return annotations.
    """

    @staticmethod
    def crawl() -> List[Ticker]:
        """Return tickers from both the NASDAQ-listed and other-listed files."""
        ret: List[Ticker] = []
        ret.extend(TickerCrawler.get_nasdaqlisted())
        ret.extend(TickerCrawler.get_otherlisted())
        return ret

    @staticmethod
    def get_nasdaqlisted() -> List[Ticker]:
        """Fetch and parse nasdaqlisted.txt (NASDAQ-listed securities).

        Expected pipe-delimited columns:
        Symbol|Security Name|Market Category|Test Issue|Financial Status|Round Lot Size|ETF|...
        """
        ret: List[Ticker] = []

        url = "http://www.nasdaqtrader.com/dynamic/SymDir/nasdaqlisted.txt"
        # timeout prevents an indefinite hang if the server stops responding.
        r = requests.get(url, timeout=30)
        for idx, eachline in enumerate(r.text.splitlines()):
            # The file's trailer line starts with "File Creation Time" and
            # is not a data row.
            if eachline.startswith("File Creation Time"):
                continue

            splited = eachline.split("|")
            if idx == 0:
                # Header row: verify the upstream column layout has not
                # changed before trusting positional indexing below.
                assert_msg = "Updated. Need to rewrite codes."
                assert splited[0] == "Symbol", assert_msg
                assert splited[1] == "Security Name", assert_msg
                assert splited[2] == "Market Category", assert_msg
                assert splited[3] == "Test Issue", assert_msg
                assert splited[4] == "Financial Status", assert_msg
                assert splited[5] == "Round Lot Size", assert_msg
                assert splited[6] == "ETF", assert_msg
                continue

            # Columns 2-5 (market category, test issue, financial status,
            # round lot size) are deliberately ignored.
            symbol = splited[0].strip()
            security_name = splited[1].strip()
            is_etf = splited[6].strip() == "Y"
            ret.append(Ticker(symbol, security_name, is_etf))
        return ret

    @staticmethod
    def get_otherlisted() -> List[Ticker]:
        """Fetch and parse otherlisted.txt (non-NASDAQ-listed securities).

        Expected pipe-delimited columns:
        ACT Symbol|Security Name|Exchange|CQS Symbol|ETF|Round Lot Size|Test Issue|NASDAQ Symbol
        """
        ret: List[Ticker] = []

        url = "http://www.nasdaqtrader.com/dynamic/SymDir/otherlisted.txt"
        # timeout prevents an indefinite hang if the server stops responding.
        r = requests.get(url, timeout=30)
        for idx, eachline in enumerate(r.text.splitlines()):
            # Skip the non-data trailer line.
            if eachline.startswith("File Creation Time"):
                continue

            splited = eachline.split("|")
            if idx == 0:
                # Header row: verify the upstream column layout has not
                # changed. Note the ETF/Round Lot/Test Issue column order
                # differs from nasdaqlisted.txt.
                assert_msg = "Updated. Need to rewrite codes."
                assert splited[0] == "ACT Symbol", assert_msg
                assert splited[1] == "Security Name", assert_msg
                assert splited[2] == "Exchange", assert_msg
                assert splited[3] == "CQS Symbol", assert_msg
                assert splited[4] == "ETF", assert_msg
                assert splited[5] == "Round Lot Size", assert_msg
                assert splited[6] == "Test Issue", assert_msg
                assert splited[7] == "NASDAQ Symbol", assert_msg
                continue

            # Exchange, CQS symbol, round lot size, test issue and NASDAQ
            # symbol columns are deliberately ignored.
            act_symbol = splited[0].strip()
            security_name = splited[1].strip()
            is_etf = splited[4].strip() == "Y"
            ret.append(Ticker(act_symbol, security_name, is_etf))
        return ret
72 changes: 71 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ dcbot = "dcbot.__main__:main"
[tool.poetry.dependencies]
python = "^3.6"
selenium = "^3.141.0"
requests = "^2.25.1"
dataclasses = {version = "^0.6", python = "<3.8"}

[tool.poetry.dev-dependencies]
pytest = "^6.1"
Expand Down
9 changes: 9 additions & 0 deletions tests/crawler/test_ticker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from dcbot.crawler.tickers import TickerCrawler, Ticker


def test_ticker():
    """Integration test: downloads the live NASDAQ Trader symbol directory.

    NOTE(review): requires network access and depends on the current
    contents of the directory files — the named symbols being delisted or
    renamed will break this test. Spot-checks a common stock (AAPL), an
    ETF (QQQ), and an ETN (NRGU, which the source data flags as not an ETF).
    """
    crawled = TickerCrawler.crawl()
    assert len(crawled) > 0
    assert Ticker("AAPL", "Apple Inc. - Common Stock", False) in crawled
    assert Ticker("QQQ", "Invesco QQQ Trust, Series 1", True) in crawled
    assert Ticker("NRGU", "MicroSectors U.S. Big Oil Index 3X Leveraged ETN", False) in crawled

0 comments on commit 509b731

Please sign in to comment.