Skip to content

Commit

Permalink
Merge pull request #379 from zhupr/fix_usinex_collector
Browse files Browse the repository at this point in the history
Fix us_index collector
  • Loading branch information
you-n-g committed Mar 29, 2021
2 parents 253378a + 4b66304 commit 45f7867
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
7 changes: 6 additions & 1 deletion scripts/data_collector/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ def save_new_companies(self):
$ python collector.py save_new_companies --index_name CSI300 --qlib_dir ~/.qlib/qlib_data/cn_data
"""
df = self.get_new_companies()
if df is None or df.empty:
raise ValueError(f"get new companies error: {self.index_name}")
df = df.drop_duplicates([self.SYMBOL_FIELD_NAME])
df.loc[:, self.INSTRUMENTS_COLUMNS].to_csv(
self.instruments_dir.joinpath(f"{self.index_name.lower()}_only_new.txt"), sep="\t", index=False, header=None
Expand Down Expand Up @@ -184,7 +186,10 @@ def parse_instruments(self):
logger.info(f"start parse {self.index_name.lower()} companies.....")
instruments_columns = [self.SYMBOL_FIELD_NAME, self.START_DATE_FIELD, self.END_DATE_FIELD]
changers_df = self.get_changes()
new_df = self.get_new_companies().copy()
new_df = self.get_new_companies()
if new_df is None or new_df.empty:
raise ValueError(f"get new companies error: {self.index_name}")
new_df = new_df.copy()
logger.info("parse history companies by changes......")
for _row in tqdm(changers_df.sort_values(self.DATE_FIELD_NAME, ascending=False).itertuples(index=False)):
if _row.type == self.ADD:
Expand Down
4 changes: 2 additions & 2 deletions scripts/data_collector/us_index/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
class WIKIIndex(IndexBase):
# NOTE: Some US stock codes (e.g. "PRN") are reserved names on Windows, so a directory with that name cannot be created there; the "_" prefix is used to work around this
# https://superuser.com/questions/613313/why-cant-we-make-con-prn-null-folder-in-windows
INST_PREFIX = "_"
INST_PREFIX = ""

def __init__(self, index_name: str, qlib_dir: [str, Path] = None, request_retry: int = 5, retry_sleep: int = 3):
super(WIKIIndex, self).__init__(
Expand Down Expand Up @@ -123,7 +123,7 @@ class NASDAQ100Index(WIKIIndex):
MAX_WORKERS = 16

def filter_df(self, df: pd.DataFrame) -> pd.DataFrame:
if not (set(df.columns) - {"Company", "Ticker"}):
if len(df) >= 100 and "Ticker" in df.columns:
return df.loc[:, ["Ticker"]].copy()

@property
Expand Down

0 comments on commit 45f7867

Please sign in to comment.