diff --git a/qlib/data/data.py b/qlib/data/data.py index e1c9692476..3a74a20277 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -663,7 +663,7 @@ def feature(self, instrument, field, start_index, end_index, freq): data = self.backend_obj(instrument=instrument, field=field, freq=freq)[start_index : end_index + 1] except Exception as e: get_module_logger("data").warning( - f"WARN: data not found for {instrument}.{field}\n\tException info: {str(e)}" + f"WARN: data not found for {instrument}.{field}\n\tFeature exception info: {str(e)}" ) data = pd.Series(dtype=np.float32) return data diff --git a/qlib/data/storage/file_storage.py b/qlib/data/storage/file_storage.py index e55105f573..90e4178ffe 100644 --- a/qlib/data/storage/file_storage.py +++ b/qlib/data/storage/file_storage.py @@ -230,20 +230,19 @@ def __getitem__(self, i: Union[int, slice]) -> Union[Tuple[int, float], pd.Serie raise TypeError(f"type(i) = {type(i)}") with open(self.uri, "rb") as fp: - ref_start_index = int(np.frombuffer(fp.read(4), dtype=" i: - raise IndexError(f"{i}: start index is {ref_start_index}") - fp.seek(4 * (i - ref_start_index) + 4) + if self.start_index > i: + raise IndexError(f"{i}: start index is {self.start_index}") + fp.seek(4 * (i - self.start_index) + 4) return i, struct.unpack("f", fp.read(4))[0] elif isinstance(i, slice): - start_index = i.start - end_index = i.stop - 1 - si = max(ref_start_index, start_index) + start_index = self.start_index if i.start is None else i.start + end_index = self.end_index if i.stop is None else i.stop - 1 + si = max(self.start_index, start_index) if si > end_index: return pd.Series() - fp.seek(4 * (si - ref_start_index) + 4) + fp.seek(4 * (si - self.start_index) + 4) # read n bytes count = end_index - si + 1 data = np.frombuffer(fp.read(4 * count), dtype=" str: - return re.findall("[A-Z][^A-Z]*", self.__class__.__name__)[-2] + return re.findall("[A-Z][^A-Z]*", self.__class__.__name__)[-2].lower() + + @property + def raise_info(self): + parameters_info = [ + f"{_k}={_v}" + for _k, _v in self.__dict__.items() + if not isinstance(_v, (dict, )) or (hasattr(_v, "__len__") and len(_v) < 3) + ] + return f"{self.storage_name.lower()} not exists, storage parameters: {parameters_info}" def check_exists(self) -> bool: """check if storage(uri) exists, if not exists: return False""" @@ -84,15 +95,17 @@ def __getitem__(self, item: Union[slice, Union[int, InstKT]]): ) def _check(self): - # check storage(uri) if not self.check_exists(): - parameters_info = [f"{_k}={_v}" for _k, _v in self.__dict__.items()] - raise ValueError(f"{self.storage_name.lower()} not exists, storage parameters: {parameters_info}") + raise ValueError(self.raise_info) def __getattribute__(self, item): if item == "data": self._check() - return super(BaseStorage, self).__getattribute__(item) + try: + res = super(BaseStorage, self).__getattribute__(item) + except Exception as e: + raise ValueError(f"{self.raise_info}\n\tStorage exception info: {str(e)}") + return res class CalendarStorage(BaseStorage): diff --git a/tests/storage_tests/test_storage.py b/tests/storage_tests/test_storage.py index 79ad78b82e..e7bac658cb 100644 --- a/tests/storage_tests/test_storage.py +++ b/tests/storage_tests/test_storage.py @@ -135,18 +135,12 @@ def test_feature_storage(self): feature = FeatureStorage(instrument="SH600004", field="close", freq="day", uri=self.provider_uri) - with pytest.raises(IndexError): + with pytest.raises(ValueError): print(feature[0]) assert isinstance( feature[815][1], (float, np.float32) ), f"{feature.__class__.__name__}.__getitem__(i: int) error" assert len(feature[815:818]) == 3, f"{feature.__class__.__name__}.__getitem__(s: slice) error" - print(f"feature[815: 818]: {feature[815: 818]}") - - for _item in feature: - assert ( - isinstance(_item, tuple) and len(_item) == 2 - ), f"{feature.__class__.__name__}.__iter__ item type error" - assert isinstance(_item[0], int) and isinstance( - _item[1], (float, np.float32) - ), f"{feature.__class__.__name__}.__iter__ value type error" + print(f"feature[815: 818]: \n{feature[815: 818]}") + + print(f"feature[:].tail(): \n{feature[:].tail()}")