Skip to content

Commit

Permalink
fix(datasets): move datasets to another repo (skfolio-datasets)
Browse files Browse the repository at this point in the history
  • Loading branch information
HugoDelatte committed Mar 9, 2024
1 parent 4982e27 commit 4b31e7d
Show file tree
Hide file tree
Showing 5 changed files with 4 additions and 1,017 deletions.
Binary file removed datasets/ftse100_dataset.csv.gz
Binary file not shown.
Binary file removed datasets/nasdaq_dataset.csv.gz
Binary file not shown.
5 changes: 4 additions & 1 deletion src/skfolio/datasets/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,10 @@ def download_dataset(
DataFrame with each row representing one observation and each column
representing the asset price of a given observation.
"""
- url = f"https://github.com/skfolio/skfolio/raw/main/datasets/{data_filename}.csv.gz"
+ url = (
+     f"https://github.com/skfolio/skfolio-datasets/raw/main/"
+     f"datasets/{data_filename}.csv.gz"
+ )

data_home = get_data_home(data_home=data_home)
filepath = os.path.join(data_home, f"{data_filename}.pkz")
Expand Down
1,001 changes: 0 additions & 1,001 deletions tests/data/covariance.csv

This file was deleted.

15 changes: 0 additions & 15 deletions tests/test_utils/test_stats.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
-from pathlib import Path
-
 import numpy as np
-import pandas as pd
import pytest
import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as scd
Expand Down Expand Up @@ -72,12 +69,6 @@ def linkage_matrix(distance):
return linkage_matrix


-@pytest.fixture(scope="module")
-def non_psd_cov():
-    file = Path(Path(__file__).parent.parent, "data", "covariance.csv")
-    return pd.read_csv(file, sep=",").to_numpy()
-
-
def test_n_bins_freedman(returns):
n_bins = n_bins_freedman(returns)
assert n_bins == 329
Expand All @@ -97,12 +88,6 @@ def test_cov_nearest(nasdaq_X):
assert is_cholesky_dec(cov2)


-def test_cov_nearest_cov_non_psd(non_psd_cov):
-    assert not is_cholesky_dec(non_psd_cov)
-    cov = cov_nearest(non_psd_cov)
-    assert is_cholesky_dec(cov)
-
-
def test_corr_nearest_psd():
x = np.array([[1, -0.2, -0.9], [-0.2, 1, -0.2], [-0.9, -0.2, 1]])
y = cov_nearest(x, higham=True)
Expand Down

0 comments on commit 4b31e7d

Please sign in to comment.