diff --git a/.github/workflows/master-pipeline.yml b/.github/workflows/master-pipeline.yml new file mode 100644 index 0000000..a95b88f --- /dev/null +++ b/.github/workflows/master-pipeline.yml @@ -0,0 +1,59 @@ +name: Python package + +on: + push: + branches: + - 'master' + + +jobs: + test-code: + name: "Test code" + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Test with pytest + run: | + pip install pytest pytest-cov pytest-dependency + PYTHONPATH="$PYTHONPATH:./keras-batchflow" pytest tests --doctest-modules --junitxml=junit/test-results.xml --cov=com --cov-report=xml --cov-report=html + + page_build: + name: "Build and deploy documentation to staging" + needs: test-code + runs-on: ubuntu-latest + environment: github-pages-staging + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install -r build_requirements.txt + pip install -e . + - name: Build docs + run: | + mkdocs build --verbose --clean --strict + - name: Deploy + uses: JamesIves/github-pages-deploy-action@v4 + with: + branch: gh-pages # The branch the action should deploy to. + folder: site + repository-name: maxsch3/keras-batchflow-test + ssh-key: ${{ secrets.GH_DEPLOY_SECRET }} diff --git a/.github/workflows/release-pipeline.yml b/.github/workflows/release-pipeline.yml new file mode 100644 index 0000000..5d5ecce --- /dev/null +++ b/.github/workflows/release-pipeline.yml @@ -0,0 +1,102 @@ +name: Python package + +on: + release: + types: ['published'] + + +jobs: + + page_build: + name: "Build documentation" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install --upgrade build + pip install -r requirements.txt + pip install -r build_requirements.txt + pip install -e . + - name: Setup Pages + uses: actions/configure-pages@v4 + - name: Build docs + run: | + mkdocs build --verbose --clean --strict + - name: Upload page artifact + uses: actions/upload-pages-artifact@v3 + with: + path: ./site + + pypi_build: + name: Build pypi package + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install --upgrade build + pip install -r requirements.txt + pip install -r build_requirements.txt + pip install -e . + - name: Build pypi package + run: | + python -m build + - name: Upload build package as artifact + uses: actions/upload-artifact@v3 + with: + name: python-package-distributions + path: dist/ + + deploy_page: + name: 'Deploy release documentation' + needs: [page_build, pypi_build] + + # Grant GITHUB_TOKEN the permissions required to make a Pages deployment + permissions: + pages: write # to deploy to Pages + id-token: write # to verify the deployment originates from an appropriate source + actions: read # to download an artifact uploaded by `actions/upload-pages-artifact@v3` + + # Deploy to the github-pages environment + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + + # Specify runner + deployment step + runs-on: ubuntu-latest + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 + + deploy_pypi: + name: Publish to pypi + needs: [page_build, pypi_build] + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/keras-batchflow + permissions: + id-token: write + steps: + - name: Download all the dists + uses: actions/download-artifact@v3 + with: + name: python-package-distributions + path: dist/ + - name: Publish distribution 📦 to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test-pipeline.yml b/.github/workflows/test-pipeline.yml new file mode 100644 index 0000000..92b8a63 --- /dev/null +++ b/.github/workflows/test-pipeline.yml @@ -0,0 +1,50 @@ +name: Python package + +on: + push: + branches: + - '*' + - '!master' + +jobs: + test-code: + name: "Test code" + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Test with pytest + run: | + pip install pytest pytest-cov pytest-dependency + PYTHONPATH="$PYTHONPATH:./keras-batchflow" pytest tests --doctest-modules --junitxml=junit/test-results.xml --cov=com --cov-report=xml --cov-report=html + + test-doc-build: + name: "Test documentation build" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install -r build_requirements.txt + pip install -e . + - name: Test doc build + run: | + mkdocs build --verbose --clean --strict diff --git a/.travis.yml b/.travis.yml index c08a031..ffa9f39 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,12 +1,10 @@ language: python python: -# - "2.7" -# - "3.4" -# - "3.5" - - "3.6" - - "3.7" + - "3.8" + - "3.9" + - "3.10" env: - - framework=TF1 +# - framework=TF1 - framework=TF2 script: @@ -16,7 +14,7 @@ jobs: include: - stage: deploy-test if: branch = master - python: "3.6" + python: "3.10" script: skip install: - pip install -r requirements_tf1.txt @@ -34,7 +32,7 @@ jobs: branch: master - stage: deploy-prod if: tag IS present - python: "3.6" + python: "3.10" script: skip install: - pip install -r requirements_tf1.txt diff --git a/build_requirements.txt b/build_requirements.txt index 7bf123f..9cd2179 100644 --- a/build_requirements.txt +++ b/build_requirements.txt @@ -1,7 +1,7 @@ -setuptools-scm==3.5.0 -mkdocs==1.1.2 -mkdocs-jupyter==0.13.0 -notebook==6.1.5 -pymdown-extensions==6.3 -nbconvert==5.6.1 +setuptools-scm +mkdocs +mkdocs-jupyter +notebook +pymdown-extensions +nbconvert git+https://github.com/tomchristie/mkautodoc.git#egg=mkautodoc diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1d17a94 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +pandas>=2.0.0 +scikit-learn +tensorflow-cpu diff --git a/requirements_tf1.txt b/requirements_tf1.txt deleted file mode 100644 index 64924a1..0000000 --- a/requirements_tf1.txt +++ /dev/null @@ -1,8 +0,0 @@ -Keras==2.3.1 -numpy==1.17.2 -pandas==0.25.1 -pytest==5.2.1 -pytest-dependency==0.5.1 -scikit-learn==0.21.3 -scipy==1.3.1 -tensorflow==1.15.4 diff --git a/requirements_tf2.txt b/requirements_tf2.txt deleted file mode 100644 index 8e36044..0000000 --- a/requirements_tf2.txt +++ /dev/null @@ -1,7 +0,0 @@ -numpy==1.17.2 -pandas==0.25.1 -pytest==5.2.1 -pytest-dependency==0.5.1 -scikit-learn==0.21.3 -scipy==1.3.1 -tensorflow==2.2.1 diff --git a/setup.py b/setup.py index 2273af2..630bd38 100644 --- a/setup.py +++ b/setup.py @@ -17,10 +17,9 @@ download_url='https://github.com/maxsch3/batchflow', license='MIT', setup_requires=['setuptools_scm'], - install_requires=['numpy>=1.9.1', - 'scipy>=0.14', + install_requires=['numpy>=1.20.0', 'scikit-learn', - 'pandas'], + 'pandas>=2.0.0'], extras_require={ 'visualize': ['pydot>=1.2.4'], 'tests': ['pytest', @@ -33,7 +32,7 @@ 'Intended Audience :: Science/Research', 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.10', 'Topic :: Software Development :: Libraries', 'Topic :: Software Development :: Libraries :: Python Modules' ], diff --git a/test/test_base_random_cell_transform.py b/tests/test_base_random_cell_transform.py similarity index 96% rename from test/test_base_random_cell_transform.py rename to tests/test_base_random_cell_transform.py index 08d283f..f8aad9d 100644 --- a/test/test_base_random_cell_transform.py +++ b/tests/test_base_random_cell_transform.py @@ -159,11 +159,15 @@ def test_transform(self): def test_transform_many_cols(self): ct = LocalVersionTransform([.0, 1.], cols=['var1', 'var2']) - batch = ct.transform(self.df.copy()) + # make a bigger batch to make sure augmenatation will always be used on the batch + # when batch is small there is a small chance the batch will sieve through without augmentation due to its + # random nature + seed_df = self.df.sample(100, replace=True) + batch = ct.transform(seed_df.copy()) assert isinstance(batch, pd.DataFrame) - assert batch.shape == self.df.shape - assert not batch.equals(self.df) - batch = self.df.copy() + assert batch.shape == seed_df.shape + assert not batch.equals(seed_df) + batch = seed_df.copy() batch1 = ct.transform(batch) # test if transform does in-place transform assert batch1.equals(batch) diff --git a/test/test_batch_fork.py b/tests/test_batch_fork.py similarity index 100% rename from test/test_batch_fork.py rename to tests/test_batch_fork.py diff --git a/test/test_batch_generator.py b/tests/test_batch_generator.py similarity index 100% rename from test/test_batch_generator.py rename to tests/test_batch_generator.py diff --git a/test/test_batch_generator_keras.py b/tests/test_batch_generator_keras.py similarity index 100% rename from test/test_batch_generator_keras.py rename to tests/test_batch_generator_keras.py diff --git a/test/test_batch_generator_tf.py b/tests/test_batch_generator_tf.py similarity index 100% rename from test/test_batch_generator_tf.py rename to tests/test_batch_generator_tf.py diff --git a/test/test_batch_shaper.py b/tests/test_batch_shaper.py similarity index 100% rename from test/test_batch_shaper.py rename to tests/test_batch_shaper.py diff --git a/test/test_feature_dropout.py b/tests/test_feature_dropout.py similarity index 84% rename from test/test_feature_dropout.py rename to tests/test_feature_dropout.py index a698c38..ca134b9 100644 --- a/test/test_feature_dropout.py +++ b/tests/test_feature_dropout.py @@ -34,17 +34,19 @@ def test_row_dist(self): assert binom_test(b, 1000, 0.6) > 0.01 def test_cols_dist(self): + sample_size = 1000 fd = FeatureDropout([0., 1.], ['var1', 'var2', 'label'], drop_values='', col_probs=[.5, .3, .2]) - batch = fd.transform(self.df.sample(1000, replace=True)) + batch = fd.transform(self.df.sample(sample_size, replace=True)) b = (batch == '').sum(axis=0) - c, p = chisquare(b, [520, 300, 180]) + c, p = chisquare(b, [sample_size * .53, sample_size * .3, sample_size * .17]) assert p > 0.001 def test_uniform_col_dist(self): + sample_size = 1000 fd = FeatureDropout([0., 1.], ['var1', 'var2', 'label'], drop_values='') - batch = fd.transform(self.df.sample(1000, replace=True)) + batch = fd.transform(self.df.sample(sample_size, replace=True)) b = (batch == '').sum(axis=0) - c, p = chisquare(b, [333, 333, 333]) + c, p = chisquare(b, f_exp=[sample_size/3, sample_size/3, sample_size/3]) assert p > 0.01 def test_different_drop_values(self): @@ -55,20 +57,21 @@ def test_different_drop_values(self): assert b[1] == 0 assert b[2] == 0 b = (batch == 'v2').sum(axis=0) - assert binom_test(b[1], 1000, 0.33) > 0.01 + assert binom_test(b[1], 1000, 0.33) > 0.001 assert b[0] == 0 assert b[2] == 0 b = (batch == 'v3').sum(axis=0) - assert binom_test(b[2], 1000, 0.33) > 0.01 + assert binom_test(b[2], 1000, 0.33) > 0.001 assert b[0] == 0 assert b[1] == 0 def test_multiple_feature_drop(self): + sample_size = 100 fd = FeatureDropout([0., .7, .3], ['var1', 'var2', 'label'], drop_values='', col_probs=[.5, .3, .2]) - batch = fd.transform(self.df.sample(1000, replace=True)) + batch = fd.transform(self.df.sample(sample_size, replace=True)) b = (batch == '').sum(axis=1).value_counts().sort_index().tolist() - c, p = chisquare(b, [700, 300]) - assert p > 0.01 + c, p = chisquare(b, [sample_size * .7, sample_size * .3]) + assert p > 0.001 def test_parameter_error_handling(self): # column name is not str diff --git a/test/test_numpy_encoder_adaptor.py b/tests/test_numpy_encoder_adaptor.py similarity index 89% rename from test/test_numpy_encoder_adaptor.py rename to tests/test_numpy_encoder_adaptor.py index 7e421d7..4253f9b 100644 --- a/test/test_numpy_encoder_adaptor.py +++ b/tests/test_numpy_encoder_adaptor.py @@ -20,14 +20,16 @@ def test_transform(self): def test_transform_integer_array(self): """ - This tests that pandas specific IntegerArray is converted into numpy format + This tests that pandas specific IntegerArray is converted into numpy format. + IntergerArray of type "Int64" in pandas is a very handy integer data type which supports Nones and does + not require conversion to float data type :return: """ data = pd.Series([1, 2, 4, 5], dtype="Int64") nea = NumpyEncoderAdaptor() tr = nea.transform(data) assert isinstance(tr, np.ndarray) - assert np.issubdtype(tr.dtype, np.object) + assert np.issubdtype(tr.dtype, object) def test_transform_datetime(self): """ @@ -45,7 +47,7 @@ def test_inverse_transform(self): nea = NumpyEncoderAdaptor() tr = nea.inverse_transform(data) assert isinstance(tr, pd.Series) - assert np.issubdtype(tr.dtype, np.int) + assert np.issubdtype(tr.dtype, np.int64) tr = nea.inverse_transform(data, dtype=np.float32) assert isinstance(tr, pd.Series) assert np.issubdtype(tr.dtype, np.float32) diff --git a/test/test_pandas_encoder_adaptor.py b/tests/test_pandas_encoder_adaptor.py similarity index 97% rename from test/test_pandas_encoder_adaptor.py rename to tests/test_pandas_encoder_adaptor.py index 194a575..8c6c140 100644 --- a/test/test_pandas_encoder_adaptor.py +++ b/tests/test_pandas_encoder_adaptor.py @@ -34,7 +34,7 @@ def test_inverse_transform(self): pea = PandasEncoderAdaptor() tr = pea.inverse_transform(data) assert isinstance(tr, pd.Series) - assert np.issubdtype(tr.dtype, np.int) + assert np.issubdtype(tr.dtype, np.int64) tr = pea.inverse_transform(data, dtype=np.float32) assert isinstance(tr, pd.Series) assert np.issubdtype(tr.dtype, np.float32) diff --git a/test/test_shuffle_noise.py b/tests/test_shuffle_noise.py similarity index 100% rename from test/test_shuffle_noise.py rename to tests/test_shuffle_noise.py diff --git a/test/test_triplet_pk_generator.py b/tests/test_triplet_pk_generator.py similarity index 100% rename from test/test_triplet_pk_generator.py rename to tests/test_triplet_pk_generator.py diff --git a/test/test_triplet_pk_generator2d.py b/tests/test_triplet_pk_generator2d.py similarity index 100% rename from test/test_triplet_pk_generator2d.py rename to tests/test_triplet_pk_generator2d.py diff --git a/test/test_triplet_pk_generator_keras.py b/tests/test_triplet_pk_generator_keras.py similarity index 100% rename from test/test_triplet_pk_generator_keras.py rename to tests/test_triplet_pk_generator_keras.py diff --git a/test/test_var_shaper.py b/tests/test_var_shaper.py similarity index 100% rename from test/test_var_shaper.py rename to tests/test_var_shaper.py