
[MRG] Address comments for sklearn-contrib integration #238

Merged
78 changes: 54 additions & 24 deletions .travis.yml
@@ -1,27 +1,57 @@
language: python
sudo: false
cache: pip
python:
- "2.7"
- "3.4"
- "3.6"
before_install:
- sudo apt-get install liblapack-dev
- pip install --upgrade pip pytest
- pip install wheel cython numpy scipy codecov pytest-cov
- if [[ $TRAVIS_PYTHON_VERSION == "3.6" ]]; then
pip install scikit-learn;
else
pip install scikit-learn==0.20.3;
fi
- if [[ ($TRAVIS_PYTHON_VERSION == "3.6") ||
($TRAVIS_PYTHON_VERSION == "2.7")]]; then
pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8;
fi
script:
# we do coverage for all versions so that codecov will merge them: this
# way we will see that both paths (with or without skggm) are tested
- pytest test --cov;
after_success:
- bash <(curl -s https://codecov.io/bash)

language: python
matrix:
include:
- name: "Pytest python 2.7 with skggm"
python: "2.7"
before_install:
- sudo apt-get install liblapack-dev
- pip install --upgrade pip pytest
- pip install wheel cython numpy scipy codecov pytest-cov scikit-learn
- pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8;
script:
- pytest test --cov;
after_success:
- bash <(curl -s https://codecov.io/bash)
- name: "Pytest python 3.4 without skggm"
python: "3.4"
before_install:
- sudo apt-get install liblapack-dev
- pip install --upgrade pip pytest
- pip install wheel cython numpy scipy codecov pytest-cov scikit-learn
script:
- pytest test --cov;
after_success:
- bash <(curl -s https://codecov.io/bash)
- name: "Pytest python 3.6 with skggm"
python: "3.6"
before_install:
- sudo apt-get install liblapack-dev
- pip install --upgrade pip pytest
- pip install wheel cython numpy scipy codecov pytest-cov scikit-learn
- pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8;
script:
- pytest test --cov;
after_success:
- bash <(curl -s https://codecov.io/bash)
- name: "Pytest python 3.7 with skggm"
python: "3.7"
before_install:
- sudo apt-get install liblapack-dev
- pip install --upgrade pip pytest
- pip install wheel cython numpy scipy codecov pytest-cov scikit-learn
- pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8;
script:
- pytest test --cov;
after_success:
- bash <(curl -s https://codecov.io/bash)
- name: "Syntax checking with flake8"
python: "3.7"
before_install:
- pip install flake8
script:
- flake8 --extend-ignore=E111,E114 --show-source;
# Use this instead to have a syntax check only on the diff:
# - source ./build_tools/travis/flake8_diff.sh;
132 changes: 132 additions & 0 deletions build_tools/travis/flake8_diff.sh
@@ -0,0 +1,132 @@
#!/bin/bash
# This file is not used yet, but we keep it in case we need to check the pep8
# difference on the diff (see .travis.yml)
#
# copy-pasted and adapted from http://github.com/sklearn-contrib/imbalanced-learn
# (more precisely: https://raw.githubusercontent.com/glemaitre/imbalanced-learn
# /adcb9d8e6210b321dac2c1b06879e5e889d52d77/build_tools/travis/flake8_diff.sh)

# This script is used in Travis to check that PRs do not add obvious
# flake8 violations. It relies on two things:
# - find common ancestor between branch and
# scikit-learn/scikit-learn remote
# - run flake8 --diff on the diff between the branch and the common
# ancestor
#
# Additional features:
# - the line numbers in Travis match the local branch on the PR
# author machine.
# - ./build_tools/travis/flake8_diff.sh can be run locally for quick
# turn-around

set -e
# pipefail is necessary to propagate exit codes
set -o pipefail

PROJECT=metric-learn/metric-learn
PROJECT_URL=https://github.com/$PROJECT.git

# Find the remote with the project name (upstream in most cases)
REMOTE=$(git remote -v | grep $PROJECT | cut -f1 | head -1 || echo '')

# Add a temporary remote if needed. For example this is necessary when
# Travis is configured to run in a fork. In this case 'origin' is the
# fork and not the reference repo we want to diff against.
if [[ -z "$REMOTE" ]]; then
TMP_REMOTE=tmp_reference_upstream
REMOTE=$TMP_REMOTE
git remote add $REMOTE $PROJECT_URL
fi

echo "Remotes:"
echo '--------------------------------------------------------------------------------'
git remote --verbose

# Travis does the git clone with a limited depth (50 at the time of
# writing). This may not be enough to find the common ancestor with
# $REMOTE/master so we unshallow the git checkout
if [[ -a .git/shallow ]]; then
echo -e '\nTrying to unshallow the repo:'
echo '--------------------------------------------------------------------------------'
git fetch --unshallow
fi

if [[ "$TRAVIS" == "true" ]]; then
if [[ "$TRAVIS_PULL_REQUEST" == "false" ]]
then
# In main repo, using TRAVIS_COMMIT_RANGE to test the commits
# that were pushed into a branch
if [[ "$PROJECT" == "$TRAVIS_REPO_SLUG" ]]; then
if [[ -z "$TRAVIS_COMMIT_RANGE" ]]; then
echo "New branch, no commit range from Travis so passing this test by convention"
exit 0
fi
COMMIT_RANGE=$TRAVIS_COMMIT_RANGE
fi
else
# We want to fetch the code as it is in the PR branch and not
# the result of the merge into master. This way line numbers
# reported by Travis will match with the local code.
LOCAL_BRANCH_REF=travis_pr_$TRAVIS_PULL_REQUEST
# In Travis the PR target is always origin
git fetch origin pull/$TRAVIS_PULL_REQUEST/head:refs/$LOCAL_BRANCH_REF
fi
fi

# If not using the commit range from Travis we need to find the common
# ancestor between $LOCAL_BRANCH_REF and $REMOTE/master
if [[ -z "$COMMIT_RANGE" ]]; then
if [[ -z "$LOCAL_BRANCH_REF" ]]; then
LOCAL_BRANCH_REF=$(git rev-parse --abbrev-ref HEAD)
fi
echo -e "\nLast 2 commits in $LOCAL_BRANCH_REF:"
echo '--------------------------------------------------------------------------------'
git log -2 $LOCAL_BRANCH_REF

REMOTE_MASTER_REF="$REMOTE/master"
# Make sure that $REMOTE_MASTER_REF is a valid reference
echo -e "\nFetching $REMOTE_MASTER_REF"
echo '--------------------------------------------------------------------------------'
git fetch $REMOTE master:refs/remotes/$REMOTE_MASTER_REF
LOCAL_BRANCH_SHORT_HASH=$(git rev-parse --short $LOCAL_BRANCH_REF)
REMOTE_MASTER_SHORT_HASH=$(git rev-parse --short $REMOTE_MASTER_REF)

COMMIT=$(git merge-base $LOCAL_BRANCH_REF $REMOTE_MASTER_REF) || \
echo "No common ancestor found for $(git show $LOCAL_BRANCH_REF -q) and $(git show $REMOTE_MASTER_REF -q)"

if [ -z "$COMMIT" ]; then
exit 1
fi

COMMIT_SHORT_HASH=$(git rev-parse --short $COMMIT)

echo -e "\nCommon ancestor between $LOCAL_BRANCH_REF ($LOCAL_BRANCH_SHORT_HASH)"\
"and $REMOTE_MASTER_REF ($REMOTE_MASTER_SHORT_HASH) is $COMMIT_SHORT_HASH:"
echo '--------------------------------------------------------------------------------'
git show --no-patch $COMMIT_SHORT_HASH

COMMIT_RANGE="$COMMIT_SHORT_HASH..$LOCAL_BRANCH_SHORT_HASH"

if [[ -n "$TMP_REMOTE" ]]; then
git remote remove $TMP_REMOTE
fi

else
echo "Got the commit range from Travis: $COMMIT_RANGE"
fi

echo -e '\nRunning flake8 on the diff in the range' "$COMMIT_RANGE" \
"($(git rev-list $COMMIT_RANGE | wc -l) commit(s)):"
echo '--------------------------------------------------------------------------------'

# To avoid including context (a few lines before and after the modified lines),
# add the flag --unified=0 (warning: some errors will then be missed, for
# instance adding too many blank lines)
check_files() {
git diff $COMMIT_RANGE | flake8 --diff --show-source --extend-ignore=E111,E114
}

check_files

echo -e "No problem detected by flake8\n"

6 changes: 4 additions & 2 deletions doc/conf.py
@@ -20,8 +20,10 @@

# General information about the project.
project = u'metric-learn'
copyright = u'2015-2019, CJ Carey, Yuan Tang, William de Vazelhes, Aurélien Bellet, and Nathalie Vauquier'
author = u'CJ Carey, Yuan Tang, William de Vazelhes, Aurélien Bellet, and Nathalie Vauquier'
copyright = (u'2015-2019, CJ Carey, Yuan Tang, William de Vazelhes, Aurélien '
u'Bellet and Nathalie Vauquier')
author = (u'CJ Carey, Yuan Tang, William de Vazelhes, Aurélien Bellet and '
u'Nathalie Vauquier')
version = '0.5.0'
release = '0.5.0'
language = 'en'
38 changes: 21 additions & 17 deletions examples/plot_metric_learning_examples.py
@@ -88,7 +88,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
# distances between points for the task at hand. Especially in higher
# dimensions when Euclidean distances are a poor way to measure distance, this
# becomes very useful.
#
#
# Basically, we learn this distance:
# :math:`D(x, x') = \sqrt{(x-x')^\top M(x-x')}`. And we learn the parameters
# :math:`M` of this distance to satisfy certain constraints on the distance
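The distance defined above can be written out directly in plain NumPy. This is a minimal illustrative sketch, not code from this PR's diff; `mahalanobis_distance` is a hypothetical helper name:

```python
import numpy as np

def mahalanobis_distance(x, x_prime, M):
    """D(x, x') = sqrt((x - x')^T M (x - x')) for a PSD matrix M."""
    diff = np.asarray(x, dtype=float) - np.asarray(x_prime, dtype=float)
    return float(np.sqrt(diff @ M @ diff))

# With M = I this reduces to the Euclidean distance: here sqrt(3^2 + 4^2).
print(mahalanobis_distance([1., 2.], [4., 6.], np.eye(2)))  # 5.0
```

Metric learning algorithms differ mainly in which constraints they use to choose `M`.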
@@ -113,12 +113,12 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
######################################################################
# Large Margin Nearest Neighbour
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
#
# LMNN is a metric learning algorithm primarily designed for k-nearest
# neighbor classification. The algorithm is based on semidefinite
# programming, a subclass of convex programming (as most Metric Learning
# algorithms are).
#
#
# The main intuition behind LMNN is to learn a pseudometric under which
# all data instances in the training set are surrounded by at least k
# instances that share the same class label. If this is achieved, the
@@ -136,7 +136,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
######################################################################
# Fit and then transform!
# -----------------------
#
#

# setting up LMNN
lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6)
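The LMNN intuition — every point surrounded by at least k same-class neighbors — can be sanity-checked with plain NumPy. This is a hedged sketch independent of metric-learn; `same_class_neighbor_fraction` is a hypothetical helper, not library API:

```python
import numpy as np

def same_class_neighbor_fraction(X, y, k=2):
    """Fraction of points whose k nearest neighbors (plain Euclidean
    distance here) all share the point's class label -- the condition
    LMNN tries to make hold under the learned metric."""
    X = np.asarray(X, dtype=float)
    y = np.asarray(y)
    ok = 0
    for i in range(len(X)):
        d = np.linalg.norm(X - X[i], axis=1)
        d[i] = np.inf  # exclude the point itself
        neighbors = np.argsort(d)[:k]
        ok += int(np.all(y[neighbors] == y[i]))
    return ok / len(X)

# Two well-separated classes: the condition holds for every point.
X = np.array([[0., 0.], [.1, 0.], [0., .1],
              [5., 5.], [5.1, 5.], [5., 5.1]])
y = np.array([0, 0, 0, 1, 1, 1])
print(same_class_neighbor_fraction(X, y, k=2))  # 1.0
```

Running the same check in the LMNN-transformed space should give a higher fraction than in the original space when the fit succeeded.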
@@ -162,7 +162,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):

######################################################################
# Pretty neat, huh?
#
#
# The rest of this notebook will briefly explain the other Metric Learning
# algorithms before plotting them. Also, while we have first run ``fit``
# and then ``transform`` to see our data transformed, we can also use
@@ -172,7 +172,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
######################################################################
# Information Theoretic Metric Learning
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
#
# ITML uses a regularizer that automatically enforces a positive
# semi-definite matrix condition - the LogDet divergence. It uses soft
# must-link and cannot-link constraints, and a simple algorithm based on
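The LogDet divergence mentioned above has a simple closed form. A NumPy sketch, assuming the standard Burg/LogDet definition (not code from this PR or from metric-learn):

```python
import numpy as np

def logdet_divergence(M, M0):
    """LogDet (Burg) divergence D_ld(M, M0) = tr(M M0^-1) - log det(M M0^-1) - d.
    ITML uses it to keep the learned metric M close to a prior metric M0;
    it is finite only when M stays positive definite."""
    d = M.shape[0]
    P = M @ np.linalg.inv(M0)
    sign, logdet = np.linalg.slogdet(P)
    return float(np.trace(P) - logdet - d)

M0 = np.eye(2)
print(logdet_divergence(M0, M0))      # 0.0 -- a matrix has zero divergence from itself
print(logdet_divergence(2 * M0, M0))  # positive for any other PSD matrix
```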
@@ -231,7 +231,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
######################################################################
# Least Squares Metric Learning
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
#
# LSML is a simple, yet effective, algorithm that learns a Mahalanobis
# metric from a given set of relative comparisons. This is done by
# formulating and minimizing a convex loss function that corresponds to
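To make "relative comparisons" concrete, here is a hedged sketch of a squared-hinge loss on quadruplets of the kind LSML minimizes; the exact loss in the paper and library may differ, and `relative_comparison_loss` is a hypothetical name:

```python
import numpy as np

def relative_comparison_loss(X, M, quadruplets):
    """Squared-hinge loss on comparisons (a, b, c, d), each read as
    'a is closer to b than c is to d' under the Mahalanobis metric M.
    Satisfied comparisons contribute zero; violated ones are penalized
    by the squared margin of violation."""
    X = np.asarray(X, dtype=float)
    def dist(i, j):
        diff = X[i] - X[j]
        return np.sqrt(diff @ M @ diff)
    return float(sum(max(0.0, dist(a, b) - dist(c, d)) ** 2
                     for a, b, c, d in quadruplets))

X = np.array([[0., 0.], [1., 0.], [0., 0.], [5., 0.]])
# The comparison (0, 1, 2, 3) is already satisfied under the identity
# metric (distance 1 vs distance 5), so it contributes no loss.
print(relative_comparison_loss(X, np.eye(2), [(0, 1, 2, 3)]))  # 0.0
```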
@@ -277,7 +277,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
######################################################################
# Local Fisher Discriminant Analysis
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
#
# LFDA is a linear supervised dimensionality reduction method. It is
# particularly useful when dealing with multimodality, where one or more
# classes consist of separate clusters in input space. The core
@@ -298,7 +298,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
######################################################################
# Relative Components Analysis
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
#
# RCA is another one of the older algorithms. It learns a full rank
# Mahalanobis distance metric based on a weighted sum of in-class
# covariance matrices. It applies a global linear transformation to assign
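A simplified NumPy sketch of the RCA idea described above — average the in-class covariances and whiten with the inverse square root. This is an illustration only, not the library implementation (real RCA works on "chunklets" rather than full class labels):

```python
import numpy as np

def rca_like_transform(X, y):
    """Average the (biased) in-class covariance matrices, weighted by
    class size, and whiten the data with the inverse square root of
    that matrix, so within-class directions are down-weighted."""
    X = np.asarray(X, dtype=float)
    C = np.zeros((X.shape[1], X.shape[1]))
    for c in np.unique(y):
        Xc = X[y == c]
        C += np.cov(Xc, rowvar=False, bias=True) * (len(Xc) / len(X))
    w, V = np.linalg.eigh(C)                  # C is symmetric PSD
    W = V @ np.diag(1.0 / np.sqrt(w)) @ V.T   # C^{-1/2}
    return X @ W.T, W

X = np.array([[0., 0.], [1., 0.], [0., 1.], [1., 1.],
              [5., 5.], [6., 5.], [5., 6.], [6., 6.]])
y = np.array([0, 0, 0, 0, 1, 1, 1, 1])
X_whitened, W = rca_like_transform(X, y)
```

After the transform, the pooled within-class covariance of `X_whitened` is the identity, which is the whitening effect RCA relies on.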
@@ -402,7 +402,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
def create_constraints(labels):
import itertools
import random

# aggregate indices of same class
zeros = np.where(y == 0)[0]
ones = np.where(y == 1)[0]
@@ -413,7 +413,7 @@ def create_constraints(labels):
twos_ = list(itertools.combinations(twos, 2))
# put them together!
sim = np.array(zeros_ + ones_ + twos_)

# similarly, put together indices in different classes
dis = []
for zero in zeros:
@@ -424,21 +424,25 @@ def create_constraints(labels):
for one in ones:
for two in twos:
dis.append((one, two))

# pick up just enough dissimilar examples as we have similar examples
dis = np.array(random.sample(dis, len(sim)))

# return an array of pairs of indices of shape=(2*len(sim), 2), and the corresponding labels, array of shape=(2*len(sim))
# Each pair of similar points have a label of +1 and each pair of dissimilar points have a label of -1
return (np.vstack([np.column_stack([sim[:, 0], sim[:, 1]]), np.column_stack([dis[:, 0], dis[:, 1]])]),

# return an array of pairs of indices of shape=(2*len(sim), 2), and the
# corresponding labels, an array of shape=(2*len(sim),).
# Each pair of similar points has a label of +1 and each pair of
# dissimilar points has a label of -1
return (np.vstack([np.column_stack([sim[:, 0], sim[:, 1]]),
np.column_stack([dis[:, 0], dis[:, 1]])]),
np.concatenate([np.ones(len(sim)), -np.ones(len(sim))]))


pairs, pairs_labels = create_constraints(y)


######################################################################
# Now that we've created our constraints, let's see what it looks like!
#
#

print(pairs)
print(pairs_labels)