
Backport NumPy 1.13.0 fixes to 0.18.X (#9137)
* Fix tests on numpy master (#7946)

Until now we were hitting an edge case in assert_array_equal
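
For illustration, a minimal sketch of the kind of nested comparison involved (hypothetical index arrays; the real test compares ``KFold`` splits)::

    import numpy as np

    # Each split is a (train_indices, test_indices) tuple of arrays whose
    # lengths can differ from fold to fold.
    splits_a = [(np.array([0, 1, 2]), np.array([3, 4])),
                (np.array([2, 3, 4]), np.array([0, 1]))]
    splits_b = [(np.array([0, 1, 2]), np.array([3, 4])),
                (np.array([2, 3, 4]), np.array([0, 1]))]

    # np.testing.assert_equal recurses through lists and tuples and compares
    # the index arrays element-wise, so it does not depend on how np.asarray
    # coerces the nested structure.
    np.testing.assert_equal(splits_a, splits_b)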

* Fix tests on numpy master (#8355)

numpy.apply_along_axis has changed behaviour when the function passed
in returns a 2d array
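
A hedged sketch of the behaviour in question (the exact output shape differs between numpy versions, which is why the kernel fix below flattens with ``ravel``)::

    import numpy as np

    X = np.arange(12.).reshape(4, 3)

    def row_sum(row):
        # Returns a 1-element array rather than a scalar.
        return np.array([row.sum()])

    out = np.apply_along_axis(row_sum, 1, X)
    # Recent numpy returns shape (4, 1) here; older releases could differ,
    # so flattening gives the per-row values regardless of version.
    print(out.ravel())    # [ 3. 12. 21. 30.]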

* [MRG] Updated plot_stock_market.py to use Google Finance (#9010)

* DOC updated plot_stock_market.py to use Google Finance

The implementation is intentionally very basic so as not to distract users
from the example. Specifically, unlike ``quotes_historical_yahoo_ochl`` it
does not cache downloaded data.

I also had to remove some symbols because they have no data on Google for
the specified date interval. These are WBA, LMT, KFT and MTU.

Closes #8899

* DOC removed plot_stock_market.py from expected failing examples

* Addressed review comments

* Addressed another pass of review comments

* [MRG] Remove DeprecationWarnings in examples due to using floats instead of ints (#8040)
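
Most of those example fixes follow the same pattern: the result of true division is a float, and newer numpy deprecates (and later forbids) floats as indices, slice bounds and array sizes, so the code switches to floor division or an explicit ``int()``. A minimal sketch of the pattern::

    import numpy as np

    n_samples = 10
    data = np.arange(n_samples)

    # data[:n_samples / 2]          # float slice bound: DeprecationWarning,
    #                               # then an error on newer numpy
    train = data[:n_samples // 2]   # floor division keeps the bound an int
    test = data[n_samples // 2:]
    size = int(0.3 * n_samples)     # explicit cast where a fraction is meant
    noise = np.random.RandomState(0).normal(size=size)
    print(train, test, noise.shape)
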
jakirkham authored and amueller committed Jun 19, 2017
1 parent 14031f6 commit 8d9b58b
Showing 16 changed files with 112 additions and 72 deletions.
72 changes: 51 additions & 21 deletions examples/applications/plot_stock_market.py
@@ -64,27 +64,59 @@
# Author: Gael Varoquaux gael.varoquaux@normalesup.org
# License: BSD 3 clause

import datetime
from datetime import datetime

import numpy as np
import matplotlib.pyplot as plt
try:
from matplotlib.finance import quotes_historical_yahoo_ochl
except ImportError:
# quotes_historical_yahoo_ochl was named quotes_historical_yahoo before matplotlib 1.4
from matplotlib.finance import quotes_historical_yahoo as quotes_historical_yahoo_ochl
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
from six.moves.urllib.request import urlopen
from six.moves.urllib.parse import urlencode
from sklearn import cluster, covariance, manifold

###############################################################################
# Retrieve the data from Internet

def quotes_historical_google(symbol, date1, date2):
"""Get the historical data from Google finance.
Parameters
----------
symbol : str
Ticker symbol to query for, for example ``"DELL"``.
date1 : datetime.datetime
Start date.
date2 : datetime.datetime
End date.
Returns
-------
X : array
The columns are ``date`` -- datetime, ``open``, ``high``,
``low``, ``close`` and ``volume`` of type float.
"""
params = urlencode({
'q': symbol,
'startdate': date1.strftime('%b %d, %Y'),
'enddate': date2.strftime('%b %d, %Y'),
'output': 'csv'
})
url = 'http://www.google.com/finance/historical?' + params
with urlopen(url) as response:
dtype = {
'names': ['date', 'open', 'high', 'low', 'close', 'volume'],
'formats': ['object', 'f4', 'f4', 'f4', 'f4', 'f4']
}
converters = {0: lambda s: datetime.strptime(s.decode(), '%d-%b-%y')}
return np.genfromtxt(response, delimiter=',', skip_header=1,
dtype=dtype, converters=converters,
missing_values='-', filling_values=-1)


# Choose a time period reasonably calm (not too long ago so that we get
# high-tech firms, and before the 2008 crash)
d1 = datetime.datetime(2003, 1, 1)
d2 = datetime.datetime(2008, 1, 1)
d1 = datetime(2003, 1, 1)
d2 = datetime(2008, 1, 1)

# kraft symbol has now changed from KFT to MDLZ in yahoo
symbol_dict = {
'TOT': 'Total',
'XOM': 'Exxon',
@@ -102,7 +134,6 @@
'AMZN': 'Amazon',
'TM': 'Toyota',
'CAJ': 'Canon',
'MTU': 'Mitsubishi',
'SNE': 'Sony',
'F': 'Ford',
'HMC': 'Honda',
@@ -111,9 +142,8 @@
'BA': 'Boeing',
'KO': 'Coca Cola',
'MMM': '3M',
'MCD': 'Mc Donalds',
'MCD': 'McDonald\'s',
'PEP': 'Pepsi',
'MDLZ': 'Kraft Foods',
'K': 'Kellogg',
'UN': 'Unilever',
'MAR': 'Marriott',
@@ -129,11 +159,9 @@
'AAPL': 'Apple',
'SAP': 'SAP',
'CSCO': 'Cisco',
'TXN': 'Texas instruments',
'TXN': 'Texas Instruments',
'XRX': 'Xerox',
'LMT': 'Lookheed Martin',
'WMT': 'Wal-Mart',
'WBA': 'Walgreen',
'HD': 'Home Depot',
'GSK': 'GlaxoSmithKline',
'PFE': 'Pfizer',
@@ -149,14 +177,16 @@

symbols, names = np.array(list(symbol_dict.items())).T

quotes = [quotes_historical_yahoo_ochl(symbol, d1, d2, asobject=True)
for symbol in symbols]
quotes = [
quotes_historical_google(symbol, d1, d2) for symbol in symbols
]

open = np.array([q.open for q in quotes]).astype(np.float)
close = np.array([q.close for q in quotes]).astype(np.float)
close_prices = np.stack([q['close'] for q in quotes])
open_prices = np.stack([q['open'] for q in quotes])

# The daily variations of the quotes are what carry most information
variation = close - open
variation = close_prices - open_prices


###############################################################################
# Learn a graphical structure from the correlations
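
For context, a minimal usage sketch of the new ``quotes_historical_google`` helper defined above (this assumes the Google Finance CSV endpoint is still reachable, which it no longer is today, so it is illustrative only)::

    from datetime import datetime

    d1 = datetime(2003, 1, 1)
    d2 = datetime(2008, 1, 1)
    q = quotes_historical_google('AAPL', d1, d2)

    # q is a structured array whose fields follow the dtype defined above.
    print(q.dtype.names)  # ('date', 'open', 'high', 'low', 'close', 'volume')
    variation = q['close'] - q['open']   # daily variation used for clustering
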
2 changes: 1 addition & 1 deletion examples/applications/plot_tomography_l1_reconstruction.py
@@ -99,7 +99,7 @@ def build_projection_operator(l_x, n_dir):
def generate_synthetic_data():
""" Synthetic binary data """
rs = np.random.RandomState(0)
n_pts = 36.
n_pts = 36
x, y = np.ogrid[0:l, 0:l]
mask_outer = (x - l / 2) ** 2 + (y - l / 2) ** 2 < (l / 2) ** 2
mask = np.zeros((l, l))
8 changes: 4 additions & 4 deletions examples/classification/plot_digits_classification.py
@@ -46,17 +46,17 @@
classifier = svm.SVC(gamma=0.001)

# We learn the digits on the first half of the digits
classifier.fit(data[:n_samples / 2], digits.target[:n_samples / 2])
classifier.fit(data[:n_samples // 2], digits.target[:n_samples // 2])

# Now predict the value of the digit on the second half:
expected = digits.target[n_samples / 2:]
predicted = classifier.predict(data[n_samples / 2:])
expected = digits.target[n_samples // 2:]
predicted = classifier.predict(data[n_samples // 2:])

print("Classification report for classifier %s:\n%s\n"
% (classifier, metrics.classification_report(expected, predicted)))
print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted))

images_and_predictions = list(zip(digits.images[n_samples / 2:], predicted))
images_and_predictions = list(zip(digits.images[n_samples // 2:], predicted))
for index, (image, prediction) in enumerate(images_and_predictions[:4]):
plt.subplot(2, 4, index + 5)
plt.axis('off')
14 changes: 7 additions & 7 deletions examples/covariance/plot_robust_vs_empirical_covariance.py
@@ -67,7 +67,7 @@

range_n_outliers = np.concatenate(
(np.linspace(0, n_samples / 8, 5),
np.linspace(n_samples / 8, n_samples / 2, 5)[1:-1]))
np.linspace(n_samples / 8, n_samples / 2, 5)[1:-1])).astype(np.int)

# definition of arrays to store results
err_loc_mcd = np.zeros((range_n_outliers.size, repeat))
@@ -135,13 +135,13 @@
plt.errorbar(range_n_outliers, err_cov_mcd.mean(1),
yerr=err_cov_mcd.std(1),
label="Robust covariance (mcd)", color='m')
plt.errorbar(range_n_outliers[:(x_size / 5 + 1)],
err_cov_emp_full.mean(1)[:(x_size / 5 + 1)],
yerr=err_cov_emp_full.std(1)[:(x_size / 5 + 1)],
plt.errorbar(range_n_outliers[:(x_size // 5 + 1)],
err_cov_emp_full.mean(1)[:(x_size // 5 + 1)],
yerr=err_cov_emp_full.std(1)[:(x_size // 5 + 1)],
label="Full data set empirical covariance", color='green')
plt.plot(range_n_outliers[(x_size / 5):(x_size / 2 - 1)],
err_cov_emp_full.mean(1)[(x_size / 5):(x_size / 2 - 1)], color='green',
ls='--')
plt.plot(range_n_outliers[(x_size // 5):(x_size // 2 - 1)],
err_cov_emp_full.mean(1)[(x_size // 5):(x_size // 2 - 1)],
color='green', ls='--')
plt.errorbar(range_n_outliers, err_cov_emp_pure.mean(1),
yerr=err_cov_emp_pure.std(1),
label="Pure data set empirical covariance", color='black')
@@ -36,10 +36,10 @@
X = latents + np.random.normal(size=4 * n).reshape((n, 4))
Y = latents + np.random.normal(size=4 * n).reshape((n, 4))

X_train = X[:n / 2]
Y_train = Y[:n / 2]
X_test = X[n / 2:]
Y_test = Y[n / 2:]
X_train = X[:n // 2]
Y_train = Y[:n // 2]
X_test = X[n // 2:]
Y_test = Y[n // 2:]

print("Corr(X)")
print(np.round(np.corrcoef(X.T), 2))
4 changes: 2 additions & 2 deletions examples/decomposition/plot_sparse_coding.py
@@ -44,13 +44,13 @@ def ricker_matrix(width, resolution, n_components):
resolution = 1024
subsampling = 3 # subsampling factor
width = 100
n_components = resolution / subsampling
n_components = resolution // subsampling

# Compute a wavelet dictionary
D_fixed = ricker_matrix(width=width, resolution=resolution,
n_components=n_components)
D_multi = np.r_[tuple(ricker_matrix(width=w, resolution=resolution,
n_components=np.floor(n_components / 5))
n_components=n_components // 5)
for w in (10, 50, 100, 500, 1000))]

# Generate a signal
12 changes: 6 additions & 6 deletions examples/exercises/plot_iris_exercise.py
@@ -29,10 +29,10 @@
X = X[order]
y = y[order].astype(np.float)

X_train = X[:.9 * n_sample]
y_train = y[:.9 * n_sample]
X_test = X[.9 * n_sample:]
y_test = y[.9 * n_sample:]
X_train = X[:int(.9 * n_sample)]
y_train = y[:int(.9 * n_sample)]
X_test = X[int(.9 * n_sample):]
y_test = y[int(.9 * n_sample):]

# fit the model
for fig_num, kernel in enumerate(('linear', 'rbf', 'poly')):
@@ -58,8 +58,8 @@
# Put the result into a color plot
Z = Z.reshape(XX.shape)
plt.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired)
plt.contour(XX, YY, Z, colors=['k', 'k', 'k'], linestyles=['--', '-', '--'],
levels=[-.5, 0, .5])
plt.contour(XX, YY, Z, colors=['k', 'k', 'k'],
linestyles=['--', '-', '--'], levels=[-.5, 0, .5])

plt.title(kernel)
plt.show()
4 changes: 2 additions & 2 deletions examples/linear_model/plot_lasso_and_elasticnet.py
@@ -32,8 +32,8 @@

# Split data in train set and test set
n_samples = X.shape[0]
X_train, y_train = X[:n_samples / 2], y[:n_samples / 2]
X_test, y_test = X[n_samples / 2:], y[n_samples / 2:]
X_train, y_train = X[:n_samples // 2], y[:n_samples // 2]
X_test, y_test = X[n_samples // 2:], y[n_samples // 2:]

###############################################################################
# Lasso
8 changes: 4 additions & 4 deletions examples/neighbors/plot_kde_1d.py
@@ -38,8 +38,8 @@
# Plot the progression of histograms to kernels
np.random.seed(1)
N = 20
X = np.concatenate((np.random.normal(0, 1, 0.3 * N),
np.random.normal(5, 1, 0.7 * N)))[:, np.newaxis]
X = np.concatenate((np.random.normal(0, 1, int(0.3 * N)),
np.random.normal(5, 1, int(0.7 * N))))[:, np.newaxis]
X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis]
bins = np.linspace(-5, 10, 10)

@@ -116,8 +116,8 @@ def format_func(x, loc):
# Plot a 1D density example
N = 100
np.random.seed(1)
X = np.concatenate((np.random.normal(0, 1, 0.3 * N),
np.random.normal(5, 1, 0.7 * N)))[:, np.newaxis]
X = np.concatenate((np.random.normal(0, 1, int(0.3 * N)),
np.random.normal(5, 1, int(0.7 * N))))[:, np.newaxis]

X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis]

8 changes: 5 additions & 3 deletions examples/plot_kernel_approximation.py
@@ -68,12 +68,14 @@
data -= data.mean(axis=0)

# We learn the digits on the first half of the digits
data_train, targets_train = data[:n_samples / 2], digits.target[:n_samples / 2]
data_train, targets_train = (data[:n_samples // 2],
digits.target[:n_samples // 2])


# Now predict the value of the digit on the second half:
data_test, targets_test = data[n_samples / 2:], digits.target[n_samples / 2:]
#data_test = scaler.transform(data_test)
data_test, targets_test = (data[n_samples // 2:],
digits.target[n_samples // 2:])
# data_test = scaler.transform(data_test)

# Create a classifier: a support vector classifier
kernel_svm = svm.SVC(gamma=.2)
6 changes: 3 additions & 3 deletions examples/plot_kernel_ridge_regression.py
@@ -54,7 +54,7 @@
y = np.sin(X).ravel()

# Add noise to targets
y[::5] += 3 * (0.5 - rng.rand(X.shape[0]/5))
y[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))

X_plot = np.linspace(0, 5, 100000)[:, None]

@@ -119,8 +119,8 @@
# Generate sample data
X = 5 * rng.rand(10000, 1)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.rand(X.shape[0]/5))
sizes = np.logspace(1, 4, 7)
y[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))
sizes = np.logspace(1, 4, 7, dtype=np.int)
for name, estimator in {"KRR": KernelRidge(kernel='rbf', alpha=0.1,
gamma=10),
"SVR": SVR(kernel='rbf', C=1e1, gamma=10)}.items():
11 changes: 6 additions & 5 deletions examples/plot_multioutput_face_completion.py
@@ -39,10 +39,12 @@
test = test[face_ids, :]

n_pixels = data.shape[1]
X_train = train[:, :np.ceil(0.5 * n_pixels)] # Upper half of the faces
y_train = train[:, np.floor(0.5 * n_pixels):] # Lower half of the faces
X_test = test[:, :np.ceil(0.5 * n_pixels)]
y_test = test[:, np.floor(0.5 * n_pixels):]
# Upper half of the faces
X_train = train[:, :(n_pixels + 1) // 2]
# Lower half of the faces
y_train = train[:, n_pixels // 2:]
X_test = test[:, :(n_pixels + 1) // 2]
y_test = test[:, n_pixels // 2:]

# Fit estimators
ESTIMATORS = {
@@ -74,7 +76,6 @@
sub = plt.subplot(n_faces, n_cols, i * n_cols + 1,
title="true faces")


sub.axis("off")
sub.imshow(true_face.reshape(image_shape),
cmap=plt.cm.gray,
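
The face-completion change above swaps ``np.ceil``/``np.floor`` of ``0.5 * n_pixels`` for pure integer arithmetic: for non-negative ``n``, ``(n + 1) // 2`` equals ``ceil(n / 2)`` and ``n // 2`` equals ``floor(n / 2)``, and both results are plain ints usable directly as slice bounds. A quick check::

    import math

    assert all((n + 1) // 2 == math.ceil(n / 2.0) for n in range(1000))
    assert all(n // 2 == math.floor(n / 2.0) for n in range(1000))
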
4 changes: 2 additions & 2 deletions examples/svm/plot_svm_scale_c.py
@@ -106,8 +106,8 @@

# l2 data: non sparse, but less features
y_2 = np.sign(.5 - rnd.rand(n_samples))
X_2 = rnd.randn(n_samples, n_features / 5) + y_2[:, np.newaxis]
X_2 += 5 * rnd.randn(n_samples, n_features / 5)
X_2 = rnd.randn(n_samples, n_features // 5) + y_2[:, np.newaxis]
X_2 += 5 * rnd.randn(n_samples, n_features // 5)

clf_sets = [(LinearSVC(penalty='l1', loss='squared_hinge', dual=False,
tol=1e-3),
2 changes: 1 addition & 1 deletion examples/tree/plot_unveil_tree_structure.py
@@ -54,7 +54,7 @@

# The tree structure can be traversed to compute various properties such
# as the depth of each node and whether or not it is a leaf.
node_depth = np.zeros(shape=n_nodes)
node_depth = np.zeros(shape=n_nodes, dtype=np.int64)
is_leaves = np.zeros(shape=n_nodes, dtype=bool)
stack = [(0, -1)] # seed is the root node id and its parent depth
while len(stack) > 0:
2 changes: 1 addition & 1 deletion sklearn/gaussian_process/kernels.py
@@ -1852,7 +1852,7 @@ def diag(self, X):
Diagonal of kernel k(X, X)
"""
# We have to fall back to slow way of computing diagonal
return np.apply_along_axis(self, 1, X)[:, 0]
return np.apply_along_axis(self, 1, X).ravel()

def is_stationary(self):
"""Returns whether the kernel is stationary. """
19 changes: 13 additions & 6 deletions sklearn/model_selection/tests/test_split.py
@@ -1028,16 +1028,23 @@ def test_cv_iterable_wrapper():
# Since the wrapped iterable is enlisted and stored,
# split can be called any number of times to produce
# consistent results.
assert_array_equal(list(kf_iter_wrapped.split(X, y)),
list(kf_iter_wrapped.split(X, y)))
np.testing.assert_equal(list(kf_iter_wrapped.split(X, y)),
list(kf_iter_wrapped.split(X, y)))
# If the splits are randomized, successive calls to split yields different
# results
kf_randomized_iter = KFold(n_splits=5, shuffle=True).split(X, y)
kf_randomized_iter_wrapped = check_cv(kf_randomized_iter)
assert_array_equal(list(kf_randomized_iter_wrapped.split(X, y)),
list(kf_randomized_iter_wrapped.split(X, y)))
assert_true(np.any(np.array(list(kf_iter_wrapped.split(X, y))) !=
np.array(list(kf_randomized_iter_wrapped.split(X, y)))))
np.testing.assert_equal(list(kf_randomized_iter_wrapped.split(X, y)),
list(kf_randomized_iter_wrapped.split(X, y)))

try:
np.testing.assert_equal(list(kf_iter_wrapped.split(X, y)),
list(kf_randomized_iter_wrapped.split(X, y)))
splits_are_equal = True
except AssertionError:
splits_are_equal = False
assert_false(splits_are_equal, "If the splits are randomized, "
"successive calls to split should yield different results")


def test_group_kfold():
