/
_fetchers.py
220 lines (182 loc) · 6.6 KB
/
_fetchers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
from numpy import array, frombuffer, load
from ._registry import registry, registry_urls
try:
import pooch
except ImportError:
pooch = None
data_fetcher = None
else:
data_fetcher = pooch.create(
# Use the default cache folder for the operating system
# Pooch uses appdirs (https://github.com/ActiveState/appdirs) to
# select an appropriate directory for the cache on each platform.
path=pooch.os_cache("scipy-data"),
# The remote data is on Github
# base_url is a required param, even though we override this
# using individual urls in the registry.
base_url="https://github.com/scipy/",
registry=registry,
urls=registry_urls
)
def fetch_data(dataset_name, data_fetcher=data_fetcher):
if data_fetcher is None:
raise ImportError("Missing optional dependency 'pooch' required "
"for scipy.datasets module. Please use pip or "
"conda to install 'pooch'.")
# The "fetch" method returns the full path to the downloaded data file.
return data_fetcher.fetch(dataset_name)
def ascent():
"""
Get an 8-bit grayscale bit-depth, 512 x 512 derived image for easy
use in demos.
The image is derived from accent-to-the-top.jpg at
http://www.public-domain-image.com/people-public-domain-images-pictures/
Parameters
----------
None
Returns
-------
ascent : ndarray
convenient image to use for testing and demonstration
Examples
--------
>>> import scipy.datasets
>>> ascent = scipy.datasets.ascent()
>>> ascent.shape
(512, 512)
>>> ascent.max()
255
>>> import matplotlib.pyplot as plt
>>> plt.gray()
>>> plt.imshow(ascent)
>>> plt.show()
"""
import pickle
# The file will be downloaded automatically the first time this is run,
# returning the path to the downloaded file. Afterwards, Pooch finds
# it in the local cache and doesn't repeat the download.
fname = fetch_data("ascent.dat")
# Now we just need to load it with our standard Python tools.
with open(fname, 'rb') as f:
ascent = array(pickle.load(f))
return ascent
def electrocardiogram():
"""
Load an electrocardiogram as an example for a 1-D signal.
The returned signal is a 5 minute long electrocardiogram (ECG), a medical
recording of the heart's electrical activity, sampled at 360 Hz.
Returns
-------
ecg : ndarray
The electrocardiogram in millivolt (mV) sampled at 360 Hz.
Notes
-----
The provided signal is an excerpt (19:35 to 24:35) from the `record 208`_
(lead MLII) provided by the MIT-BIH Arrhythmia Database [1]_ on
PhysioNet [2]_. The excerpt includes noise induced artifacts, typical
heartbeats as well as pathological changes.
.. _record 208: https://physionet.org/physiobank/database/html/mitdbdir/records.htm#208
.. versionadded:: 1.1.0
References
----------
.. [1] Moody GB, Mark RG. The impact of the MIT-BIH Arrhythmia Database.
IEEE Eng in Med and Biol 20(3):45-50 (May-June 2001).
(PMID: 11446209); :doi:`10.13026/C2F305`
.. [2] Goldberger AL, Amaral LAN, Glass L, Hausdorff JM, Ivanov PCh,
Mark RG, Mietus JE, Moody GB, Peng C-K, Stanley HE. PhysioBank,
PhysioToolkit, and PhysioNet: Components of a New Research Resource
for Complex Physiologic Signals. Circulation 101(23):e215-e220;
:doi:`10.1161/01.CIR.101.23.e215`
Examples
--------
>>> from scipy.datasets import electrocardiogram
>>> ecg = electrocardiogram()
>>> ecg
array([-0.245, -0.215, -0.185, ..., -0.405, -0.395, -0.385])
>>> ecg.shape, ecg.mean(), ecg.std()
((108000,), -0.16510875, 0.5992473991177294)
As stated the signal features several areas with a different morphology.
E.g., the first few seconds show the electrical activity of a heart in
normal sinus rhythm as seen below.
>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> fs = 360
>>> time = np.arange(ecg.size) / fs
>>> plt.plot(time, ecg)
>>> plt.xlabel("time in s")
>>> plt.ylabel("ECG in mV")
>>> plt.xlim(9, 10.2)
>>> plt.ylim(-1, 1.5)
>>> plt.show()
After second 16, however, the first premature ventricular contractions,
also called extrasystoles, appear. These have a different morphology
compared to typical heartbeats. The difference can easily be observed
in the following plot.
>>> plt.plot(time, ecg)
>>> plt.xlabel("time in s")
>>> plt.ylabel("ECG in mV")
>>> plt.xlim(46.5, 50)
>>> plt.ylim(-2, 1.5)
>>> plt.show()
At several points large artifacts disturb the recording, e.g.:
>>> plt.plot(time, ecg)
>>> plt.xlabel("time in s")
>>> plt.ylabel("ECG in mV")
>>> plt.xlim(207, 215)
>>> plt.ylim(-2, 3.5)
>>> plt.show()
Finally, examining the power spectrum reveals that most of the biosignal is
made up of lower frequencies. At 60 Hz the noise induced by the mains
electricity can be clearly observed.
>>> from scipy.signal import welch
>>> f, Pxx = welch(ecg, fs=fs, nperseg=2048, scaling="spectrum")
>>> plt.semilogy(f, Pxx)
>>> plt.xlabel("Frequency in Hz")
>>> plt.ylabel("Power spectrum of the ECG in mV**2")
>>> plt.xlim(f[[0, -1]])
>>> plt.show()
"""
fname = fetch_data("ecg.dat")
with load(fname) as file:
ecg = file["ecg"].astype(int) # np.uint16 -> int
# Convert raw output of ADC to mV: (ecg - adc_zero) / adc_gain
ecg = (ecg - 1024) / 200.0
return ecg
def face(gray=False):
"""
Get a 1024 x 768, color image of a raccoon face.
raccoon-procyon-lotor.jpg at http://www.public-domain-image.com
Parameters
----------
gray : bool, optional
If True return 8-bit grey-scale image, otherwise return a color image
Returns
-------
face : ndarray
image of a raccoon face
Examples
--------
>>> import scipy.datasets
>>> face = scipy.datasets.face()
>>> face.shape
(768, 1024, 3)
>>> face.max()
255
>>> face.dtype
dtype('uint8')
>>> import matplotlib.pyplot as plt
>>> plt.gray()
>>> plt.imshow(face)
>>> plt.show()
"""
import bz2
fname = fetch_data("face.dat")
with open(fname, 'rb') as f:
rawdata = f.read()
face_data = bz2.decompress(rawdata)
face = frombuffer(face_data, dtype='uint8')
face.shape = (768, 1024, 3)
if gray is True:
face = (0.21 * face[:, :, 0] + 0.71 * face[:, :, 1] +
0.07 * face[:, :, 2]).astype('uint8')
return face