-
-
Notifications
You must be signed in to change notification settings - Fork 19.2k
Description
Code Sample
plt.figure(figsize = (10,12))
for i, source in enumerate(['EXT_SOURCE_1','EXT_SOURCE_2','EXT_SOURCE_3']):
# create subplot for each source
plt.subplot(3, 1, i+1)
sns.kdeplot(train.loc[train['TARGET'] == 0, source], label = 'target == 0')
sns.kdeplot(train.loc[train['TARGET'] == 1, source], label = 'target == 1')
plt.title('Distribution of %s by Target Value' % source)
plt.xlabel('%s' % source)
plt.ylabel('Density')
plt.tight_layout(h_pad = 2.5)
ValueError Traceback (most recent call last)
in ()
10 # create subplot for each source
11 plt.subplot(3, 1, i+1)
---> 12 sns.kdeplot(train.loc[train['TARGET'] == 0, source], label = 'target == 0')
13 sns.kdeplot(train.loc[train['TARGET'] == 1, source], label = 'target == 1')
14
~\AppData\Roaming\Python\Python37\site-packages\seaborn\distributions.py in kdeplot(data, data2, shade, vertical, kernel, bw, gridsize, cut, clip, legend, cumulative, shade_lowest, cbar, cbar_ax, cbar_kws, ax, **kwargs)
689 ax = _univariate_kdeplot(data, shade, vertical, kernel, bw,
690 gridsize, cut, clip, legend, ax,
--> 691 cumulative=cumulative, **kwargs)
692
693 return ax
~\AppData\Roaming\Python\Python37\site-packages\seaborn\distributions.py in _univariate_kdeplot(data, shade, vertical, kernel, bw, gridsize, cut, clip, legend, ax, cumulative, **kwargs)
292 "only implemented in statsmodels."
293 "Please install statsmodels.")
--> 294 x, y = _scipy_univariate_kde(data, bw, gridsize, cut, clip)
295
296 # Make sure the density is nonnegative
~\AppData\Roaming\Python\Python37\site-packages\seaborn\distributions.py in _scipy_univariate_kde(data, bw, gridsize, cut, clip)
364 """Compute a univariate kernel density estimate using scipy."""
365 try:
--> 366 kde = stats.gaussian_kde(data, bw_method=bw)
367 except TypeError:
368 kde = stats.gaussian_kde(data)
~\AppData\Roaming\Python\Python37\site-packages\scipy\stats\kde.py in __init__(self, dataset, bw_method)
170
171 self.d, self.n = self.dataset.shape
--> 172 self.set_bandwidth(bw_method=bw_method)
173
174 def evaluate(self, points):
~\AppData\Roaming\Python\Python37\site-packages\scipy\stats\kde.py in set_bandwidth(self, bw_method)
497 raise ValueError(msg)
498
--> 499 self._compute_covariance()
500
501 def _compute_covariance(self):
~\AppData\Roaming\Python\Python37\site-packages\scipy\stats\kde.py in _compute_covariance(self)
508 self._data_covariance = atleast_2d(np.cov(self.dataset, rowvar=1,
509 bias=False))
--> 510 self._data_inv_cov = linalg.inv(self._data_covariance)
511
512 self.covariance = self._data_covariance * self.factor**2
~\AppData\Roaming\Python\Python37\site-packages\scipy\linalg\basic.py in inv(a, overwrite_a, check_finite)
944
945 """
--> 946 a1 = _asarray_validated(a, check_finite=check_finite)
947 if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]:
948 raise ValueError('expected square matrix')
~\AppData\Roaming\Python\Python37\site-packages\scipy\_lib\_util.py in _asarray_validated(a, check_finite, sparse_ok, objects_ok, mask_ok, as_inexact)
236 raise ValueError('masked arrays are not supported')
237 toarray = np.asarray_chkfinite if check_finite else np.asarray
--> 238 a = toarray(a)
239 if not objects_ok:
240 if a.dtype is np.dtype('O'):
~\AppData\Roaming\Python\Python37\site-packages\numpy\lib\function_base.py in asarray_chkfinite(a, dtype, order)
459 if a.dtype.char in typecodes['AllFloat'] and not np.isfinite(a).all():
460 raise ValueError(
--> 461 "array must not contain infs or NaNs")
462 return a
463
ValueError: array must not contain infs or NaNs
Problem description
As far as I'm aware, this issue has been raised before (#14821) and fixed, but I seem to have found another instance of it. The code above raises the ValueError shown and leaves an empty plot.
The `train` dataframe is 'application_test.csv' from https://www.kaggle.com/c/home-credit-default-risk/download/application_test.csv
I couldn't easily replicate this with a minimal example; the example in #14821 plots correctly.
Expected Output
As in #14821, the issue is resolved by calling dropna() on the data before plotting.
Output of pd.show_versions()
INSTALLED VERSIONS
commit: None
python: 3.7.0.final.0
python-bits: 64
OS: Windows
OS-release: 10
machine: AMD64
processor: Intel64 Family 6 Model 42 Stepping 7, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None.None
pandas: 0.23.4
pytest: None
pip: 18.0
setuptools: 39.0.1
Cython: None
numpy: 1.15.1
scipy: 1.1.0
pyarrow: None
xarray: None
IPython: 6.5.0
sphinx: None
patsy: None
dateutil: 2.7.3
pytz: 2018.5
blosc: None
bottleneck: None
tables: None
numexpr: None
feather: None
matplotlib: 2.2.3
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: None
bs4: None
html5lib: 1.0.1
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: 2.10
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None