Skip to content

Commit

Permalink
bump version 0.5
Browse files (browse the repository at this point in the history)
  • Loading branch information
mmourafiq committed May 29, 2018
1 parent d9f785a commit 42227d5
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 26 deletions.
23 changes: 15 additions & 8 deletions pandas_summary/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import numpy as np
import pandas as pd
from pandas.core import common
from pandas.api import types


Expand Down Expand Up @@ -158,7 +157,8 @@ def _get_top_correlations(self, column, threshold=0.65, top=3):
inplace=False)
top_corr = column_corr[(column_corr > threshold)][:top].index
correlations = self.corr[column][top_corr].to_dict()
return ', '.join('{}: {}'.format(col, self._percent(val)) for col, val in correlations.items())
return ', '.join('{}: {}'.format(col, self._percent(val)) for
col, val in correlations.items())

def _get_numeric_summary(self, column, plot=True):
series = self.df[column]
Expand Down Expand Up @@ -196,13 +196,17 @@ def _get_numeric_summary(self, column, plot=True):
stats['deviating_of_median'] = deviating_of_median
stats['deviating_of_median_perc'] = deviating_of_median_perc
stats['top_correlations'] = self._get_top_correlations(column)
return pd.concat([pd.Series(stats, name=column), self.columns_stats.ix[:, column]], sort=True)
return pd.concat([pd.Series(stats, name=column),
self.columns_stats.ix[:, column]],
sort=True)

def _get_date_summary(self, column):
series = self.df[column]
stats = {'min': series.min(), 'max': series.max()}
stats['range'] = stats['max'] - stats['min']
return pd.concat([pd.Series(stats, name=column), self.columns_stats.ix[:, column]], sort=True)
return pd.concat([pd.Series(stats, name=column),
self.columns_stats.ix[:, column]],
sort=True)

def _get_categorical_summary(self, column):
series = self.df[column]
Expand All @@ -211,7 +215,9 @@ def _get_categorical_summary(self, column):
stats = {
'top': '{}: {}'.format(value_counts.index[0], value_counts.iloc[0]),
}
return pd.concat([pd.Series(stats, name=column), self.columns_stats.ix[:, column]], sort=True)
return pd.concat([pd.Series(stats, name=column),
self.columns_stats.ix[:, column]],
sort=True)

def _get_constant_summary(self, column):
return 'This is a constant value: {}'.format(self.df[column][0])
Expand All @@ -225,7 +231,9 @@ def _get_bool_summary(self, column):
stats['"{}" perc'.format(class_name)] = '{}'.format(
self._percent(class_value / self.length))

return pd.concat([pd.Series(stats, name=column), self.columns_stats.ix[:, column]], sort=True)
return pd.concat([pd.Series(stats, name=column),
self.columns_stats.ix[:, column]],
sort=True)

def _get_unique_summary(self, column):
return self.columns_stats.ix[:, column]
Expand Down Expand Up @@ -264,8 +272,7 @@ def get_columns(self, df, usage, columns=None):

if usage == self.INCLUDE:
try:
columns_included = columns_included.intersection(
pd.Index(columns))
columns_included = columns_included.intersection(pd.Index(columns))
except TypeError:
pass
elif usage == self.EXCLUDE:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def run_tests(self):


setup(name='pandas-summary',
version='0.0.41',
version='0.0.5',
description='An extension to pandas describe function.',
maintainer='Mourad Mourafiq',
maintainer_email='mouradmourafiq@gmail.com',
Expand Down
29 changes: 12 additions & 17 deletions tests/test_dataframesummary.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ class DataFrameSummaryTest(unittest.TestCase):

def setUp(self):
self.size = 1000
missing = [np.nan] * (self.size // 10) + list(range(10)) * \
((self.size - self.size // 10) // 10)
missing = ([np.nan] * (self.size // 10) + list(range(10)) *
((self.size - self.size // 10) // 10))
shuffle(missing)

self.types = [DataFrameSummary.TYPE_NUMERIC, DataFrameSummary.TYPE_BOOL,
Expand All @@ -34,8 +34,7 @@ def setUp(self):
'c'.format(i) for i in range(self.size)],
dnumerics1=range(self.size),
dnumerics2=range(self.size, 2 * self.size),
dnumerics3=list(range(self.size - self.size // 10)
) + list(range(-self.size // 10, 0)),
dnumerics3=list(range(self.size - self.size // 10)) + list(range(-self.size // 10, 0)),
dmissing=missing,
dconstant=['a'] * self.size,
ddates=pd.date_range('2010-01-01', periods=self.size, freq='1M')))
Expand Down Expand Up @@ -91,11 +90,12 @@ def test_column_stats_works_as_expected(self):
dtype='object')
expected[['dmissing']] = 100
assert_series_equal(column_stats[self.columns].loc['missing'],
expected[self.columns])
expected[self.columns],
check_dtype=False)

# missing_perc
expected = pd.Series(index=self.columns,
data=['0%'],
data=['0%'] * 10,
name='missing_perc',
dtype='object')

Expand All @@ -105,7 +105,7 @@ def test_column_stats_works_as_expected(self):

# types
expected = pd.Series(index=self.columns,
data=[np.nan],
data=[np.nan] * 10,
name='types',
dtype='object')

Expand Down Expand Up @@ -152,8 +152,7 @@ def test_uniques_summary(self):
DataFrameSummary.TYPE_UNIQUE],
name='duniques',
dtype=object)
assert_series_equal(self.dfs['duniques'],
expected)
assert_series_equal(self.dfs['duniques'], expected)

def test_constant_summary(self):
self.assertEqual(self.dfs['dconstant'], 'This is a constant value: a')
Expand All @@ -172,8 +171,7 @@ def test_bool1_summary(self):
name='dbool1',
dtype=object).sort_index()

assert_series_equal(self.dfs['dbool1'].sort_index(),
expected)
assert_series_equal(self.dfs['dbool1'].sort_index(), expected)

def test_bool2_summary(self):
count_values = self.df['dbool2'].value_counts()
Expand All @@ -200,8 +198,7 @@ def test_categorical_summary(self):
name='dcategoricals',
dtype=object)

assert_series_equal(self.dfs['dcategoricals'],
expected)
assert_series_equal(self.dfs['dcategoricals'], expected)

def test_dates_summary(self):
dmin = self.df['ddates'].min()
Expand All @@ -214,8 +211,7 @@ def test_dates_summary(self):
dtype=object).sort_index()

tmp = self.dfs['ddates'].sort_index()
assert_series_equal(tmp,
expected)
assert_series_equal(tmp, expected)

def test_numerics_summary(self):
num1 = self.df['dnumerics1']
Expand All @@ -242,5 +238,4 @@ def test_numerics_summary(self):
name='dnumerics1',
dtype=object)

assert_series_equal(self.dfs['dnumerics1'],
expected)
assert_series_equal(self.dfs['dnumerics1'], expected)

0 comments on commit 42227d5

Please sign in to comment.