Skip to content

Commit

Permalink
Add to_pandas() method and allow Data creation from pandas DataFrame …
Browse files Browse the repository at this point in the history
…for seamless roundtripping
  • Loading branch information
gb119 committed Aug 16, 2019
1 parent 772902c commit 6ed271a
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 5 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ install:
- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION
- source activate test-environment
# This will ensure we collect all the necessary dependencies so long as master doesn't change them!
- conda install pandas # Test with pandas installed
- conda install --yes stoner
# Force upgraded scikit-image
# - conda install --yes scikit-image
Expand Down
49 changes: 49 additions & 0 deletions Stoner/Core.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,22 @@
except ImportError:
filemagic = None

try:
    import pandas as pd
except ImportError:
    # pandas is an optional dependency: code that needs it tests ``pd is None`` first.
    pd = None
else:
    # Only define the accessor once pandas is known to be importable. Keeping the class
    # out of the ``try`` body means an unrelated ImportError raised while building the
    # class is not mistaken for "pandas is missing".

    @pd.api.extensions.register_dataframe_accessor("metadata")
    class PandasMetadata(typeHintedDict):

        """Add a typeHintedDict to pandas DataFrames.

        The class is registered as the ``metadata`` accessor, so it is reached as
        ``dataframe.metadata``.
        """

        def __init__(self, pandas_obj):
            """Create an empty metadata dictionary attached to *pandas_obj*.

            Args:
                pandas_obj (pandas.DataFrame):
                    The DataFrame whose ``metadata`` attribute is being accessed; a reference is
                    kept in ``self._obj``.
            """
            super(PandasMetadata, self).__init__()
            self._obj = pandas_obj


def __add_core__(other, newdata):
"""Implements the core work of adding other to self and modifying newdata.
Expand Down Expand Up @@ -440,6 +456,29 @@ def _init_single(self, *args, **kargs):
self.metadata = copy.deepcopy(arg.metadata)
self.column_headers = ["X", "Y", "Image Intensity"]
self.setas = "xyz"
elif pd is not None and isinstance(arg, pd.DataFrame):
self.data = arg.values
ch = []
for ix, col in enumerate(arg):
if isinstance(col, string_types):
ch.append(col)
elif isiterable(ch):
for ch_i in col:
if isinstance(ch_i, string_types):
ch.append(ch_i)
break
else:
ch.append("Column {}".format(ix))
else:
ch.append("Column {}:{}", format(ix, ch))
self.column_headers = ch
self.metadata.update(arg.metadata)
if isinstance(arg.columns, pd.MultiIndex) and len(arg.columns.levels) > 1:
for label in arg.columns.get_level_values(1):
if label not in list("xyzdefuvw."):
break
else:
self.setas = list(arg.columns.get_level_values(1))
elif isiterable(arg) and all_type(arg, string_types):
self.column_headers = list(arg)
elif isiterable(arg) and all_type(arg, _np_.ndarray):
Expand Down Expand Up @@ -2817,6 +2856,16 @@ def swap_column(self, *swp, **kargs):
self.data.swap_column(*swp, **kargs)
return self

def to_pandas(self):
    """Create a pandas DataFrame from this object's data, column headers, setas and metadata.

    The DataFrame's columns are a two-level MultiIndex: the level named ``Header`` holds the
    column headers and the level named ``Setas`` holds the matching setas entries. The metadata
    is copied into the DataFrame's ``metadata`` accessor.

    Returns:
        (pandas.DataFrame):
            A DataFrame built from ``self.data`` with the MultiIndex columns described above.

    Raises:
        NotImplementedError:
            If pandas could not be imported.
    """
    if pd is None:
        raise NotImplementedError("Pandas not available")
    # Pair headers with setas entries up front; zip() stops at the shorter of the two sequences.
    pairs = list(zip(self.column_headers, self.setas))
    headers = [header for header, _ in pairs]
    assignments = [assignment for _, assignment in pairs]
    # Build the column index directly rather than via an intermediate DataFrame.
    columns = pd.MultiIndex.from_arrays([headers, assignments], names=["Header", "Setas"])
    df = pd.DataFrame(self.data, columns=columns)
    df.metadata.update(self.metadata)
    return df

def unique(self, col, return_index=False, return_inverse=False):
"""Return the unique values from the specified column - pass through for numpy.unique.
Expand Down
2 changes: 1 addition & 1 deletion tests/Stoner/folders/test_each.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def test_each(self):
fldr5.each(hysteresis_correct,setas="3.xy",saturated_fraction=0.25)
self.assertTrue("Hc" in fldr5[0],"Call on DataFolder.each() failed to apply function to folder")
meths=[x for x in dir(fldr6.each) if not x.startswith("_")]
self.assertEqual(len(meths),126 if python_v3 else 129,"Dir of folders.each failed ({}).".format(len(meths)))
self.assertEqual(len(meths),127 if python_v3 else 129,"Dir of folders.each failed ({}).".format(len(meths)))

def test_attr_access(self):
self.fldr=SF.PlotFolder(path.join(self.datadir,"NLIV"),pattern="*.txt",setas="yx")
Expand Down
12 changes: 8 additions & 4 deletions tests/Stoner/test_Core.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ def test_constructor(self):
self.assertEqual(d.shape,(100,3),"Construction from dictionary of columns failed.")
d=Data(False)
self.assertEqual(self.d,d,"Faked file-dialog test")
d=self.d.clone
df=d.to_pandas()
e=Data(df)
self.assertEqual(d,e,"Roundtripping through Pandas DataFrame failed.")

def test_column(self):
for i,c in enumerate(self.d.column_headers):
Expand Down Expand Up @@ -208,10 +212,10 @@ def test_dir(self):
'__le__', '__lt__', '__reversed__', '__slots__',"_abc_negative_cache","_abc_registry",
"_abc_negative_cache_version","_abc_cache","_abc_impl"])
self.attrs=set(dir(self.d))-bad_keys
if len(self.attrs)!=226:
if len(self.attrs)!=227:
expected={'_conv_string', '__str__', 'clear', 'scale', '__add__', 'popitem', 'priority', '_init_single', 'pop', 'reorder_columns', '_col_label', 'subclasses', '__sizeof__', 'rows', 'plot_matrix', '_PlotMixin__SurfPlotter', '_showfig', '__and__', '_repr_html_', '_pop_mask', 'filename', 'smooth', '__weakref__', 'dir', '_PlotMixin__mpl3DQuiver', 'spline', '__format__', 'plot_xy', 'labels', '_fix_kargs', 'span', '__getattr__', '_push_mask', 'normalise', 'mean', 'sort', '_set_mask', 'shape', 'x2', 'clone', 'save', 'setdefault', 'update', 'plot', 'colormap_xyz', 'plot_xyzuvw', '_patterns', 'section', 'count', 'extrapolate', 'fig', 'mime_type', 'find_col', 'SG_Filter', '__isub__', 'clip', '_fix_fig', '_AnalysisMixin__get_math_val', 'get_filename', 'griddata', 'axes', 'setas', '__sub__', '__floordiv__', '_baseclass', '_subplots', '__add_core__', 'max', 'rename', '__setstate__', 'rolling_window', '_labels', '__dict__', 'adjust_setas', 'column', '__mod__', 'xlim', '_repr_table_', '__abstractmethods__', '__call__', '_public_attrs_real', '__eq__', '_pyplot_proxy', 'stitch', 'lmfit', '_filename', '__lshift__', '_fix_titles', 'multiple', '__dir__', 'column_headers', 'header', '_public_attrs', 'swap_column', 'annotate_fit', '_init_many', 'min', '_load', '__sub_core__', 'dtype', '__doc__', '_col_args', 'filter', '__new__', '__len__', 'format', 'ylim', 'ax', '__hash__', '_PlotMixin__figure', 'polyfit', '__repr__', 'subtract', '__iand__', 'debug', 'diffsum', 'split', 'ylabel', '__iter__', '_vector_color', '__invert__', '__repr_core__', 'del_nan', 'y2', '__contains__', '__reduce__', 'plot_xyuv', 'plot_xyuvw', '__delattr__', 'curve_fit', '__module__', '_conv_float', '__getattribute__', 'keys', 'legend', 'quiver_plot', 'metadata', 'plot_xyz', '__regexp_meta__', 'data', 'figure', 'records', '_DataFile__parse_metadata', 'fignum', '__setattr__', 'insert_rows', 'add', 'make_bins', '_DataFile__setattr_col', '_repr_short_', '__getitem__', '_repr_limits', '_data', '_template', 
'outlier_detection', 'template', '__imod__', 'image_plot', '__setitem__', '_get_curve_fit_data', '_DataFile__search_index', 'no_fmt', '__class__', '_AnalysisMixin__threshold', 'dims', 'threshold', 'basename', '_AnalysisMixin__lmfit_one', 'del_rows', 'patterns', 'del_column', '_metadata', '__deepcopy__', 'search', '_record_curve_fit_result', 'positional_fmt', '__getstate__', 'interpolate', 'dict_records', '_span_slice', 'columns', 'title', '__delitem__', '_repr_html_private', '_DataFile__file_dialog', '_getattr_col', 'mask', 'add_column', 'subplot', 'subplots', 'peaks', '_MutableMapping__marker', 'subplot2grid', 'get', '_DataFile__read_iterable', 'contour_xyz', 'inset', '__meta__', '_VectorFieldPlot', '__init__', '_masks', 'select', 'unique', 'xlabel', '_Plot', '__iadd__', 'values', 'multiply', '_raise_type_error', 'divide', 'odr', '__reduce_ex__', 'cmap', 'showfig', '__subclasshook__', 'items', '_interesting_cols', '_init_double', '__ne__', '_fix_cols', 'integrate', 'decompose', 'bin', 'closest', '__and_core__', 'T', 'load', 'apply'}
print("="*120,"\n","Warning=====>",self.attrs-expected,expected-self.attrs)
self.assertEqual(len(self.attrs),226,"DataFile.__dir__ failed.")
self.assertEqual(len(self.attrs),227,"DataFile.__dir__ failed.")

def test_filter(self):
self.d._push_mask()
Expand Down Expand Up @@ -385,5 +389,5 @@ def test_metadata_save(self):
#test.test_filter()
# test.test_deltions()
#test.test_dir()
test.test_metadata_save()
#unittest.main()
#test.test_metadata_save()
unittest.main()

0 comments on commit 6ed271a

Please sign in to comment.