Skip to content

Commit

Permalink
technical edits to README.md example
Browse files Browse the repository at this point in the history
  • Loading branch information
kmayerb committed May 31, 2020
1 parent 4964fbd commit 9d93f69
Show file tree
Hide file tree
Showing 2 changed files with 195 additions and 48 deletions.
146 changes: 108 additions & 38 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,64 +1,47 @@
# zipdist

Keeping numpy and pandas attributes of python classes nice and tidy
Keeping NumPy and Pandas attributes of python classes nice and tidy

[![Build Status](https://travis-ci.com/kmayerb/zipdist.svg?branch=master)](https://travis-ci.com/kmayerb/zipdist)
[![Coverage Status](https://coveralls.io/repos/github/kmayerb/zipdist/badge.svg?branch=master)](https://coveralls.io/github/kmayerb/zipdist?branch=master)

## Example

Suppose you have a Python class Skinner.
You load it up with two attributes:
The Zipdist parent class provides methods to classes for saving
NumPy arrays and pandas DataFrame object attributes in a single
`.tar.gz` file and reloading those attributes back into a new instance.

* `bart` a basic numpy array of zeros, and
* `lisa` a smart pandas DataFrame

Because Y inherits methods from the parent Zipdist (in particular
`_save()`, and `_build()`, you can archive the non-json serializable attributes
of your class, and rebuild the attributes from the original
instance directly from `Simpsons.tar.gz`.

As an added benefit, the .tar.gz provides a tiddy human readable
record of your python class attributes as .csv files,
which you can port on over to R, Excel, Julia, or Matlab.
As an added benefit, the `.tar.gz` provides a tidy human-readable
record of your Python class attributes as .csv files,
which you can port on over to another platform like
R, Excel, Julia, or Matlab.

## Basic Example


```python

``` python
from zipdist.zip import Zipdist
import pandas as pd
import numpy as np
import sys
import os

class Skinner(Zipdist):
class Y(Zipdist):
def __init__(self, name):
self.name = name
self.year = None

# here you have an <Willy> and instance of the class Skinner.
Willy= Skinner(name ='Simpsons')
Willy.years = [2020,2019]
Willy.bart = np.zeros(10)
Willy.lisa = pd.DataFrame([{"Pi":3.1415,"e":2.7182}])
Willy._save(dest="Simpsons", dest_tar = "Simpsons.tar.gz")
assert os.path.isfile("Simpsons.tar.gz")

# Create a new instance, Chalmers. It has no attributes 'bart' or 'lisa'
Chalmers = Skinner("Simpsons")
assert 'bart' not in Chalmers.__dict__.keys()
assert 'years' not in Chalmers.__dict__.keys()
y = Y(name ='Simpsons')
y.years = [1989, 2020]
y.bart = np.zeros(10)
y.lisa = pd.DataFrame([{"Pi":3.1415,"e":2.7182}])
y._save(dest="Simpsons", dest_tar = "Simpsons.tar.gz")

# But you can rebuilt the attributes from Willy directly from a archived .tar.gz file
Chalmers._build(dest="Simpsons", dest_tar = "Simpsons.tar.gz")
assert isinstance(Chalmers.bart, np.ndarray)
assert isinstance(Chalmers.lisa, pd.DataFrame)

ynew = Y(name = "Simpsons")
ynew._build(dest="Simpsons", dest_tar = "Simpsons.tar.gz")
sys.stdout.write(f"ynew.years: {ynew.years}\n")
sys.stdout.write(f"ynew.lisa:\n{ynew.lisa}\n")
sys.stdout.write(f"ynew.bart: {ynew.bart}\n")
```

Here is what Zipdist does

```bash
Saving bart to .csv : Simpsons/bart.cs
Saving lisa to .csv : Simpsons/lisa.csv
Expand All @@ -76,3 +59,90 @@ setting simple attribute years to [2020, 2019]
setting [csv] to [np.ndarray] for attribute bart from: Simpsons/bart.cs
setting [csv] to [pd.DataFrame] for attribute lisa from: Simpsons/lisa.csv
```

```
ynew.years: [1989, 2020]
ynew.lisa:
Pi e
0 3.1415 2.7182
ynew.bart: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
```

## Explanation


```python
from zipdist.zip import Zipdist
import pandas as pd
import numpy as np
import sys
import os
```

Suppose you have some class `Y`. Let it inherit methods from Zipdist

```
class Y(Zipdist):
def __init__(self, name):
self.name = name
self.year = None
```

Say you instantiate an instance of `Y`, assigning it two new complex attributes:

* `bart` a basic numpy array of zeros, and
* `lisa` a smart pandas DataFrame

```python
y = Y(name ='Simpsons')
y.years = [1989, 2020]
y.bart = np.zeros(10)
y.lisa = pd.DataFrame([{"Pi":3.1415,"e":2.7182}])
```

### `_save()`

Because `Y` inherits methods from the parent Zipdist (in particular
`_save()` and `_build()`), you can archive the non-JSON-serializable attributes
of your class, and rebuild the attributes of the original
instance directly from `Simpsons.tar.gz`.

```python
# ._save creates the file Simpsons.tar.gz
y._save(dest="Simpsons", dest_tar = "Simpsons.tar.gz")
assert os.path.isfile("Simpsons.tar.gz")
```

### `_build()`

Suppose you want to rebuild the instance. (Note, it has no attributes `bart` or `lisa`)

```
ynew = Y("Simpsons")
assert 'bart' not in ynew.__dict__.keys()
assert 'years' not in ynew.__dict__.keys()
```
But you can rebuild the attributes of `y` directly from an archived `.tar.gz` file:
``` python
ynew._build(dest="Simpsons", dest_tar = "Simpsons.tar.gz")
assert isinstance(ynew.bart, np.ndarray)
assert isinstance(ynew.lisa, pd.DataFrame)
```

### `_ready()`

You can also reload simple (i.e., JSON-serializable) or complex (NumPy and pandas) attributes one by one, as desired: call `_ready()` first,
and then `_reload_complex('lisa')` or `_reload_simple('years')`.

```python
ynew = Y("Simpsons")
assert 'lisa' not in ynew.__dict__.keys()
assert 'bart' not in ynew.__dict__.keys()
assert 'years' not in ynew.__dict__.keys()
ynew._ready(dest="Simpsons", dest_tar = "Simpsons.tar.gz")
ynew._reload_complex('lisa')
assert isinstance(ynew.lisa, pd.DataFrame)
ynew._reload_simple('years')
assert isinstance(ynew.years, list)
```

97 changes: 87 additions & 10 deletions zipdist/tests/test_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,33 +61,96 @@ def test_Zipdist_read():
z._make_dest_directory("blah")
assert os.path.isdir("blah")

""" Integration Tests """

def test_basic_example():
""" Do a full _save and _build basic example """
def test_Zipdist_with_deeper_path():
"""
Supposing that we want to store .tar.gz in a deeper folder ./layer1
instead of working dir.
"""
if not os.path.isdir('layer1'):
os.mkdir("layer1")
class Y(Zipdist):
def __init__(self, name):
self.name = name
self.year = None

y = Y('Simpsons')
y = Y('Flanders')
y.years = [2020,2019]
y.bart = np.zeros(10)
y.lisa = pd.DataFrame([{"Pi":3.1415,"e":2.7182}])
y._save()
assert os.path.isfile("Simpsons.tar.gz")
y._save(dest = "layer1/Flanders", dest_tar = "layer1/Flanders.tar.gz" )
assert os.path.isfile("layer1/Flanders.tar.gz")

# Create a New Object, None of the prior attributes exists
y2 = Y("Flanders")
assert 'bart' not in y2.__dict__.keys()
assert 'years' not in y2.__dict__.keys()
# But you can rebuild it
y2._build(dest="layer1/Flanders", dest_tar = "layer1/Flanders.tar.gz")
assert isinstance(y2.bart, np.ndarray)
assert isinstance(y2.lisa, pd.DataFrame)


def test_Zipdist_with_shallower_path():
"""
Supposing that we want to store .tar.gz in a shallower folder (the parent
directory, ../) instead of working dir.
"""
class Y(Zipdist):
def __init__(self, name):
self.name = name
self.year = None

y = Y('Flanders')
y.years = [2020,2019]
y.bart = np.zeros(10)
y.lisa = pd.DataFrame([{"Pi":3.1415,"e":2.7182}])
y._save(dest = "../Springfield", dest_tar = "../Springfield.tar.gz" )
assert os.path.isfile("layer1/Flanders.tar.gz")

# Create a New Object, None of the prior attributes exists
assert os.path.isfile("Simpsons.tar.gz")
y2 = Y("Simpsons")
y2 = Y("Flanders")
assert 'bart' not in y2.__dict__.keys()
assert 'years' not in y2.__dict__.keys()
# But you can rebuild it
y2._build(dest="Simpsons", dest_tar = "Simpsons.tar.gz")
y2._build(dest = "../Springfield", dest_tar = "../Springfield.tar.gz")
assert isinstance(y2.bart, np.ndarray)
assert isinstance(y2.lisa, pd.DataFrame)




""" Integration Tests """
def test_basic_example():
""" Do a full _save and _build basic example """
class Y(Zipdist):
def __init__(self, name):
self.name = name
self.year = None

y = Y(name ='Simpsons')
y.years = [1989, 2020]
y.bart = np.zeros(10)
y.lisa = pd.DataFrame([{"Pi":3.1415,"e":2.7182}])
y._save(dest="Simpsons", dest_tar = "Simpsons.tar.gz")


ynew = Y(name = "Simpsons")
ynew._build(dest="Simpsons", dest_tar = "Simpsons.tar.gz")
sys.stdout.write(f"ynew.years: {ynew.years}\n")
sys.stdout.write(f"ynew.lisa:\n{ynew.lisa}\n")
sys.stdout.write(f"ynew.bart: {ynew.bart}\n")
ynew = Y("Simpsons")
assert 'lisa' not in ynew.__dict__.keys()
assert 'bart' not in ynew.__dict__.keys()
assert 'years' not in ynew.__dict__.keys()
ynew._ready(dest="Simpsons", dest_tar = "Simpsons.tar.gz")
ynew._reload_complex('lisa')
assert isinstance(ynew.lisa, pd.DataFrame)
ynew._reload_simple('years')
assert isinstance(ynew.years, list)


def test_Zipdist_save_using_name_attribute_only():
"""
What about the case where the user does not specify dest and dest_tar directly
Expand Down Expand Up @@ -143,7 +206,7 @@ def __init__(self, name):
y2._reload_complex(k ='lisa')
assert isinstance(y2.lisa, pd.DataFrame)

def test_Zipdist_relad_simple():
def test_Zipdist_reload_simple():
""" Example Where only one attribute is loaded using _ready, _reload_complex"""
class Y(Zipdist):
def __init__(self, name):
Expand Down Expand Up @@ -227,6 +290,20 @@ def __init__(self, name):
assert w[0].message.args[0] == "Could not reload simple attribute maggie"


def test_cleanups():
    """Remove the folders and .tar.gz archives produced during testing.

    Deletes the same working-directory paths the earlier tests create.
    Uses ``shutil``/``os`` instead of shelling out to ``rm`` so the cleanup
    also works where no ``rm`` binary exists and spawns no subprocesses.
    Paths that are already absent are skipped silently, so the function is
    safe to run more than once.
    """
    # Imported locally so this block does not depend on the module's
    # top-of-file import list.
    import contextlib
    import shutil

    folders = (
        "Simpsons",
        "siblings",
        "testname",
        "blah",
        "layer1",
        "Flanders",
        "Springfield",
    )
    archives = (
        "Simpsons.tar.gz",
        "testname.tar.gz",
        "siblings.tar.gz",
    )

    for folder in folders:
        # ignore_errors mirrors the best-effort semantics of `rm -rf`:
        # a missing folder is not a failure.
        shutil.rmtree(folder, ignore_errors=True)

    for archive in archives:
        # Plain `rm` complained on stderr when the file was missing;
        # here an absent archive is simply skipped.
        with contextlib.suppress(FileNotFoundError):
            os.remove(archive)





Expand Down

0 comments on commit 9d93f69

Please sign in to comment.