Example Usage for Raccoon
========================

In [2]:
# import libraries
import sys
sys.path.append("..")
import raccoon as rc

Initialize
----------

In [3]:
# empty DataFrame: no columns, no index and no data
df = rc.DataFrame()
df

object id: 48079760
columns:
blist([])
data:
blist([])
index:
blist([])

In [4]:
# with columns and indexes but no data; every cell starts out as None
df = rc.DataFrame(columns=['a', 'b', 'c'], index=[1, 2, 3])
df

object id: 48579376
columns:
blist(['a', 'b', 'c'])
data:
blist([blist([None, None, None]), blist([None, None, None]), blist([None, None, None])])
index:
blist([1, 2, 3])

In [5]:
# with data: a dict of column name -> list of values, plus an explicit index and column list
df = rc.DataFrame(data={'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[10, 11, 12], columns=['a', 'b'])
df

object id: 48579888
columns:
blist(['a', 'b'])
data:
blist([blist([1, 2, 3]), blist([4, 5, 6])])
index:
blist([10, 11, 12])

Print
-----

In [6]:
# print the DataFrame as a formatted text table, index column first
df.print()

  index    a    b
-------  ---  ---
     10    1    4
     11    2    5
     12    3    6


Setters and Getters
-------------------

In [7]:
# columns
df.columns

blist(['a', 'b'])

In [8]:
# assigning a list to .columns replaces all of the column names
df.columns = ['first', 'second']
# print(df) shows the same kind of table as df.print()
print(df)

  index    first    second
-------  -------  --------
     10        1         4
     11        2         5
     12        3         6


In [9]:
# columns can be renamed with a dict() that maps current name -> new name
df.rename_columns({'second': 'b', 'first': 'a'})
df.columns

blist(['a', 'b'])

In [10]:
# index
df.index

blist([10, 11, 12])

In [11]:
# indexes can be any values as long as they are unique; mixed types (strings and a float) are shown here
df.index = ['apple', 'pear', 7.7]
df.print()

index      a    b
-------  ---  ---
apple      1    4
pear       2    5
7.7        3    6


In [12]:
# put the integer index back for the examples that follow
df.index = [10, 11, 12]

In [13]:
# the index can also have a name; by default it is "index"
df.index_name

'index'

In [14]:
# the index name is changed by assignment
df.index_name = 'units'
df.index_name

'units'

In [15]:
# restore the index name to its original value
df.index_name = 'index'
# data is a shallow copy, be careful on how this is used
df.data

blist([blist([1, 2, 3]), blist([4, 5, 6])])

Select Index
------------

In [16]:
# returns a boolean list with True at each position where the index equals the given value
df.select_index(11)

[False, True, False]

Set Values
----------

In [17]:
# set a single cell
df.set(10, 'a', 100)
df.print()

  index    a    b
-------  ---  ---
     10  100    4
     11    2    5
     12    3    6


In [18]:
# setting a value outside the current range creates a new row and/or column. Can also use [] for setting
# cells that were never given a value print as blanks
df[13, 'c'] = 9
df.print()

  index    a    b    c
-------  ---  ---  ---
     10  100    4
     11    2    5
     12    3    6
     13              9


In [19]:
# set a whole column: a single value is applied to every row
df['b'] = 55
df.print()

  index    a    b    c
-------  ---  ---  ---
     10  100   55
     11    2   55
     12    3   55
     13        55    9


In [20]:
# set a subset of a column: only the listed index values are changed
df[[10, 12], 'b'] = 66
print(df)

  index    a    b    c
-------  ---  ---  ---
     10  100   66
     11    2   55
     12    3   66
     13        55    9


In [21]:
# using a boolean list to select rows; the values list supplies one value per True entry
df.set([True, False, True, False], 'b', [88, 99])
print(df)

  index    a    b    c
-------  ---  ---  ---
     10  100   88
     11    2   55
     12    3   99
     13        55    9


In [22]:
# setting with slices: the slice is over index values and includes both ends (rows 12 and 13 here)
df[12:13, 'a'] = 33
print(df)

  index    a    b    c
-------  ---  ---  ---
     10  100   88
     11    2   55
     12   33   99
     13   33   55    9


In [23]:
# a slice can also be assigned a list of values, one for each row in the slice (10, 11 and 12)
df[10:12, 'c'] = [1, 2, 3]
print(df)

  index    a    b    c
-------  ---  ---  ---
     10  100   88    1
     11    2   55    2
     12   33   99    3
     13   33   55    9


Get Values
----------

In [24]:
# get a single cell
df[10, 'a']

100

In [25]:
# get an entire column; the result keeps the index and can be printed as a table
df['c'].print()

  index    c
-------  ---
     10    1
     11    2
     12    3
     13    9


In [26]:
# get list of columns
df[['a', 'c']].print()

  index    a    c
-------  ---  ---
     10  100    1
     11    2    2
     12   33    3
     13   33    9


In [27]:
# get subset of the index
df[[11, 12, 13], 'b'].print()

  index    b
-------  ---
     11   55
     12   99
     13   55


In [28]:
# get using slices; both ends of the slice are included (11, 12 and 13 here)
df[11:13, 'b'].print()

  index    b
-------  ---
     11   55
     12   99
     13   55


In [29]:
# get a matrix: a slice of index values combined with a list of columns
df[10:11, ['a', 'c']].print()

  index    a    c
-------  ---  ---
     10  100    1
     11    2    2


Head and Tail
-------------

In [30]:
# first 2 rows
df.head(2).print()

  index    a    b    c
-------  ---  ---  ---
     10  100   88    1
     11    2   55    2


In [31]:
# last 2 rows
df.tail(2).print()

  index    a    b    c
-------  ---  ---  ---
     12   33   99    3
     13   33   55    9


Delete columns and rows
------------------------

In [32]:
# delete rows by index value; df itself is modified
df.delete_rows([10, 13])
print(df)

  index    a    b    c
-------  ---  ---  ---
     11    2   55    2
     12   33   99    3


In [33]:
# delete a column by name; df itself is modified
df.delete_columns('b')
print(df)

  index    a    c
-------  ---  ---
     11    2    2
     12   33    3


Convert
-------

In [34]:
# return a dict
df.to_dict()

{'a': blist([2, 33]), 'c': blist([2, 3]), 'index': blist([11, 12])}

In [35]:
# exclude the index
df.to_dict(index=False)

{'a': blist([2, 33]), 'c': blist([2, 3])}

In [36]:
# return an OrderedDict()
df.to_dict(ordered=True)

OrderedDict([('index', blist([11, 12])),
             ('a', blist([2, 33])),
             ('c', blist([2, 3]))])

In [37]:
# return a list of just one column
df['c'].to_list()

blist([2, 3])

Sort by Index and Column
------------------------

In [38]:
# new DataFrame whose index is in descending order, used to demonstrate sorting
df = rc.DataFrame({'a': [4, 3, 2, 1], 'b': [6, 7, 8, 9]}, index=[25, 24, 23, 22])
print(df)

  index    a    b
-------  ---  ---
     25    4    6
     24    3    7
     23    2    8
     22    1    9


In [39]:
# sort by index. Sorts are inplace
df.sort_index()
print(df)

  index    a    b
-------  ---  ---
     22    1    9
     23    2    8
     24    3    7
     25    4    6


In [40]:
# sort by column: rows are reordered by the values in column 'b', smallest first
df.sort_columns('b')
print(df)

  index    a    b
-------  ---  ---
     25    4    6
     24    3    7
     23    2    8
     22    1    9


Append
------

In [41]:
# first DataFrame: columns a and b, index values 1 and 2
df1 = rc.DataFrame({'a': [1, 2], 'b': [5, 6]}, index=[1, 2])
df1.print()

  index    a    b
-------  ---  ---
      1    1    5
      2    2    6


In [42]:
# second DataFrame: shares column b with df1, adds column c, and uses different index values
df2 = rc.DataFrame({'b': [7, 8], 'c': [11, 12]}, index=[3, 4])
print(df2)

  index    b    c
-------  ---  ---
      3    7   11
      4    8   12


In [43]:
# append df2's rows onto df1 (df1 itself is modified); cells with no source value print as blanks
df1.append(df2)
print(df1)

  index    a    b    c
-------  ---  ---  ---
      1    1    5
      2    2    6
      3         7   11
      4         8   12


Math Methods
------------

In [44]:
# no index is given here; the examples below address the rows as 0, 1 and 2
df = rc.DataFrame({'a': [1, 2, 3], 'b': [2, 8, 9]})

In [45]:
# test for equality
df.equality('a', value=3)

[False, False, True]

In [46]:
# all math methods can operate on a subset of the index
df.equality('b', indexes=[1, 2], value=2)

[False, False]

In [47]:
# add two columns
df.add('a', 'b')

[3, 10, 12]

In [48]:
# subtract
df.subtract('b', 'a')

[1, 6, 6]

In [49]:
# multiply, restricted to index values 0 and 2
df.multiply('a', 'b', [0, 2])

[2, 27]

In [50]:
# divide; the results come back as floats
df.divide('b', 'a')

[2.0, 4.0, 3.0]

Multi-Index
-----------
Raccoon does not have true hierarchical multi-index capabilities like Pandas, but attempts to mimic some of the capabilities with the use of tuples as the index. Raccoon does not check that the index tuples are all the same length,
nor does it perform any other integrity checking.

In [54]:
# build a DataFrame whose index values are 3-element tuples
tuples = [('a', 1, 3), ('a', 1, 4), ('a', 2, 3), ('b', 1, 4), ('b', 2, 1), ('b', 3, 3)]
df = rc.DataFrame({'a': [1, 2, 3, 4, 5, 6]}, index=tuples)
print(df)

index          a
-----------  ---
('a', 1, 3)    1
('a', 1, 4)    2
('a', 2, 3)    3
('b', 1, 4)    4
('b', 2, 1)    5
('b', 3, 3)    6


The select_index method works with tuples by allowing None to act as a wild card (like "*") for matching, as the examples below show.

In [53]:
# None in a position matches any value there: select every row whose first element is 'a'
compare = ('a', None, None)
df.select_index(compare)

[True, True, True, False, False, False]

In [55]:
# match on the first and last elements; 'boolean' returns a True/False list over the index
compare = ('a', None, 3)
df.select_index(compare, 'boolean')

[True, False, True, False, False, False]

In [56]:
# 'value' returns the matching index values themselves instead of a boolean list
compare = (None, 2, None)
df.select_index(compare, 'value')

[('a', 2, 3), ('b', 2, 1)]

In [58]:
# match on the last element only
compare = (None, None, 3)
df.select_index(compare, 'value')

[('a', 1, 3), ('a', 2, 3), ('b', 3, 3)]

In [59]:
# a tuple of all None matches every row
compare = (None, None, None)
df.select_index(compare)

[True, True, True, True, True, True]