Example Usage for Raccoon
=========================

In [8]:
# import libraries
import raccoon as rc

Initialize
----------

In [2]:
# empty DataFrame
df = rc.DataFrame()
df

object id: 3051857425408
columns:
[]
data:
[]
index:
[]

In [3]:
# with columns and indexes but no data
df = rc.DataFrame(columns=['a', 'b', 'c'], index=[1, 2, 3])
df

object id: 3051860750576
columns:
['a', 'b', 'c']
data:
[[None, None, None], [None, None, None], [None, None, None]]
index:
[1, 2, 3]

In [4]:
# with data
df = rc.DataFrame(data={'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[10, 11, 12], columns=['a', 'b'])
df

object id: 3051860751416
columns:
['a', 'b']
data:
[[1, 2, 3], [4, 5, 6]]
index:
[10, 11, 12]

Print
-----

In [5]:
df.show()

AttributeError: 'DataFrame' object has no attribute 'show'

Setters and Getters
-------------------

In [None]:
# columns
df.columns

In [None]:
df.columns = ['first', 'second']
print(df)

In [None]:
# columns can be renamed with a dict()
df.rename_columns({'second': 'b', 'first': 'a'})
df.columns

In [None]:
# index
df.index

In [None]:
# indexes can be any unique (non-repeating) values
df.index = ['apple', 'pear', 7.7]
df.show()

In [None]:
df.index = [10, 11, 12]

In [None]:
# the index can also have a name; by default it is "index"
df.index_name

In [None]:
df.index_name = 'units'
df.index_name

In [None]:
# data is a shallow copy, be careful on how this is used
df.index_name = 'index'
df.data

Select Index
------------

In [None]:
df.select_index(11)

Set Values
----------

In [None]:
# set a single cell
df.set(10, 'a', 100)
df.show()

In [None]:
# set a value outside current range creates a new row and/or column. Can also use [] for setting
df[13, 'c'] = 9
df.show()

In [None]:
# set column
df['b'] = 55
df.show()

In [None]:
# set a subset of column
df[[10, 12], 'b'] = 66
print(df)

In [None]:
# using boolean list
df.set([True, False, True, False], 'b', [88, 99])
print(df)

In [None]:
# setting with slices
df[12:13, 'a'] = 33
print(df)

In [None]:
df[10:12, 'c'] = [1, 2, 3]
print(df)

In [None]:
# append a row, DANGEROUS as there is no validation checking, but can be used for speed
df.append_row(14, {'a': 44, 'c': 100, 'd': 99})
print(df)

Get Values
----------

In [None]:
# get a single cell
df[10, 'a']

In [None]:
# get an entire column
df['c'].show()

In [None]:
# get list of columns
df[['a', 'c']].show()

In [None]:
# get subset of the index
df[[11, 12, 13], 'b'].show()

In [None]:
# get using slices
df[11:13, 'b'].show()

In [None]:
# get a matrix
df[10:11, ['a', 'c']].show()

In [None]:
# get a column, return as a list
df.get(columns='a', as_list=True)

In [None]:
# get a row and return as a dictionary
df.get_columns(index=13, columns=['a', 'b'], as_dict=True)

Set and Get by Location
-----------------------
Locations are the index of the index, in other words the zero-based positions within the index, from 0 to len(index) - 1.

In [None]:
df.get_locations(locations=[0, 2]).show()

In [None]:
df.set_locations(locations=[0, 2], column='a', values=-9)
df.show()

Head and Tail
-------------

In [None]:
df.head(2).show()

In [None]:
df.tail(2).show()

Delete columns and rows
-----------------------

In [None]:
df.delete_rows([10, 13])
print(df)

In [None]:
df.delete_columns('b')
print(df)

Convert
-------

In [None]:
# return a dict
df.to_dict()

In [None]:
# exclude the index
df.to_dict(index=False)

In [None]:
# return an OrderedDict()
df.to_dict(ordered=True)

In [None]:
# return a list of just one column
df['c'].to_list()

In [None]:
# convert to JSON
string = df.to_json()
print(string)

In [None]:
# construct DataFrame from JSON
df_from_json = rc.from_json(string)
print(df_from_json)

Sort by Index and Column
------------------------

In [None]:
df = rc.DataFrame({'a': [4, 3, 2, 1], 'b': [6, 7, 8, 9]}, index=[25, 24, 23, 22])
print(df)

In [None]:
# sort by index. Sorts are in place
df.sort_index()
print(df)

In [None]:
# sort by column
df.sort_columns('b')
print(df)

In [None]:
# sort by column in reverse order
df.sort_columns('b', reverse=True)
print(df)

Append
------

In [None]:
df1 = rc.DataFrame({'a': [1, 2], 'b': [5, 6]}, index=[1, 2])
df1.show()

In [None]:
df2 = rc.DataFrame({'b': [7, 8], 'c': [11, 12]}, index=[3, 4])
print(df2)

In [None]:
df1.append(df2)
print(df1)

Math Methods
------------

In [None]:
df = rc.DataFrame({'a': [1, 2, 3], 'b': [2, 8, 9]})

In [None]:
# test for equality
df.equality('a', value=3)

In [None]:
# all math methods can operate on a subset of the index
df.equality('b', indexes=[1, 2], value=2)

In [None]:
# add two columns
df.add('a', 'b')

In [None]:
# subtract
df.subtract('b', 'a')

In [None]:
# multiply
df.multiply('a', 'b', [0, 2])

In [None]:
# divide
df.divide('b', 'a')

Multi-Index
-----------
Raccoon does not have true hierarchical multi-index capabilities like Pandas, but attempts to mimic some of the capabilities with the use of tuples as the index. Raccoon does not check that the index tuples are all the same length,
nor does it perform any other integrity checking.

In [None]:
tuples = [('a', 1, 3), ('a', 1, 4), ('a', 2, 3), ('b', 1, 4), ('b', 2, 1), ('b', 3, 3)]
df = rc.DataFrame({'a': [1, 2, 3, 4, 5, 6]}, index=tuples)
print(df)

The select_index method works with tuples by allowing None in any position of the compare tuple to act as a wild card (*) for matching.

In [None]:
compare = ('a', None, None)
df.select_index(compare)

In [None]:
compare = ('a', None, 3)
df.select_index(compare, 'boolean')

In [None]:
compare = (None, 2, None)
df.select_index(compare, 'value')

In [None]:
compare = (None, None, 3)
df.select_index(compare, 'value')

In [None]:
compare = (None, None, None)
df.select_index(compare)

Reset Index
-----------

In [None]:
df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'])
print(df)

In [None]:
df.reset_index()
df

In [None]:
df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=['x', 'y', 'z'], index_name='jelo')
print(df)

In [None]:
df.reset_index()
print(df)

In [None]:
df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'],
                   index=[('a', 10, 'x'), ('b', 11, 'y'), ('c', 12, 'z')], index_name=('melo', 'helo', 'gelo'))
print(df)

In [None]:
df.reset_index()
print(df)

In [None]:
df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=['x', 'y', 'z'], index_name='jelo')
print(df)

In [None]:
df.reset_index(drop=True)
print(df)

Iterators
---------

In [None]:
df = rc.DataFrame({'a': [1, 2, 'c'], 'b': [5, 6, 'd']}, index=[1, 2, 3])

In [None]:
for row in df.iterrows():
    print(row)

In [None]:
for row in df.itertuples():
    print(row)

Sorted DataFrames
-----------------
DataFrames will be set to sorted by default if no index is given at initialization. If an index is given at initialization, then the parameter sorted must be set to True to create a sorted DataFrame.

In [None]:
df = rc.DataFrame({'a': [3, 5, 4], 'b': [6, 8, 7]}, index=[12, 15, 14], sorted=True)

When sorted=True on initialization, the data will be sorted by index to start.

In [None]:
df.show()

In [None]:
df[16, 'b'] = 9
print(df)

In [None]:
df.set(indexes=13, values={'a': 3.5, 'b': 6.5})
print(df)

List or BList
-------------
The underlying data structure can be either blist (default) or list

In [None]:
# Construct with use_blist=True, the default
df_blist = rc.DataFrame({'a': [1, 2, 3]}, index=[5, 6, 7], use_blist=True)

In [None]:
# see that the data structures are all blists
df_blist.data

In [None]:
df_blist.index

In [None]:
df_blist.columns

In [None]:
# now construct with use_blist=False and they are all lists
df_list = rc.DataFrame({'a': [1, 2, 3]}, index=[5, 6, 7], use_blist=False)

In [None]:
df_list.data

In [None]:
df_list.index

In [None]:
df_list.columns