In [None]:
#hide
from deltaframe.core import *
import pandas as pd

# Deltaframe

> Delta between two or more Pandas dataframes.

deltaframe compares an old and a new version of a dataframe and reports which entries were added, removed, or modified. It can also keep a running log of those changes.

## Install

`pip install deltaframe`

## How to use deltaframe

First, let's create two dataframes (one from the previous day, one from the current day). Between the two, items have been
- added
- removed and
- modified

In [None]:
# Snapshot from the previous day: four items keyed by the string column "id".
df1 = pd.DataFrame(
    dict(
        date=["2013-11-24"] * 4,
        id=["001", "002", "003", "004"],
        quantity=[22, 8, 7, 10],
        color=["Yellow", "Orange", "Red", "Yellow"],
    )
)

# Snapshot from the current day: "003" is gone, "005" is new, and
# "002" and "004" carry changed values. "001" is unchanged.
df2 = pd.DataFrame(
    dict(
        date=["2013-11-24", "2013-11-25", "2013-11-24", "2013-11-24"],
        id=["001", "002", "004", "005"],
        quantity=[22, 6, 5, 10],
        color=["Yellow", "Orange", "Red", "Pink"],
    )
)

In [None]:
# Show the previous-day snapshot.
df1

Unnamed: 0,date,id,quantity,color
0,2013-11-24,1,22,Yellow
1,2013-11-24,2,8,Orange
2,2013-11-24,3,7,Red
3,2013-11-24,4,10,Yellow


In [None]:
# Show the current-day snapshot.
df2

Unnamed: 0,date,id,quantity,color
0,2013-11-24,1,22,Yellow
1,2013-11-25,2,6,Orange
2,2013-11-24,4,5,Red
3,2013-11-24,5,10,Pink


#### First, look at the entries that were added (in df2 but not in df1)

In [None]:
# Entries present in df2 but not in df1, matched on the `id` column.
added_entries = get_added_entries(df_old=df1, df_new=df2, unique_id="id")
added_entries

Unnamed: 0,date,id,quantity,color,transaction
4,2013-11-24,5,10.0,Pink,added


#### What about removed entries (in df1 but no longer in df2)?

In [None]:
# Entries present in df1 but missing from df2, matched on the `id` column.
removed_entries = get_removed_entries(df_old=df1, df_new=df2, unique_id="id")
removed_entries

Unnamed: 0,date,id,quantity,color,transaction
4,2013-11-24,3,7.0,Red,removed


#### Awesome, finally we check for the modified entries (for now, without filtering out newly added items)

In [None]:
# Entries reported as modified between df1 and df2, matched on the `id` column.
# NOTE(review): id 005 exists only in df2, yet the recorded output lists it as
# "modified" — confirm that added rows are meant to show up in this result.
modified_entries = get_modified_entries(df_old=df1, df_new=df2, unique_id="id")
modified_entries

Unnamed: 0,date,id,quantity,color,transaction
4,2013-11-25,2,6,Orange,modified
5,2013-11-24,4,5,Red,modified
6,2013-11-24,5,10,Pink,modified


#### Finally, let's build a log file to document when an entry has been added, modified or deleted.

Initially there is no log file so we set `df_log=None`

In [None]:
# No log exists yet, so df_log=None is passed to start one.
# `logging` is called as a function here — presumably the one brought in by the
# star import from deltaframe.core, not the standard-library `logging` module.
df_log = logging(df_log=None, df_old=df1, df_new=df2, unique_id="id")
df_log

Unnamed: 0,date,id,quantity,color,transaction
0,2013-11-24,1,22,Yellow,added
1,2013-11-24,2,8,Orange,added
2,2013-11-24,3,7,Red,added
3,2013-11-24,4,10,Yellow,added


When there's an existing log file we happily pass it to our logging function...

In [None]:
# Feed the existing log back in; the returned frame replaces df_log.
# Because df_log is both input and output, re-running this cell passes the
# already-updated log in again.
df_log = logging(df_log=df_log, df_old=df1, df_new=df2, unique_id="id")
df_log

Unnamed: 0,date,id,quantity,color,transaction
0,2013-11-24,1,22.0,Yellow,added
1,2013-11-24,2,8.0,Orange,added
2,2013-11-24,3,7.0,Red,added
3,2013-11-24,4,10.0,Yellow,added
4,2013-11-25,2,6.0,Orange,modified
5,2013-11-24,4,5.0,Red,modified
6,2013-11-24,5,10.0,Pink,added
7,2013-11-24,3,7.0,Red,removed


Finally, if we want to sort our log file by a particular column, we pass that column via `sort_by`.

In [None]:
# Same call as before, now with sort_by=["date"].
# NOTE(review): df_log is both input and output, so this cell is not idempotent,
# and it passes the same df1/df2 to a log that an earlier cell already updated
# with them. The recorded output contains two identical "Pink ... added" rows —
# confirm whether that duplication is expected.
df_log = logging(df_log=df_log, df_old=df1, df_new=df2, unique_id="id", sort_by=["date"])
df_log

Unnamed: 0,date,id,quantity,color,transaction
0,2013-11-24,1,22.0,Yellow,added
1,2013-11-24,2,8.0,Orange,added
2,2013-11-24,3,7.0,Red,added
3,2013-11-24,4,10.0,Yellow,added
4,2013-11-24,4,5.0,Red,modified
5,2013-11-24,5,10.0,Pink,added
7,2013-11-24,5,10.0,Pink,added
8,2013-11-24,3,7.0,Red,removed
6,2013-11-25,2,6.0,Orange,modified
