In [None]:
# default_exp core

# 00 Prodb API details

> Core API for `prodb`: a small CSV-backed database built on `pandas`.

In [None]:
#hide
from nbdev.showdoc import *
%load_ext autoreload
%autoreload 2

## Prodb API
Create pandas dataframe for manipulation

In [None]:
#export
#hide
import pandas as pd
import arrow
import os
from time import gmtime, strftime
#from prodb.core import generate_db, insert_row, utc_now, readable_df

## 1.0 Generate Db
Initialise database with `generate_db` call. 

In [None]:
# export

def generate_db(dbpath='db.csv', 
                cols=['_c1', '_c2', '_c3'],
                verbose=True,
                prefill=False):
    """Create a CSV-backed database at `dbpath` and return it as a DataFrame.

    Args:
        dbpath: Path of the CSV file to write. An existing file is overwritten.
        cols: Column names of the empty database. Ignored when `prefill` is
            True. The default list is only read, never mutated.
        verbose: If True, print the path and the on-disk size of the file.
        prefill: If True, ignore `cols` and write two example rows with the
            columns `name`, `mood`, `message` and `time_utc`.

    Returns:
        The `pandas.DataFrame` that was written to `dbpath`.
    """
    if prefill:
        # Both example rows share one timestamp, so format it once.
        now = arrow.utcnow().format('YYYY-MM-DD HH:mm:ss')
        df = pd.DataFrame({'name': ['Sam', 'Grant'],
                           'mood': ['😊', '😵'],
                           'message': ['hi', 'hello'],
                           'time_utc': [now, now]})
    else:
        df = pd.DataFrame(columns=cols)
    df.to_csv(dbpath, index=False)
    # os.path.getsize reports bytes, so label the figure as bytes (not kb).
    if verbose: print(f'✓💾 {dbpath} ({os.path.getsize(dbpath)} bytes)')
    return df


Generate an empty `db` object with arbitrary columns.

In [None]:
df = generate_db(cols=['c1', 'c2', 'c3'])
df.head()

✓💾 db.csv (9 kb)


Unnamed: 0,c1,c2,c3


Generate `db` with some example data.

In [None]:
df = generate_db(prefill=True)
df.head()

✓💾 db.csv (96 kb)


Unnamed: 0,name,mood,message,time_utc
0,Sam,😊,hi,2021-12-30 15:43:28
1,Grant,😵,hello,2021-12-30 15:43:28


## 2.0 Insert Row

In [None]:
#export

def insert_row(df, data, dbpath='db.csv'):
    """Append one row to `df`, write the result to `dbpath`, and return it.

    Args:
        df: The current database frame. It is not modified.
        data: Mapping of column name to value for the new row. Keys may come
            in any order; keys that are not yet columns become new columns,
            and columns missing from `data` are left empty for the new row.
        dbpath: Path of the CSV file to overwrite with the updated frame.

    Returns:
        A new DataFrame with the row added and a fresh 0..n-1 index.
    """
    # `DataFrame.append` was removed in pandas 2.0, so build a one-row frame
    # and combine explicitly.
    new_row = pd.DataFrame([data]).reset_index(drop=True)
    if len(df) == 0:
        # Nothing to concatenate with: keep df's column order and put any
        # new columns last (this also avoids pandas' empty-concat warning).
        extra = [c for c in new_row.columns if c not in df.columns]
        df = new_row.reindex(columns=[*df.columns, *extra])
    else:
        df = pd.concat([df, new_row], ignore_index=True)
    df.to_csv(dbpath, index=False)
    return df

def insert_rows(df, data, dbpath='db.csv'):
    """Append several rows to `df`, write the result to `dbpath`, and return it.

    Args:
        df: The current database frame. It is not modified.
        data: Mapping of column name to a list of values, one entry per new
            row (anything `pandas.DataFrame.from_dict` accepts). Keys that are
            not yet columns become new columns.
        dbpath: Path of the CSV file to overwrite with the updated frame.

    Returns:
        A new DataFrame with the rows added and a fresh 0..n-1 index.
    """
    # `DataFrame.append` was removed in pandas 2.0, so combine with concat.
    new_rows = pd.DataFrame.from_dict(data).reset_index(drop=True)
    if len(df) == 0:
        # Nothing to concatenate with: keep df's column order and put any
        # new columns last (this also avoids pandas' empty-concat warning).
        extra = [c for c in new_rows.columns if c not in df.columns]
        df = new_rows.reindex(columns=[*df.columns, *extra])
    else:
        df = pd.concat([df, new_rows], ignore_index=True)
    df.to_csv(dbpath, index=False)
    return df


### Example: Insert single row

In [None]:
df = generate_db(cols='name mood message'.split())

# ================================================================ #

data = {'name':'George', 'mood': '👹', 'message':'hi'}
df = insert_row(df, data)

# ================================================================ #

display(df)


✓💾 db.csv (18 kb)


Unnamed: 0,name,mood,message
0,George,👹,hi


### Example: Insert multiple rows
Note: `prodb` allows additional columns to be added after the database has been initialised.   
 
i.e. a db initialised with the `['name', 'mood', 'message']` columns plays nicely when the inserted data also contains an additional `time_utc` column. Previously existing rows are filled with NaN for the new column.

First, a helper function to clean up timestamp calls.

In [None]:
# export
def utc_now():
    """Return the current UTC time formatted as 'YYYY-MM-DD HH:mm:ss'."""
    timestamp_format = 'YYYY-MM-DD HH:mm:ss'
    current = arrow.utcnow()
    return current.format(timestamp_format)

In [None]:
df = generate_db(cols='name mood message'.split())

# ================================================================ #

data = {'name': ['Sam', 'Grant'],
        'mood': ['😊', '😵'],
        'message': ['hello from London, UK', 'hello from Christchurch, NZ'],
        'time_utc' : [utc_now(), utc_now()]}

df = insert_rows(df, data)

# ================================================================ #

display(df)

✓💾 db.csv (18 kb)


Unnamed: 0,name,mood,message,time_utc
0,Sam,😊,"hello from London, UK",2021-12-30 15:43:34
1,Grant,😵,"hello from Christchurch, NZ",2021-12-30 15:43:34


Add another entry to the bottom of the database using `insert_row()`.   

Note: The keys of the data dictionary do not need to be in the pre-defined column order.

In [None]:
data = {'time_utc' : utc_now(),
        'name': 'Luke', 
        'mood': '👹', 
        'message': 'hello from London, UK'}
df = insert_row(df, data)

display(df)

Unnamed: 0,name,mood,message,time_utc
0,Sam,😊,"hello from London, UK",2021-12-30 15:43:34
1,Grant,😵,"hello from Christchurch, NZ",2021-12-30 15:43:34
2,Luke,👹,"hello from London, UK",2021-12-30 15:43:36


In [None]:
# Use the `utc_now` helper rather than repeating the timestamp format string.
data = {'name':'Bill', 'mood': '👹', 'message':'hi', 'time_utc': utc_now()}
df = insert_row(df, data)
display(df)

Unnamed: 0,name,mood,message,time_utc
0,Sam,😊,"hello from London, UK",2021-12-30 15:43:34
1,Grant,😵,"hello from Christchurch, NZ",2021-12-30 15:43:34
2,Luke,👹,"hello from London, UK",2021-12-30 15:43:36
3,Bill,👹,hi,2021-12-30 15:43:38


In [None]:
#export
def readable_df(df, max_rows=8, col_name='human_time'):
    """Return the last `max_rows` rows of `df` with a human-readable time column.

    Args:
        df: The database frame. It is left untouched, so the helper column
            never ends up in the CSV on a later insert.
        max_rows: Number of trailing rows to return.
        col_name: Name of the added column holding the humanized time
            (e.g. "just now"); only added when `df` has a `time_utc` column.

    Returns:
        A new DataFrame with the trailing rows. Rows whose `time_utc` is
        missing keep a missing value in `col_name`.
    """
    # Work on a copy of the tail: only the displayed rows are humanized and
    # the caller's frame is not mutated.
    view = df.tail(max_rows).copy()
    if 'time_utc' in view.columns:
        # na_action='ignore' skips rows inserted without a timestamp.
        view[col_name] = view['time_utc'].map(
            lambda ts: arrow.get(ts).humanize(), na_action='ignore')
    return view

In [None]:
data = {'name':'Luke', 
        'mood': '😊', 
        'message': 'hello, from UK',
        'time_utc': utc_now()}
df = insert_row(df, data)
readable_df(df, max_rows=10)

Unnamed: 0,name,mood,message,time_utc,human_time
0,Sam,😊,"hello from London, UK",2021-12-30 15:43:34,10 seconds ago
1,Grant,😵,"hello from Christchurch, NZ",2021-12-30 15:43:34,10 seconds ago
2,Luke,👹,"hello from London, UK",2021-12-30 15:43:36,just now
3,Bill,👹,hi,2021-12-30 15:43:38,just now
4,Luke,😊,"hello, from UK",2021-12-30 15:43:44,just now


# Classify
Extending pandas dataframes with our custom functions.

In [None]:
#export

class Prodb(pd.DataFrame):
    """A `pandas.DataFrame` subclass that mirrors itself to a CSV file.

    Call `prodb_generate` once to choose the CSV path and write the file;
    `prodb_insert` then returns a new `Prodb` with the extra row(s) and
    rewrites the same file.
    """
    # persistent properties
    _metadata = ["dbpath"]

    @property
    def _constructor(self):
        # Make pandas operations return Prodb rather than a plain DataFrame.
        return Prodb
    
    def prodb_summary(self):
        """Print the CSV path, its size in bytes and the frame shape, then show the last 5 rows."""
        import builtins
        # IPython injects `display` into builtins; outside a notebook (e.g. in
        # the exported module) fall back to plain `print`.
        show = getattr(builtins, 'display', print)
        # os.path.getsize reports bytes, so label the figure as bytes.
        print(f'✓💾 {self.dbpath} ({os.path.getsize(self.dbpath)} bytes)', end='\t')
        print(f"shape: {self.shape}", end='')
        if show is print:
            print()  # keep the text table off the "shape:" line
        show(self.tail(5))
        
    def prodb_generate(self, 
                       dbpath='db.csv',
                       verbose=True):
        """Bind this frame to `dbpath`, write it there as CSV, and optionally print a summary."""
        self.dbpath = dbpath
        self.to_csv(dbpath, index=False)
        if verbose: self.prodb_summary()
    
    def prodb_insert(self, 
                     data):
        """Insert row(s) into dataframe

        Args:
            data: Mapping of column name to value(s). If the first value is a
                scalar (str, number, ...) the mapping is one row; if it is
                list-like, each column holds one entry per new row.

        Returns:
            A new `Prodb` with the rows added, bound to the same `dbpath`,
            which is rewritten with the result. `self` is not modified.

        Raises:
            IndexError: if `data` is empty.
            AttributeError: if `prodb_generate` has not set `dbpath` yet.
        """
        first = list(data.values())[0]
        if pd.api.types.is_list_like(first):
            ndata = pd.DataFrame.from_dict(data)  # multiple rows
        else:
            ndata = pd.DataFrame([data])          # single row
        ndata = ndata.reset_index(drop=True)

        # `DataFrame.append` was removed in pandas 2.0, so combine explicitly.
        if len(self) == 0:
            # Nothing to concatenate with: keep the existing column order and
            # put any new columns last.
            extra = [c for c in ndata.columns if c not in self.columns]
            combined = ndata.reindex(columns=[*self.columns, *extra])
        else:
            combined = pd.concat([self, ndata], ignore_index=True)

        df = self._constructor(combined)
        # pd.concat does not carry `_metadata` over, so copy the path by hand.
        df.dbpath = self.dbpath
        df.to_csv(df.dbpath, index=False)
        return df


## Initialize

In [None]:
# Two example rows; timestamps come from the `utc_now` helper so the format
# string lives in one place.
data = {'name': ['Sam', 'Grant'],
        'mood': ['😊', '😵'],
        'message': ['hi', 'hello'],
        'time_utc' : [utc_now(), utc_now()]}

df = Prodb(data)

In [None]:
df.prodb_generate(dbpath='db.csv')

✓💾 db.csv (96 KB)	shape: (2, 4)

Unnamed: 0,name,mood,message,time_utc
0,Sam,😊,hi,2021-12-30 16:29:55
1,Grant,😵,hello,2021-12-30 16:29:55


## Example: Insert single row

In [None]:
df = df.prodb_insert({'name':'George', 'mood': '👹', 'message':'hi'})
df.prodb_summary()

✓💾 db.csv (112 KB)	shape: (3, 4)

Unnamed: 0,name,mood,message,time_utc
0,Sam,😊,hi,2021-12-30 16:29:55
1,Grant,😵,hello,2021-12-30 16:29:55
2,George,👹,hi,


## Example: Insert multiple rows

In [None]:
data = {'name': ['Multiple', 'Rows'],
        'mood': ['😊', '😵'],
        'message': ['hello', 'hello'],
        'time_utc' : [utc_now(), utc_now()]}

df = df.prodb_insert(data)
df.prodb_summary()

✓💾 db.csv (188 KB)	shape: (5, 4)

Unnamed: 0,name,mood,message,time_utc
0,Sam,😊,hi,2021-12-30 16:29:55
1,Grant,😵,hello,2021-12-30 16:29:55
2,George,👹,hi,
3,Multiple,😊,hello,2021-12-30 16:29:56
4,Rows,😵,hello,2021-12-30 16:29:56


In [None]:
%%time
for i in range(10):
    df = df.prodb_insert(data)
df.prodb_summary()

✓💾 db.csv (948 KB)	shape: (25, 4)

Unnamed: 0,name,mood,message,time_utc
20,Rows,😵,hello,2021-12-30 16:29:56
21,Multiple,😊,hello,2021-12-30 16:29:56
22,Rows,😵,hello,2021-12-30 16:29:56
23,Multiple,😊,hello,2021-12-30 16:29:56
24,Rows,😵,hello,2021-12-30 16:29:56


CPU times: user 31.2 ms, sys: 0 ns, total: 31.2 ms
Wall time: 66.9 ms
