# Setup

In [None]:
from piper import piper
from piper.defaults import *
from piper.verbs import *
from piper.factory import get_sample_data
from piper.pandas import *
from piper.odbc import connections, connect
from piper.sql import insert, create_table
from psycopg2 import Error
import math

# Sample data - 1 million rows

In [None]:
# Load the sample sales records from the local inputs folder into a dataframe.
df = pd.read_csv(filepath_or_buffer='inputs/1000000 Sales Records.csv')

## Clean data

Trim leading and trailing spaces from column names,
replace embedded spaces with underscores, and convert the names to lowercase.<br>
Why? To make it easier to work with columns using pandas dot notation<br>
(e.g. instead of __df['Order Date']__ one can use __df.order_date__).

In [None]:
# Normalise the column names by passing the dataframe through clean_columns.
df = df.pipe(clean_columns)

In [None]:
# Both date columns are parsed with the same month/day/year format.
us_date_format = '%m/%d/%Y'
df.ship_date = pd.to_datetime(df.ship_date, format=us_date_format)
df.order_date = pd.to_datetime(df.order_date, format=us_date_format)

In [None]:
# Presumably previews the first row of the cleaned dataframe so the renamed
# columns and parsed dates can be checked - verify `head` in piper.verbs.
head(df, 1)

# Postgres DB example

## Create table in target database

The function 'create_table()' generates the SQL statements required to build
a table; see the example output below.

In [None]:
# Generate the table-creation SQL for the dataframe and show it.
example_ddl = create_table(df, tablename='example_table')
print(example_ddl)

## Connect and insert data

Optionally, split and process data into multiple parts/pieces.

In [None]:
# Split the dataframe into 10 consecutive row-wise pieces.
# np.array_split is applied to the row positions rather than to the dataframe
# itself: splitting a DataFrame directly goes through DataFrame.swapaxes,
# which recent pandas versions deprecate. The piece sizes are the same as
# np.array_split(df, 10) would produce.
list_dataframes = [
    df.iloc[row_positions]
    for row_positions in np.array_split(np.arange(len(df)), indices_or_sections=10)
]

In [None]:
table_name = 'testtable'

# Rebuild the target table from scratch: drop any previous copy, then create
# it again from the dataframe, sending both statements in a single execute.
sql = [f'''drop table if exists {table_name}''', 
       create_table(df, table_name)]
sql = '; '.join(sql)

# NOTE(review): no explicit commit is issued here - presumably the context
# manager returned by connect() commits on exit; confirm against piper.odbc.
with connect(connection='Connection1') as con:

    # Bound before the try block so the finally clause can tell whether a
    # cursor was actually opened (con.cursor() itself may raise).
    cursor = None

    try:
        cursor = con.cursor()
        cursor.execute(sql)
        logger.debug(sql)

        # Insert the data piece by piece, one generated statement per piece.
        for dx in list_dataframes:

            sql = insert(dx, table_name, info=False)
            logger.debug(sql)

            cursor.execute(sql)
            logger.info(f'{dx.shape[0]} rows inserted into {table_name} table.')

    except (Exception, Error) as error:
        # The error text is embedded in the message itself; passing it as an
        # extra logging argument without a placeholder breaks log formatting.
        logger.error(f"Error while connecting to {con}: {error}")
    finally:
        # Only the cursor is released here; the connection is left to the
        # enclosing `with` block.
        if cursor is not None:
            cursor.close()
            logger.info("Cursor closed")

## Check data

In [None]:
# Read the loaded table back into `df`, replacing the CSV-sourced dataframe.
# NOTE(review): `con` is reused here after its `with` block has exited -
# confirm the connection is still usable at this point.
sql = 'select * from ' + table_name
df = read_sql(con=con, sql=sql, info=False, sql_info=False)
head(df, 2)