In [None]:
import pandas as pd
import sqlite3
import os

# Open the SQLite database file (sqlite3 creates the file if it is missing).
db = sqlite3.connect("MyDemo.db")

# Load the raw CSV export; header=None makes pandas number the columns 0, 1, 2, ...
df1 = pd.read_csv(filepath_or_buffer="demo_AmexTransactions.csv", header=None)

# Write the frame to the staging table, replacing any table left by an earlier run.
df1.to_sql(name="stage_expenses_import", con=db, if_exists="replace")

def run_query(query):
    """Run a SQL statement on the notebook-level ``db`` connection.

    Parameters
    ----------
    query : str
        SQL text to execute.

    Returns
    -------
    pandas.DataFrame
        The result set as returned by ``pd.read_sql_query``.
    """
    result = pd.read_sql_query(sql=query, con=db)
    return result

# Show the current working directory; the relative DB and CSV paths above resolve against it.
os.getcwd()

In [None]:
# Display the raw dataframe read from the CSV (columns are numbered because it was read with header=None).
df1

In [None]:
# List every object recorded in the database's sqlite_master catalog, sorted by name.
query = (
    "\n"
    "SELECT * \n"
    "FROM sqlite_master\n"
    "ORDER BY name;\n"
)
run_query(query)

In [None]:
# Return every row and column of the staging table created from the CSV.
query = (
    "\n"
    "SELECT * \n"
    "FROM stage_expenses_import;\n"
)
run_query(query)

In [None]:
# Describe the staging table's columns through SQLite's table_info pragma function.
query_table_help = (
    "\n"
    "SELECT name , *FROM PRAGMA_TABLE_INFO('stage_expenses_import');\n"
)
run_query(query_table_help)

In [None]:
# Example query showing how the date transform evolves step by step, plus several
# other column transforms applied to the staging table.
#
# Quoting: double quotes are used only for identifiers (the numbered column names
# and the output aliases); string literals such as 'RK' / 'NY' use single quotes.
# SQLite accepts double-quoted strings only as a legacy fallback, and that
# fallback can be disabled in a given SQLite build.
#
# substr() positions are 1-based, so the first ten characters are substr(x, 1, 10).

query_format_columns = """
SELECT 
  "0" as "raw value"
 ,(trim("0")) as "TrimSpaces"
 ,substr((trim("0")) ,1,10) as "unformatted date substr"
 ,substr((trim("0")) ,7,4)||'-'||substr((trim("0")) ,1,2)||'-'||substr((trim("0")) ,4,2) as " formatted as ISO8601 str"
 ,datetime(substr((trim("0")) ,7,4)||'-'||substr((trim("0")) ,1,2)||'-'||substr((trim("0")) ,4,2)) as "ISO8601 as datetime"
 ,length(trim("2"))-1 as "column length-1" /* we get start position for the state substring below */
 ,substr("2" ,(length(trim("2"))-1) ,2) as "State" /* we dynamically substring the state from the last 2 characters */
 ,case (substr("2" ,(length(trim("2"))-1) ,2)) when 'RK' then 'NY' else (substr("2" ,(length(trim("2"))-1) ,2)) end as "Bad DEV Hard-Coded State"
 ,"6" as "Category"
 ,"8" as "Amount as real/float"
 ,"11" as "Vendor"
 ,cast(substr("13" ,1,5) as text) as "zc as txt"
 
from stage_expenses_import;
"""

run_query(query_format_columns)

In [None]:
# Preview (first 5 rows only) of the transform that shapes the staging data into
# the cleaned layout: ISO-style date string, vendor name/state/zip, category, amount.
#
# Quoting: double quotes are used only for identifiers (column names and aliases);
# string literals use single quotes. SQLite accepts double-quoted strings only as
# a legacy fallback, and that fallback can be disabled in a given SQLite build.

query_transform_into_new_table = """
select 
  substr((trim("0")) ,7,4)||'-'||substr((trim("0")) ,1,2)||'-'||substr((trim("0")) ,4,2) as "Tran_Date"
 ,"11" as "Vendor_Name"
 ,case (substr("2" ,(length(trim("2"))-1) ,2)) when 'RK' then 'NY' else (substr("2" ,(length(trim("2"))-1) ,2)) end as "Vendor_State"
 ,cast(substr("13" ,1,5) as text) as "Vendor_Zip"
 ,"6" as "Vendor_Category Raw"
 ,case "11" when 'AMAZON WEB SERVICES' then 'Business Services' else "6" end as "Vendor_Category corrected"
 ,"8" as "Vendor_Amount"
from stage_expenses_import limit 5;
"""

# Helper query: the distinct values found in column "6" of the staging table.
query_distinct_card = """
select 
  distinct "6" 
 
from stage_expenses_import;
"""

run_query(query_transform_into_new_table)
#run_query(query_distinct_card)



In [None]:
# Remove the cleaned table left over from a previous run, if there is one.
Drop_TableName = "stage_expenses_cleaned"
Drop_SQL = "\ndrop table if exists {};\n".format(Drop_TableName)

c = db.cursor()
c.execute(Drop_SQL)
c.close()

# Query text for inspecting the cleaned table; it is only defined here, not executed.
query_stage_expenses_cleaned = (
    "\n"
    "select \n"
    "*\n"
    "from stage_expenses_cleaned;\n"
)

#run_query(query_stage_expenses_cleaned)
#run_query(query_distinct_card)

In [None]:
# Materialize the cleaned expenses table from the staging table.
# "create table if not exists" leaves an already existing table untouched.
Create_TableName = "stage_expenses_cleaned"

# Quoting: double quotes are used only for identifiers (column names and aliases);
# string literals use single quotes. SQLite accepts double-quoted strings only as
# a legacy fallback, and that fallback can be disabled in a given SQLite build.
Create_SQL = f"""
create table if not exists {Create_TableName} as
select 
  substr((trim("0")) ,7,4)||'-'||substr((trim("0")) ,1,2)||'-'||substr((trim("0")) ,4,2) as "Tran_Date"
 ,"11" as "Vendor_Name"
 ,cast((case (substr("2" ,(length(trim("2"))-1) ,2)) when 'RK' then 'NY' else (substr("2" ,(length(trim("2"))-1) ,2)) end) as TEXT) as "Vendor_State"
 ,cast(substr("13" ,1,5) as text) as "Vendor_Zip"
 ,case "11" when 'AMAZON WEB SERVICES' then 'Business Services' else "6" end as "Vendor_Category"
 ,"8" as "Vendor_Amount"
from stage_expenses_import;
"""

c = db.cursor()
try:
    c.execute(Create_SQL)
finally:
    # release the cursor even when the statement fails
    c.close()

# Read the cleaned table back, largest amounts first.
stage_expenses_cleaned = """
select 
*
from stage_expenses_cleaned
order by Vendor_Amount desc;
"""

run_query(stage_expenses_cleaned)

In [None]:
# Pandas-side cleanup of the raw frame. These assignments modify df1 in place;
# the SQLite tables written earlier from df1 are not changed by them.

# Column 0: cast to str, keep the first 10 characters (the date part) and parse as datetime.
df1[0] = pd.to_datetime(df1[0].astype(str).str[:10])

# Column 8: flip the sign of every value (positive becomes negative and vice versa).
# NOTE: not idempotent - every re-run of this cell flips the sign again, so reload
# df1 from the CSV before re-running to start from the raw values.
df1[8] = df1[8] * -1

# Show both transformed columns together. Only the last expression of a cell is
# rendered, so two separate `.head(2)` lines would display just the second one.
df1[[0, 8]].head(2)

