In [None]:
# This demo requires the "stage_expenses_cleaned" data created in "demo_SQL_DataCleanup";
# it is read below from Data/stage_expenses_cleaned.csv and loaded into an in-memory SQLite table.
import os
import sqlite3

import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd

from src import queries as Q

# open a SQLite database that lives only in memory
db = sqlite3.connect(":memory:")

# load the cleaned expenses CSV (path is relative to the working directory)
# into a dataframe, letting pandas infer the header row
df1 = pd.read_csv("Data/stage_expenses_cleaned.csv", header="infer")

# copy the dataframe into SQLite as table "stage_expenses_cleaned",
# replacing the table if it already exists
df1.to_sql(name="stage_expenses_cleaned", con=db, if_exists="replace")

def run_query(query, params=None, con=None):
    """Run a SQL query and return its result set as a pandas DataFrame.

    Parameters
    ----------
    query : str
        SQL text to execute.
    params : sequence or mapping, optional
        Values bound to the placeholders in ``query`` (e.g. ``?`` for
        sqlite3). Prefer this over building SQL with string formatting.
        Defaults to ``None`` (no bound parameters).
    con : sqlite3.Connection, optional
        Connection to run the query against. Defaults to the
        notebook-level ``db`` connection.

    Returns
    -------
    pandas.DataFrame
        One row per result row, one column per selected column.
    """
    # `db` is only looked up when no explicit connection is supplied, so the
    # helper can also be used against other databases.
    connection = db if con is None else con
    return pd.read_sql_query(query, connection, params=params)

# display the current working directory; the relative CSV path used above is resolved against it
os.getcwd()

In [None]:
# DataFrame.describe() displays summary statistics for the dataframe's columns,
# i.e. basic aggregations (count, mean, min ... )
df1.describe()

In [None]:
# run the query stored in Q.show_tables to see which tables exist in the DB
run_query(Q.show_tables)

In [None]:
# build the "table help" query for the staging table, then run it to get
# information about that table
query_table_help = Q.query_table_help("stage_expenses_cleaned")
run_query(query_table_help)

In [None]:
# build a query against the staging table and display its result
# NOTE(review): going by the helper's name this presumably returns the first 10 rows -- confirm in src/queries
get_data = Q.select_top10("stage_expenses_cleaned")
run_query(get_data)

In [None]:
# compute the key metrics by US state, using the query defined in src/queries
query_state_KM = Q.query_state_KM

run_query(query_state_KM)


In [None]:
# compute the key metrics by category, using the query defined in src/queries

run_query(Q.query_category_KM)

In [None]:
# build the per-state transaction counts for the map

# run the query, order the rows from the highest to the lowest TransCount,
# and renumber the index from 0 so it follows the new order
df2 = (
    run_query(Q.query_st_map)
    .sort_values("TransCount", ascending=False)
    .reset_index(drop=True)
)

df2

In [None]:
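# Plotting by Shane (disabled) -- requires a manual install of the plotly library.
# NOTE(review): the snippet below is not runnable as written: it imports d3_plot but calls d3_bar,
# and `df` / 'Trans Count' do not match the names used in this notebook (df2 / TransCount). Fix before enabling.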
#from plotly.express import d3_plot
#df.rename(columns={'Vendor_State': 'Vendor State'}, inplace=True)
#d3_bar(df, x='Vendor State', y='Trans Count', color='Vendor State', title='Stage Expenses Cleaned')

In [None]:
# we add the %matplotlib inline magic command to see our plot inside the jupyter notebook. 
# If you build the plot in a .py file, then remove the %matplotlib inline command as it will raise an error
%matplotlib inline

my_plot = df2.plot(x="VendorState", y="TransCount" ,kind="bar",legend=None,title="Transaction Count by State" ,color = "green")
my_plot.set_xlabel("States")
my_plot.set_ylabel("Count")

mpl.pyplot.show(my_plot)
