## StyleMeUp - Fraud Detection in Online Retail 
### Data Loading and Prep Notebook

In [1]:
from snowflake.snowpark.session import Session
from snowflake.snowpark.functions import udf, avg, col
from snowflake.snowpark.types import IntegerType, FloatType, StringType, BooleanType
import pandas as pd
from config import snowfalke_conn_prop

In [2]:
from snowflake.snowpark import version
print(version.VERSION)

(0, 2, 0, None)


#### Connect to Snowflake

In [3]:
session = Session.builder.configs(snowfalke_conn_prop).create()
print(session.sql('create schema if not exists frauddemo').collect())
print(session.sql('use schema frauddemo').collect())
print(session.sql('drop table if exists orders').collect())
print(session.sql('drop table if exists orders_details').collect())
print(session.sql('drop table if exists enriched_data').collect())
print(session.sql('drop table if exists fraud_detection').collect())
print(session.sql('drop table if exists new_transaction_data').collect())
print(session.sql('use schema frauddemo').collect())
print(session.sql('create stage if not exists UDFSTAGE').collect())
print(session.sql('select current_warehouse(), current_database(), current_schema()').collect())

[Row(status='FRAUDDEMO already exists, statement succeeded.')]
[Row(status='Statement executed successfully.')]
[Row(status='ORDERS successfully dropped.')]
[Row(status='Drop statement executed successfully (ORDERS_DETAILS already dropped).')]
[Row(status='ENRICHED_DATA successfully dropped.')]
[Row(status='FRAUD_DETECTION successfully dropped.')]
[Row(status='NEW_TRANSACTION_DATA successfully dropped.')]
[Row(status='Statement executed successfully.')]
[Row(status='UDFSTAGE already exists, statement succeeded.')]
[Row(CURRENT_WAREHOUSE()='LAB_L_WH', CURRENT_DATABASE()='DEMO', CURRENT_SCHEMA()='FRAUDDEMO')]


In [4]:
#print(session.sql('CREATE STAGE if not exists fraud_data').collect())
#print(session.sql('PUT file://order*.csv @fraud_data').collect())

In [5]:
df = pd.read_csv('orders.csv')
df.head()
df.dtypes

ISFRAUD                int64
TRNX_ID               object
IP_ADDRESS            object
CITY                  object
SHIPPING_ZIPCODE       int64
SHIPPING_STATE        object
PAYMENT_NETWORK       object
PAYMENT_TYPE          object
TOTAL_TRNX_AMOUNT    float64
JOIN_KEY               int64
dtype: object

In [6]:
df = df.astype({"JOIN_KEY":str, "SHIPPING_ZIPCODE":str})
df.dtypes

ISFRAUD                int64
TRNX_ID               object
IP_ADDRESS            object
CITY                  object
SHIPPING_ZIPCODE      object
SHIPPING_STATE        object
PAYMENT_NETWORK       object
PAYMENT_TYPE          object
TOTAL_TRNX_AMOUNT    float64
JOIN_KEY              object
dtype: object

In [7]:
%%time
snowpark_df = session.write_pandas(df, 'ORDERS', auto_create_table=True)

CPU times: user 776 ms, sys: 51.6 ms, total: 828 ms
Wall time: 19.3 s


In [8]:
df2 = pd.read_csv('order_details.csv')
df2.head()
df2.dtypes

TRNX_ID     object
ITEM        object
PRICE      float64
QTY          int64
dtype: object

In [9]:
%%time
snowpark_df = session.write_pandas(df2, 'ORDER_DETAILS', auto_create_table=True)

CPU times: user 955 ms, sys: 85.3 ms, total: 1.04 s
Wall time: 21.5 s
