<h1>Getting Started</h1>
1. signup for Rasgo (https://app.rasgoml.com/account/register) and get your API key from the top right corner in the UI<br>
2. install pyrasgo (next cell down)<br>
3. check out the docs: https://docs.rasgoml.com/rasgo-docs/pyrasgo/user-defined-transforms-udts<br>
4. follow along with this tutorial

In [None]:
!pip install pyrasgo
import pyrasgo

In [73]:
#paste your Rasgo API key in the parentheses
rasgo = pyrasgo.connect('')

In [74]:
# Get all available transforms
transforms = rasgo.get.transforms()
for t in transforms:
    print(t.name)
    for a in t.arguments:
        print('     ', a['name'], ':', a['description'])

rasgo_filter
      filter_statements : List of where statements filter the table by. Ex. ["<col_name> = 'string'", "<col_name> IS NOT NULL"
rasgo_pivot
      dimensions : dimension columns after the pivot runs
      pivot_column : column to pivot and aggregate
      value_column : column with row values that will become columns
      agg_method : method of aggregation (i.e. sum, avg, min, max, etc.)
      list_of_vals : optional argument to override the dynamic lookup of all values in the value_column and only pivot a provided list of values
rasgo_lag
      columns : names of column(s) you want to lag
      amounts : Magnitude of amounts you want to use for the lag. Positive values result in a historical offset; negative amounts result in forward-looking offset.
      partition : name of column(s) to partition by for the lag
      order_by : name of column(s) to order by in the final data set
rasgo_movingavg
      input_columns : names of column(s) you want to moving average
      wind

In [50]:
sources = rasgo.get.data_sources()
for source in sources:
    if 'ADVENTUREWORKS' in source.name:
        print(source.id, source.table)

2614 DIMACCOUNT
2615 DIMCURRENCY
2622 DIMSALESREASON
2616 DIMCUSTOMER
2621 DIMPROMOTION
2617 DIMDATE
2632 DIMPRODUCTCATEGORY
2618 DIMDEPARTMENTGROUP
2626 NEWFACTCURRENCYRATE
2619 DIMGEOGRAPHY
2620 DIMORGANIZATION
2623 DIMRESELLER
2624 FACTADDITIONALINTERNATIONALPRODUCTDESCRIPTION
2625 FACTPRODUCTINVENTORY
2627 FACTCALLCENTER
2629 FACTRESELLERSALES
2628 FACTINTERNETSALESREASON
2630 FACTSALESQUOTA
2631 PROSPECTIVEBUYER
2633 DIMPRODUCTSUBCATEGORY
2634 DIMSCENARIO
2635 FACTCURRENCYRATE
2636 FACTFINANCE
2637 FACTINTERNETSALES


In [36]:
internet_sales = rasgo.get.data_source(id=2637)
customer = rasgo.get.data_source(id=2616)
dim_date = rasgo.get.data_source(id=2617)

<h2>Project 1: Forecast Direct Sales</h2>

In [34]:
# preview data set
rasgo.read.source_data(internet_sales.id, limit=100).head()

Unnamed: 0,PRODUCTKEY,ORDERDATEKEY,DUEDATEKEY,SHIPDATEKEY,CUSTOMERKEY,PROMOTIONKEY,CURRENCYKEY,SALESTERRITORYKEY,SALESORDERNUMBER,SALESORDERLINENUMBER,...,PRODUCTSTANDARDCOST,TOTALPRODUCTCOST,SALESAMOUNT,TAXAMT,FREIGHT,CARRIERTRACKINGNUMBER,CUSTOMERPONUMBER,ORDERDATE,DUEDATE,SHIPDATE
0,310,20101229,20110110,20110105,21768,1,19,6,SO43697,1,...,2171.2942,2171.2942,3578.27,286.2616,89.4568,,,2010-12-29,2011-01-10,2011-01-05
1,346,20101229,20110110,20110105,28389,1,39,7,SO43698,1,...,1912.1544,1912.1544,3399.99,271.9992,84.9998,,,2010-12-29,2011-01-10,2011-01-05
2,346,20101229,20110110,20110105,25863,1,100,1,SO43699,1,...,1912.1544,1912.1544,3399.99,271.9992,84.9998,,,2010-12-29,2011-01-10,2011-01-05
3,336,20101229,20110110,20110105,14501,1,100,4,SO43700,1,...,413.1463,413.1463,699.0982,55.9279,17.4775,,,2010-12-29,2011-01-10,2011-01-05
4,346,20101229,20110110,20110105,11003,1,6,9,SO43701,1,...,1912.1544,1912.1544,3399.99,271.9992,84.9998,,,2010-12-29,2011-01-10,2011-01-05


In [77]:
# convert date string to date

t1 = internet_sales.transform(
  transform_name='rasgo_todate',
  format_expression = 'YYYYMMDD',
  date_columns = ['ORDERDATEKEY'])

print(t1.preview_sql())

SELECT *,
TO_DATE(ORDERDATEKEY, 'YYYYMMDD') as ORDERDATEKEY_todate 
from ADVENTUREWORKS.PUBLIC.FACTINTERNETSALES


In [68]:
# truncate date to week in a new column

t1 = internet_sales.transform(
  transform_name='rasgo_datetrunc',
  date_part = 'week',
  date_column = ['ORDERDATEKEY'])

t1.preview_sql()

'\n    SELECT *,DATE_TRUNC(week, ORDERDATEKEY) as ORDERDATEKEY_week from ADVENTUREWORKS.PUBLIC.FACTINTERNETSALES'

In [90]:
# aggregate sales to week

t1 = internet_sales.transform(
  transform_name='rasgo_group_by',
  group_items=['ORDERDATEKEY'],
  aggregations={
      'SALESAMOUNT': ['SUM', 'AVG'],
      'TAXAMT': ['SUM'],
      'FREIGHT': ['SUM']
  }
)

t1.preview()

Unnamed: 0,ORDERDATEKEY,SALESAMOUNT_SUM,SALESAMOUNT_AVG,TAXAMT_SUM,FREIGHT_SUM
0,20101229,14477.3382,2895.46764,1158.1871,361.9337
1,20101230,13931.52,3482.88,1114.5216,348.2882
2,20101231,15012.1782,3002.43564,1200.9743,375.3047
3,20110101,7156.54,3578.27,572.5232,178.9136
4,20110102,15012.1782,3002.43564,1200.9743,375.3047
5,20110103,14313.08,3578.27,1145.0464,357.8272
6,20110104,7855.6382,2618.5460666667,628.4511,196.3911
7,20110106,20909.78,3484.9633333333,1672.7824,522.7448
8,20110108,14313.08,3578.27,1145.0464,357.8272
9,20110109,14134.8,3533.7,1130.784,353.3702


In [91]:
# add lags for forecast model

t2 = t1.transform(
    transform_name='rasgo_lag',
    columns = ['SALESAMOUNT_SUM'],
    amounts = [1,2,3,4,26,52],
    order_by = ['ORDERDATEKEY'],
    partition = ['ORDERDATEKEY']
)

t2.preview()

Unnamed: 0,ORDERDATEKEY,SALESAMOUNT_SUM,SALESAMOUNT_AVG,TAXAMT_SUM,FREIGHT_SUM,LAG_SALESAMOUNT_SUM_1,LAG_SALESAMOUNT_SUM_2,LAG_SALESAMOUNT_SUM_3,LAG_SALESAMOUNT_SUM_4,LAG_SALESAMOUNT_SUM_26,LAG_SALESAMOUNT_SUM_52
0,20121111,15867.2606,1763.0289555556,1269.381,396.6816,,,,,,
1,20130325,28647.12,240.7321008403,2291.7696,716.1833,,,,,,
2,20130307,36027.71,255.5156737589,2882.2168,900.6989,,,,,,
3,20120914,29939.4934,1995.9662266667,2395.1597,748.4878,,,,,,
4,20130207,23441.98,236.7876767677,1875.3584,586.0541,,,,,,
5,20110414,6953.26,3476.63,556.2608,173.8316,,,,,,
6,20110603,14833.8982,2966.77964,1186.7119,370.8477,,,,,,
7,20110527,7855.6382,2618.5460666667,628.4511,196.3911,,,,,,
8,20131004,62806.51,358.8943428571,5024.5208,1570.1705,,,,,,
9,20130521,58402.96,337.5893641618,4672.2368,1460.0818,,,,,,


In [40]:
# apply train-test split

In [41]:
#t1.preview()

<h1>Project 2: Analyze Customer Behavior</h1>

In [207]:
t1 = stock_source.transform(
    transform_name='my_pivot',
    #source_code = sourcecode,
    dimensions=['DATE'],
    pivot_column='CLOSE',
    value_column='SYMBOL',
    agg_method='AVG',
    list_of_vals=['JP','GOOG','DIS','APLE']
)
print(t1.preview_sql())

SELECT DATE, JP, GOOG, DIS, APLE
FROM ( SELECT DATE, CLOSE, SYMBOL FROM RASGOCOMMUNITY.PUBLIC.ZEPL_DAILY_STOCK_FEATURES)
PIVOT ( AVG ( CLOSE ) FOR SYMBOL IN ( 'JP', 'GOOG', 'DIS', 'APLE' ) ) as p ( DATE, JP, GOOG, DIS, APLE )


In [None]:
t1.to_source()