# Create Fake Data in CAS

Creates n rows per available thread. Depending on the size you choose, this can take a few minutes. 

### **WARNING**: You can accidentally create a massive CAS table and CSV file if you specify a large number of rows per thread. By default, only 100 rows will be created per thread.

Program will:
- Create a CAS table named orders in the casuser caslib.
- Save the CAS table as a CSV file named ordersDemo.csv in the casuser caslib.
- Drop the orders CAS table.

Requirements: You need to connect to the CAS server to execute this program.

In [None]:
## Packages
import swat
import pandas as pd
from casConnect import connect_to_cas ## my personal module to connect to CAS

## Display options: show up to 50 columns when pandas displays a DataFrame.
pd.set_option("display.max_columns", 50)

## Connect to CAS through the author's personal helper module. Replace this with your own connection.
## General form: swat.CAS(host, port, username, password)
conn = connect_to_cas()

In [None]:
## The default argument of the function is rowsPerThread = 100. You can change the number of rows by changing that value.
## WARNING: A large number of rows per thread can take a while, depending on the number specified and your environment.

def fakeData(connection, rowsPerThread = 100):
    """Create fake order data in CAS, save it as ordersDemo.csv, then drop the table.

    The function submits a SAS DATA step that builds the ``orders`` table in
    the ``casuser`` caslib, prints the resulting row count, saves the table as
    ``ordersDemo.csv`` in the ``casuser`` caslib (replacing any existing file),
    and finally drops the in-memory ``orders`` table.

    Parameters
    ----------
    connection
        An open CAS connection (for example a ``swat.CAS`` object). Every CAS
        call in this function is made through this object.
    rowsPerThread : int, default 100
        Upper bound of the DATA step ``do`` loop, i.e. the number of rows each
        executing thread writes. Large values can produce a very large table
        and CSV file.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``rowsPerThread`` cannot be converted to an integer or is negative.
    """
    ## The value is interpolated into code that CAS executes, so make sure it
    ## really is a plain, non-negative integer before building the program.
    rows = int(rowsPerThread)
    if rows < 0:
        raise ValueError(f"rowsPerThread must be non-negative, got {rowsPerThread!r}")

    ds = f'''
data casuser.orders;
     length Product varchar(10) 
            Country varchar(2)
            OrderDate 8.
            DiscountCode varchar(10) 
            Return varchar(3);
      call streaminit(99);
            do i=1 to {rows};
            *StoreID*;
                StoreID=int(rand('CHISQ', 20));
        
            *Country*;
                array country_groups[5] varchar(10) _temporary_ ("GR","US","AU","EN", "CA");   
                rand_Countries=rand("table",.2, .4, .1, .1, .2);
                Country=country_groups[rand_Countries];
        
            *Product*;
                array products_groups[4] varchar(10) _temporary_ ("Sweatshirt","Pants","Shirts","Hats");   
                rand_Products=rand("table",.2,.3,.4,.1);
                Product=products_groups[rand_Products];
        
            *Order date*;
                array year_groups[5]  _temporary_ (2017, 2018, 2019, 2020, 2021);   
                rand_year=rand("table",.1,.15,.22,.20,.33);
                OrderDate=int(rand('uniform',mdy(1,1,year_groups[rand_year]), mdy(12,31,year_groups[rand_year])));
        
            *quantity*;
                Quantity=round(rand('uniform', 20, 500),5);
        
            *product price - customer price*;
            array products_price[4] _temporary_ (10.99,8.99,7.99,4.99);
                Price=products_price[rand_Products];
                  if Quantity > 400 then Price = round(Price * .8,.01);
                    else if Quantity > 300 then Price = round(Price * .85,.01);
                    else if Quantity > 200 then Price = round(Price * .9,.01);
                    else if Quantity > 100 then Price = round(Price * .95,.01);
        
            *product cost - cost to make*;
            array products_cost[4] _temporary_ (1.99,1.49,1.99,.99);      
                Cost=products_cost[rand_Products];
        
            *return*;
            rand_return=rand('uniform',0,1);
                if (product="Sweatshirt" and rand_return<.02) then Return="Yes";
                else if (product="Pants" and rand_return<.05) then Return="Yes";
                else if (product="Shirts" and rand_return<.08) then Return="Yes";
                else if (product="Hats" and rand_return<.01) then Return="Yes";
                else Return="";
        
            *discount code*;
            rand_discountValue=rand("table",.3,.15,.25,.09,.01,.15,.05);
            rand_discountApplied=rand('uniform',0,1);
            array products_discounts[7] varchar(10) _temporary_ ("TC10","BB20","TENOFF","EMP50","FMDISCOUNT","SPC","FREEDEAL");  
            if rand_DiscountAPplied <.20 then DiscountCode=products_discounts[rand_discountValue];
                else DiscountCode="";
        
            output;
               
        end;
        format OrderDate date9.;
        drop i rand:;
        run;
'''
    ## Use the connection that was passed in, not a notebook-level global, so
    ## the function works with any CAS session the caller provides.
    connection.runCode(code = ds)

    ## Name the caslib explicitly everywhere so the result does not depend on
    ## which caslib happens to be active in the session.
    orders = {'name':'orders', 'caslib':'casuser'}

    n = connection.numRows(table=orders)['numrows']
    print(f"The orders CAS table was created with {n:,} rows")

    connection.save(table = orders, name = 'ordersDemo.csv', caslib = 'casuser', replace = True)
    print('The orders CAS table was saved as the ordersDemo.csv file')

    connection.dropTable(name = 'orders', caslib = 'casuser')
    print('The orders CAS table was dropped')

In [None]:
## Run the data generator.
## Pass the CAS connection and the number of rows to create per thread.

fakeData(conn, rowsPerThread=100)

In [None]:
## Close the connection to CAS (ends the session) once the work above is finished.
conn.terminate()