# R: Explore Sales Data from Enterprise Systems through Machine Learning

## R implementation for association rules mining (arules package)

Initializing libraries

In [None]:
library(arules)
library(plyr)
library(ibmdbR)

Loading the data from dashDB

In [4]:
# The code was removed by DSX for sharing.

In [5]:
# Assemble the connection string from the fields of `credentials`
# and open the database connection used by the following cells.
con.text <- paste0(
  ";Database=", credentials$db,
  ";Hostname=", credentials$hostname,
  ";Port=", credentials$port,
  ";PROTOCOL=TCPIP",
  ";UID=", credentials$username,
  ";PWD=", credentials$password
)
con <- idaConnect(con.text)

In [7]:
# Fetch every row and column of the TRANSACTIONS table into `df`
# and preview the first rows.
transactionsQuery <- "SELECT * FROM DASH8382.TRANSACTIONS"
df <- idadf(con, transactionsQuery)
print("A sample of loaded data")
head(df)

[1] "A sample of loaded data"


PRODUCT_LINE,PRODUCT_TYPE,CUST_ORDER_NUMBER,CITY,STATE,COUNTRY,GENDER,AGE,MARITAL_STATUS,PROFESSION
Personal Accessories,Watches,172860,Cixi,,China,M,36,Married,Professional
Golf Equipment,Irons,172862,Hillingdon,,United Kingdom,M,48,Married,Trades
Golf Equipment,Putters,172862,Hillingdon,,United Kingdom,M,48,Married,Trades
Personal Accessories,Eyewear,172865,Venezia,,Italy,F,19,Single,Other
Mountaineering Equipment,Rope,172867,New Delhi,NCT,India,F,25,Single,Other
Golf Equipment,Irons,172869,Kushiro,,Japan,M,55,Unspecified,Other


In [8]:
# Close the database connection: the query result is already held in `df`,
# and no later visible cell uses `con`.
idaClose(con)

Data wrangling with R: grouping all the items purchased in the same order into a single transaction

In [9]:
# Keep one row per distinct (order, product line) pair. Rows with a missing
# order number or product line are dropped, as aggregate()'s formula
# interface did in the previous version of this cell.
keepRow <- !is.na(df$CUST_ORDER_NUMBER) & !is.na(df$PRODUCT_LINE)
ordersDF <- unique(df[keepRow, c("CUST_ORDER_NUMBER", "PRODUCT_LINE")])
ordersDF$PRODUCT_LINE <- as.character(ordersDF$PRODUCT_LINE)
ordersDF <- ordersDF[order(ordersDF$CUST_ORDER_NUMBER, ordersDF$PRODUCT_LINE), ]

# Group the product lines by order directly. The items are never joined into
# a comma-separated string and split again, so a product line that itself
# contains a comma stays a single item.
transactions <- unname(split(ordersDF$PRODUCT_LINE, ordersDF$CUST_ORDER_NUMBER))
# Only orders with more than one distinct product line can yield a rule.
transactions <- transactions[lengths(transactions) > 1]
print("Sample of transactions ready for Apriori algorithm")
head(transactions)


[1] "Sample of transactions ready for Apriori algorithm"


Transforming the data according to arules requirements and applying the Apriori algorithm

In [10]:
# Coerce the list of per-order item vectors into an arules "transactions"
# object; this overwrites the list and is what apriori() receives below.
transactions <- as(transactions, "transactions")

In [11]:
library(arules)

# Mine association rules with Apriori, keeping rules of at least two items
# (minlen = 2) with support >= 0.05 and confidence >= 0.2. The parameter is
# spelled `confidence` in full instead of relying on partial matching of `conf`.
rulesMod <- apriori(
  transactions,
  parameter = list(support = 0.05, confidence = 0.2, minlen = 2)
)
# Order the rules by descending lift. TRUE is used instead of T because T is
# an ordinary variable that can be reassigned.
rulesMod <- sort(rulesMod, decreasing = TRUE, by = "lift")

inspect(rulesMod)

Apriori

Parameter specification:
 confidence minval smax arem  aval originalSupport maxtime support minlen
        0.2    0.1    1 none FALSE            TRUE       5    0.05      2
 maxlen target   ext
     10  rules FALSE

Algorithmic control:
 filter tree heap memopt load sort verbose
    0.1 TRUE TRUE  FALSE TRUE    2    TRUE

Absolute minimum support count: 322 

set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[5 item(s), 6443 transaction(s)] done [0.00s].
sorting and recoding items ... [5 item(s)] done [0.00s].
creating transaction tree ... done [0.01s].
checking subsets of size 1 2 3 done [0.00s].
writing ... [12 rule(s)] done [0.00s].
creating S4 object  ... done [0.00s].
     lhs                           rhs                        support   
[1]  {Golf Equipment}           => {Personal Accessories}     0.09514201
[2]  {Outdoor Protection}       => {Camping Equipment}        0.20037250
[3]  {Camping Equipment}        => {Outdoor Protection}       0.200372

## Visualizing association rules

Brunel-based visualization

In [12]:
# Transform the rules into a data frame: one row per rule, with its label in
# `rules` alongside the quality measures.
rulesSet <- as(rulesMod, "data.frame")

# Drop the braces from the labels, e.g. "{A} => {B}" becomes "A => B".
rulesSet$rules <- gsub("\\{|\\}", "", rulesSet$rules)

# Split each label into its two sides and trim the blanks left around "=>".
# Without trimming, LHS values end with a space and RHS values start with one,
# so the same product line would show up as two different labels in the
# visualization.
ruleSides <- strsplit(rulesSet$rules, "=>", fixed = TRUE)
rulesSet$LHS <- trimws(vapply(ruleSides, `[`, character(1), 1L))
rulesSet$RHS <- trimws(vapply(ruleSides, `[`, character(1), 2L))

In [13]:
library(brunel)

# Chord diagram of `rulesSet` linking LHS to RHS, colored by confidence and
# sized by support, with the rule and its measures in the tooltip.
chordSpec <- " data('rulesSet') chord x(LHS) y(RHS) color(confidence) size(support) tooltip(rules, support, confidence, lift)"
brunel(chordSpec, width = 600, height = 400, online_js = TRUE)