### Load Modules

In [None]:
# Packages used by this notebook.
using Random            # NOTE(review): not referenced in the cells shown here — confirm it is still needed
using AutoMLPipeline    # presumably provides the pipeline macros, preprocessors and learners used in later cells
using DataFrames        # supplies the DataFrame type handed to CSV.read
using AbstractTrees     # presumably the source of print_tree — verify
# Set COLUMNS to a large value; presumably so wide tables print without column truncation — TODO confirm.
ENV["COLUMNS"]=1000;

### Load dataset
- Pro-football dataset (`profb.csv`)
- Goal: predict whether a game was played at home or away

In [None]:
using CSV

# Read the CSV file into a DataFrame, then preview its first five rows.
profbdata = CSV.read("profb.csv", DataFrame)
first(profbdata, 5)

### Split data into input features and target output

In [None]:
# Column 1 is the target; every remaining column is an input feature.
X = profbdata[:, 2:end]
Y = Vector(profbdata[:, 1]);


### Load the building blocks of modeling

In [None]:
# Decomposition transformers, each wrapped through SKPreprocessor.
pca, fa, ica = SKPreprocessor.(("PCA", "FactorAnalysis", "FastICA"))

# Scalers, each wrapped through SKPreprocessor.
rb, pt, norm, mx = SKPreprocessor.((
    "RobustScaler", "PowerTransformer", "Normalizer", "MinMaxScaler",
))

# Categorical preprocessing.
ohe = OneHotEncoder()

# Column selectors.
disc = CatNumDiscriminator()
catf = CatFeatureSelector()
numf = NumFeatureSelector()

# Classifiers wrapped through SKLearner.
rf, gb, lsvc, svc, mlp, ada = SKLearner.((
    "RandomForestClassifier", "GradientBoostingClassifier", "LinearSVC",
    "SVC", "MLPClassifier", "AdaBoostClassifier",
))

# Learners and ensembles built with their default arguments.
# NOTE(review): `stack` and `norm` are also function names in DataFrames / LinearAlgebra;
# these assignments presumably shadow them for the session — verify if those are needed.
jrf = RandomForest()
vote = VoteEnsemble()
stack = StackEnsemble()
best = BestLearner();

### Define a pipeline composed of transformers and a learner at the end
#### Symbolic expression built with a Julia macro (allows symbolic manipulation)

In [None]:
# One-hot-encoded categorical columns combined with the numeric columns, fed to the vote ensemble.
pvote = @pipeline ((catf |> ohe) + numf) |> vote
pred = fit_transform!(pvote, X, Y)
# Accuracy measured on the same data the pipeline was fitted on.
score(:accuracy, pred, Y)

In [None]:
# Quote the expression `2 + 2` without evaluating it.
a = :(2 + 2)

In [None]:
# Evaluate the quoted expression held in `a`.
eval(a)

In [None]:
# Show the expression the macro produces for the vote pipeline instead of building the pipeline.
@pipelinex ((catf |> ohe) + numf) |> vote

In [None]:
# Same expression, printed as a tree.
(@pipelinex ((catf |> ohe) + numf) |> vote) |> print_tree

In [None]:
# Transformer-only pipeline (no learner at the end): numeric and categorical
# columns are combined first and the result is passed to the one-hot encoder.
# `+` binds tighter than `|>`, so the parentheses only make the grouping explicit.
pohe = @pipeline (numf + catf) |> ohe
# Fit and apply `pohe` (not `pvote`) so this cell shows the encoder pipeline's output.
# `pred` therefore holds the transformed features here, not class predictions.
pred = fit_transform!(pohe, X, Y)

#### Corresponding function call

In [None]:
# Expression generated by the macro for the vote pipeline.
@pipelinex ((catf |> ohe) + numf) |> vote

In [None]:
# Print that expression as a tree.
print_tree(@pipelinex ((catf |> ohe) + numf) |> vote)

### Evaluate performance by 5-fold cross-validation

In [None]:
# Cross-validate the vote pipeline with 5 folds, scored by "accuracy_score".
crossvalidate(pvote, X, Y, "accuracy_score", 5)

### Use a similar workflow to discover the optimal pipeline

### RandomForest learner

In [None]:
# Random forest on robust-scaled PCA, ICA and FA projections of the numeric columns,
# combined with the one-hot-encoded categorical columns.
prf = @pipeline ((numf |> rb |> pca) + (numf |> rb |> ica) + (catf |> ohe) + (numf |> rb |> fa)) |> rf
pred = fit_transform!(prf, X, Y)
# Accuracy measured on the same data the pipeline was fitted on.
score(:accuracy, pred, Y)

In [None]:
# Expression the macro produces for the same pipeline.
@pipelinex ((numf |> rb |> pca) + (numf |> rb |> ica) + (catf |> ohe) + (numf |> rb |> fa)) |> rf

In [None]:
# Cross-validate with 5 folds, scored by "accuracy_score".
crossvalidate(prf, X, Y, "accuracy_score", 5)

### Gradient Boost Learner

In [None]:
# Gradient boosting on robust-scaled PCA, ICA and FA projections of the numeric columns.
# This pipeline has no categorical branch.
pgb = @pipeline ((numf |> rb |> pca) + (numf |> rb |> ica) + (numf |> rb |> fa)) |> gb
pred = fit_transform!(pgb, X, Y)
# Accuracy measured on the same data the pipeline was fitted on.
score(:accuracy, pred, Y)

In [None]:
# Cross-validate with 5 folds, scored by "accuracy_score".
crossvalidate(pgb, X, Y, "accuracy_score", 5)

### Linear Support Vector Machine for Classification

In [None]:
# LinearSVC on robust-scaled PCA, FA and ICA projections of the numeric columns,
# combined with the one-hot-encoded categorical columns.
plsvc = @pipeline (numf |> rb |> pca) + (numf |> rb |> fa) + (numf |> rb |> ica) + (catf |> ohe) |> lsvc
pred = fit_transform!(plsvc, X, Y)
# Accuracy measured on the same data the pipeline was fitted on.
score(:accuracy, pred, Y)

In [None]:
# Cross-validate with 5 folds, scored by "accuracy_score".
crossvalidate(plsvc, X, Y, "accuracy_score", 5)

### RBF Kernel Support Vector Machine for Classification

In [None]:
# SVC on PCA and ICA projections of the numeric columns, the raw numeric columns,
# and the one-hot-encoded categorical columns. No scaler is applied in this pipeline.
psvc = @pipeline ((numf |> pca) + numf + (numf |> ica) + (catf |> ohe)) |> svc
pred = fit_transform!(psvc, X, Y)
# Accuracy measured on the same data the pipeline was fitted on.
score(:accuracy, pred, Y)

In [None]:
# Cross-validate with 5 folds, scored by "accuracy_score".
crossvalidate(psvc, X, Y, "accuracy_score", 5)