## Julia Interfacing with R and Python

In [None]:
using PyCall
using DecisionTree
using DataFrames
using Statistics
ENV["COLUMNS"]=1000; # report a very wide terminal so wide DataFrames are displayed without dropping columns

### py"" String Macro

- The `py"..."` string macro executes Python code and implicitly converts the resulting Python objects into Julia data types.

In [None]:
py"1+1" # evaluates the Python expression; the result is implicitly converted to a Julia numeric value

### Defining a function in Python

In [None]:
# Run a multi-line Python snippet: import NumPy as `np` and define the Python
# function `mysin(x)`, which returns `np.sin(x)`.
py"""

import numpy as np

def mysin(x):
   return np.sin(x)
"""

In [None]:
py"mysin(10)" # call the Python function `mysin` defined in the py""" """ snippet

In [None]:
py"np.sin(10)"  # call NumPy's sin directly through the `np` name imported in the Python snippet

In [None]:
pyimport("numpy").sin(10) # import numpy from Julia with pyimport and call its sin without the py"" macro

### Plotting with Python

In [None]:
using PyPlot

# 1000 sample points on [0, 2π], then a sine whose argument is modulated by a cosine.
x = range(0; stop=2 * pi, length=1000)
y = sin.(3 * x + 4 * cos.(2 * x));

# Dashed red curve with a title.
plot(x, y; color="red", linewidth=2.0, linestyle="--")
title("A sinusoidally modulated sinusoid")

### Scikit-learn classifiers via Julia's Python bindings (PyCall.jl)

Let's load the `cars` dataset for this example.

In [None]:
using VegaDatasets

# Materialise the "cars" dataset as a DataFrame and preview its first five rows.
cars = DataFrame(dataset("cars"));
first(cars, 5)

In [None]:
# Remove rows with missing values. Unlike indexing with a `completecases` mask,
# `dropmissing` also narrows the column element types from `Union{Missing,T}` to `T`,
# so matrices built from `cars` afterwards have plain numeric element types.
cars = dropmissing(cars);

In [None]:
# Random forest from scikit-learn, fitted and scored on the same rows, so the
# printed figure is the training accuracy.
ENS = pyimport("sklearn.ensemble")
learner = ENS.RandomForestClassifier()

x = Matrix(cars[:, [2, 4, 5, 6, 7]])  # feature columns, selected by position
y = string.(cars[:, :Cylinders])       # class labels converted to strings

model = learner.fit(x, y)
yhat = model.predict(x)
accuracy = sum(yhat .== y) / length(y)
println("accuracy: $accuracy")

In [None]:
using ScikitLearn.CrossValidation: cross_val_score

# Cross-validate a k-nearest-neighbours classifier (constructed with argument 3)
# on the feature matrix `x` and the labels `y`.
KNN = pyimport("sklearn.neighbors")
modelKNN = KNN.KNeighborsClassifier(3)

cross_val_score(modelKNN, x, y; cv=10)  # 10-fold cross-validation (cv=10)

In [None]:
using PyCall

# Short constant aliases for the scikit-learn submodules used by the classifier table.
const EN   = pyimport("sklearn.ensemble")
const LM   = pyimport("sklearn.linear_model")
const DA   = pyimport("sklearn.discriminant_analysis")
const NN   = pyimport("sklearn.neighbors")
const SVM  = pyimport("sklearn.svm")
const TREE = pyimport("sklearn.tree")
const ANN  = pyimport("sklearn.neural_network")
const GP   = pyimport("sklearn.gaussian_process")
const KR   = pyimport("sklearn.kernel_ridge")
const NB   = pyimport("sklearn.naive_bayes")
const ISO  = pyimport("sklearn.isotonic")

In [None]:
# Lookup table from classifier name to the scikit-learn constructor.
# Values are the Python classes themselves (not instances); call one to build a model.
# The ensemble entries use the `EN` constant alias for `sklearn.ensemble`, so this
# table depends only on the constant module aliases and not on the mutable `ENS`
# global created by the earlier random-forest demo cell.
learners = Dict(
  "AdaBoostClassifier" => EN.AdaBoostClassifier,
  "BaggingClassifier" => EN.BaggingClassifier,
  "ExtraTreesClassifier" => EN.ExtraTreesClassifier,
  "VotingClassifier" => EN.VotingClassifier,
  "GradientBoostingClassifier" => EN.GradientBoostingClassifier,
  "RandomForestClassifier" => EN.RandomForestClassifier,
  "LDA" => DA.LinearDiscriminantAnalysis,
  "QDA" => DA.QuadraticDiscriminantAnalysis,
  "LogisticRegression" => LM.LogisticRegression,
  "PassiveAggressiveClassifier" => LM.PassiveAggressiveClassifier,
  "RidgeClassifier" => LM.RidgeClassifier,
  "RidgeClassifierCV" => LM.RidgeClassifierCV,
  "SGDClassifier" => LM.SGDClassifier,
  "KNeighborsClassifier" => NN.KNeighborsClassifier,
  "RadiusNeighborsClassifier" => NN.RadiusNeighborsClassifier,
  "NearestCentroid" => NN.NearestCentroid,
  "SVC" => SVM.SVC,
  "LinearSVC" => SVM.LinearSVC,
  "NuSVC" => SVM.NuSVC,
  "MLPClassifier" => ANN.MLPClassifier,
  "GaussianProcessClassifier" => GP.GaussianProcessClassifier,
  "DecisionTreeClassifier" => TREE.DecisionTreeClassifier,
  "GaussianNB" => NB.GaussianNB,
  "MultinomialNB" => NB.MultinomialNB,
  "ComplementNB" => NB.ComplementNB,
  "BernoulliNB" => NB.BernoulliNB
);

In [None]:
"""
    sktrain(learner, x, y)

Build a model by calling `learner()` with no arguments, fit it on `x` (converted with
`Matrix`) and `y` (converted with `Array`) through the model's `fit` member, and return
the fitted model object.
"""
function sktrain(learner, x, y)
    estimator = learner()
    features = Matrix(x)
    targets = Array(y)
    estimator.fit(features, targets)
    return estimator
end

In [None]:
"""
    skpredict(themodel, x)

Convert `x` with `Matrix`, pass it to `themodel.predict`, and return the predictions
materialised with `collect`.
"""
function skpredict(themodel, x)
    features = Matrix(x)
    predictions = themodel.predict(features)
    return collect(predictions)
end

In [None]:
using Random

# Seeded generator so the row permutation is the same on every run.
rng = MersenneTwister(1234);
# `shuffle` copies its input into a fresh vector, so the range can be passed directly.
ndx = shuffle(rng, 1:nrow(cars));

In [None]:
# Split the shuffled rows: the first 100 form the training set, the rest the test set.
trainrows, testrows = ndx[1:100], ndx[101:end]
featurecols = [2, 4, 5, 6, 7]  # feature columns, selected by position

xtrain = Matrix(cars[trainrows, featurecols])
ytrain = string.(cars[trainrows, :Cylinders]);
xtest = Matrix(cars[testrows, featurecols])
ytest = string.(cars[testrows, :Cylinders]);

### Try ExtraTrees Classifier

In [None]:
# Fit the ExtraTrees classifier on the training split and report the fraction of
# test rows whose predicted label matches the true label.
modelLR = sktrain(learners["ExtraTreesClassifier"], xtrain, ytrain)
yhat = skpredict(modelLR, xtest);
acc = sum(yhat .== ytest) / length(yhat)

### Try AdaBoost Classifier

In [None]:
# Fit AdaBoost on the training split and score it on the held-out test split, so that
# `acc` estimates generalisation accuracy instead of accuracy on the rows the model
# was fitted on.
modelADA = sktrain(learners["AdaBoostClassifier"], xtrain, ytrain)
res = skpredict(modelADA, xtest);
acc = sum(res .== ytest) / length(ytest)

### Try RandomForest Classifier

In [None]:
# Fit the random forest on the training split and score it on the held-out test split,
# so that `acc` estimates generalisation accuracy instead of accuracy on the rows the
# model was fitted on.
modelRF = sktrain(learners["RandomForestClassifier"], xtrain, ytrain)
res = skpredict(modelRF, xtest);
acc = sum(res .== ytest) / length(ytest)

In [None]:
cross_val_score(modelLR, x, y; cv=10) |> mean  # mean score over 10-fold cross-validation (cv=10)