In [None]:
using HTTP, MLJ, DataFrames, UrlDownload

In [None]:
# Download the UCI wine data over HTTPS. Column names are supplied explicitly
# via `header`; the first column, `Class`, is the label used further down.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"
header = ["Class", "Alcohol", "Malic acid", "Ash", "Alcalinity of ash",
          "Magnesium", "Total phenols", "Flavanoids",
          "Nonflavanoid phenols", "Proanthocyanins", "Color intensity",
          "Hue", "OD280/OD315 of diluted wines", "Proline"]
# NOTE(review): the positional `true` is presumably UrlDownload's
# progress-display flag -- confirm against the package docs.
data = urldownload(url, true, format=:CSV, header=header);

In [None]:
# Materialize the downloaded table as a DataFrame for the steps below.
data = DataFrame(data)

# Manipulating data

In [None]:
# Per-column summary of the table.
describe(data)

In [None]:
# Show which methods `describe` provides.
methods(describe)

In [None]:
using Plots
pyplot()  # switch Plots to the PyPlot backend
# Histogram of the Magnesium column; a single series needs no legend.
histogram(data.Magnesium; legend=false)

In [None]:
# Column selectors: `Class` is the target, every other column is a feature.
islabel = ==(:Class)
isdata = !=(:Class)
y, X = unpack(data, islabel, isdata);

In [None]:
y

In [None]:
# Scientific type of the target as loaded.
scitype(y)

In [None]:
# Re-interpret the class labels as an ordered categorical target.
y = coerce(y, OrderedFactor);

In [None]:
# Confirm the coercion took effect.
scitype(y)

In [None]:
schema(X)

In [None]:
# Treat Proline and Magnesium as continuous features, then re-inspect the
# schema to check the result.
coerce!(
    X,
    :Proline => Continuous,
    :Magnesium => Continuous,
);
schema(X)

# PCA

In [None]:
# Browse the model registry: everything, then only models compatible with X.
models()

In [None]:
models(matching(X))

In [None]:
# Registry metadata for the PCA model.
info("PCA")

In [None]:
@load PCA pkg="MultivariateStats"

In [None]:
# Standardize the features, then keep at most three principal components.
pca_pipe = @pipeline Standardizer() MLJMultivariateStatsInterface.PCA(maxoutdim=3)

In [None]:
# Unsupervised machine: bound to the features only.
pca = machine(pca_pipe,X)

In [None]:
fit!(pca)

In [None]:
report(pca)

In [None]:
# `transform` is exported by both MLJ and DataFrames, so the unqualified name
# is ambiguous in this session; qualify it explicitly.
MLJ.transform(pca)

In [None]:
# Split the projected table into its three component columns.
x1,x2,x3 = MLJ.transform(pca) |> eachcol;

In [None]:
x1

In [None]:
# First two projected components, without class information.
scatter(x1, x2; legend=false)

In [None]:
# Same projection, one colour per true class label.
colr = ["red" "blue" "magenta"]
# Class labels converted to counts so they can index the palette
# (assumes the labels map to 1..3 -- verify against `levels(y)`).
classcodes = coerce(y, Count)
scatter(x1, x2; color=colr[classcodes], legend=false)

# k-Means clustering

In [None]:
@load KMeans pkg="Clustering"

In [None]:
# Standardize, then cluster. The cluster count is spelled out because the
# plots below index a three-colour palette with the cluster assignments.
km_pipe = @pipeline Standardizer() MLJClusteringInterface.KMeans(k=3)

In [None]:
km = machine(km_pipe,X)

In [None]:
fit!(km)

In [None]:
# Keep the report in a named variable instead of relying on the notebook's
# `ans`, which silently changes meaning if cells are run out of order.
km_report = report(km)

In [None]:
# Sub-report of the k-means step of the pipeline.
km_report.k_means

In [None]:
# Cluster index assigned to each observation.
clus = km_report.k_means.assignments;

In [None]:
# PCA projection coloured by k-means cluster.
colr = ["red" "blue" "magenta"]
scatter(x1,x2,color=colr[clus],leg=false)

In [None]:
# Same projection coloured by the true class, for visual comparison.
# Cluster indices are arbitrary, so colours need not match between plots.
colr = ["red" "blue" "magenta"]
scatter(x1,x2,color=colr[coerce(y,Count)],leg=false)

# k-Nearest Neighbors

In [None]:
# Name the providing package explicitly, as for the other `@load` calls;
# the next cell refers to the `NearestNeighborModels` module by name.
@load KNNClassifier pkg="NearestNeighborModels"

In [None]:
# Standardize the features before the distance-based classifier.
pipe = @pipeline Standardizer() NearestNeighborModels.KNNClassifier()

In [None]:
# 80/20 shuffled split of the row indices; a fixed seed keeps the split
# (and every number computed from it below) reproducible between runs.
train,test = partition(eachindex(y), 0.8, shuffle=true, rng=123)

In [None]:
Xtrain,ytrain = X[train,:],y[train];
Xtest,ytest = X[test,:],y[test];

In [None]:
# Supervised machine bound to the training rows only.
knn = machine( pipe, Xtrain, ytrain )

In [None]:
fit!(knn)

In [None]:
# 5-fold cross-validation on the data bound to the machine (the training rows).
cv_result = evaluate!(knn,resampling=CV(nfolds=5))
# `evaluate!` mutates the machine by refitting it on the CV folds, so retrain
# on all training rows before the machine is used for prediction below.
fit!(knn, force=true)
cv_result

In [None]:
report(knn)

In [None]:
# Probabilistic predictions on the held-out rows.
predict(knn,Xtest)

In [None]:
# Point predictions: the most probable class for each held-out row.
ŷ = predict_mode(knn,Xtest)

In [None]:
# Hold-out accuracy: fraction of test rows predicted correctly.
count(ŷ.==ytest) / length(ytest)

In [None]:
# NOTE(review): this confusion matrix is computed on the full data set
# (training rows included), so it is optimistic compared with the hold-out
# accuracy above -- confirm this is intended, or use Xtest/ytest instead.
ConfusionMatrix()(predict_mode(knn,X),y)