<a href="https://colab.research.google.com/github/lcbjrrr/quant/blob/master/J_Class.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Topic:** AI/ML

**Title:** Classifiers (Decision Tree)

**Author:** Luiz Barboza

**Date:** 20/dec/22

**Lang:** Julia

**Site:** https://quant-research.group/

**Email:** contato@quant-research.group


# Julia Installation

In [None]:
%%shell
set -e

#---------------------------------------------------#
JULIA_VERSION="1.8.3" # any version ≥ 0.7.0
JULIA_PACKAGES="IJulia BenchmarkTools"
JULIA_PACKAGES_IF_GPU="CUDA" # or CuArrays for older Julia versions
JULIA_NUM_THREADS=2
#---------------------------------------------------#

if [ -z `which julia` ]; then
  # Install Julia
  JULIA_VER=`cut -d '.' -f -2 <<< "$JULIA_VERSION"`
  echo "Installing Julia $JULIA_VERSION on the current Colab Runtime..."
  BASE_URL="https://julialang-s3.julialang.org/bin/linux/x64"
  URL="$BASE_URL/$JULIA_VER/julia-$JULIA_VERSION-linux-x86_64.tar.gz"
  wget -nv $URL -O /tmp/julia.tar.gz # -nv means "not verbose"
  tar -x -f /tmp/julia.tar.gz -C /usr/local --strip-components 1
  rm /tmp/julia.tar.gz

  # Install Packages
  nvidia-smi -L &> /dev/null && export GPU=1 || export GPU=0
  if [ $GPU -eq 1 ]; then
    JULIA_PACKAGES="$JULIA_PACKAGES $JULIA_PACKAGES_IF_GPU"
  fi
  for PKG in `echo $JULIA_PACKAGES`; do
    echo "Installing Julia package $PKG..."
    julia -e 'using Pkg; pkg"add '$PKG'; precompile;"' &> /dev/null
  done

  # Install kernel and rename it to "julia"
  echo "Installing IJulia kernel..."
  julia -e 'using IJulia; IJulia.installkernel("julia", env=Dict(
      "JULIA_NUM_THREADS"=>"'"$JULIA_NUM_THREADS"'"))'
  KERNEL_DIR=`julia -e "using IJulia; print(IJulia.kerneldir())"`
  KERNEL_NAME=`ls -d "$KERNEL_DIR"/julia*`
  mv -f $KERNEL_NAME "$KERNEL_DIR"/julia  

  echo ''
  echo "Successfully installed `julia -v`!"
  echo "Please reload this page (press Ctrl+R, ⌘+R, or the F5 key) then"
  echo "jump to the 'Checking the Installation' section."
fi

Installing Julia 1.8.3 on the current Colab Runtime...
2022-12-21 14:33:05 URL:https://storage.googleapis.com/julialang2/bin/linux/x64/1.8/julia-1.8.3-linux-x86_64.tar.gz [130030846/130030846] -> "/tmp/julia.tar.gz" [1]
Installing Julia package IJulia...
Installing Julia package BenchmarkTools...


In [1]:
versioninfo()

Julia Version 1.8.3
Commit 0434deb161e (2022-11-14 20:14 UTC)
Platform Info:
  OS: Linux (x86_64-linux-gnu)
  CPU: 2 × Intel(R) Xeon(R) CPU @ 2.20GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-13.0.1 (ORCJIT, broadwell)
  Threads: 2 on 2 virtual cores
Environment:
  LD_LIBRARY_PATH = /usr/local/nvidia/lib:/usr/local/nvidia/lib64
  LD_PRELOAD = /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
  JULIA_NUM_THREADS = 2


# Classifiers

In [None]:
import Pkg
Pkg.add("CSV")
Pkg.add("DataFrames")
Pkg.add("Statistics")
Pkg.add("StatsPlots")
Pkg.add("Lathe") 
Pkg.add("DecisionTree") 

using CSV
using DataFrames
using Statistics
using StatsPlots
using Lathe
using Lathe.preprocess: TrainTestSplit
using DecisionTree


In [None]:
;wget https://raw.githubusercontent.com/lcbjrrr/data/main/gender%20-%20all.csv

In [4]:
#read csv
df=CSV.read("gender - all.csv", DataFrame)
first(df,5)

Unnamed: 0_level_0,G,H,W
Unnamed: 0_level_1,Int64,Float64,Float64
1,0,187.571,109.952
2,0,174.706,73.7775
3,0,188.24,96.7004
4,0,182.197,100.019
5,0,177.5,93.7954


## Decision Tree

In [5]:
train, test = TrainTestSplit(df,.80)

([1m7991×3 DataFrame[0m
[1m  Row [0m│[1m G     [0m[1m H       [0m[1m W        [0m
[1m      [0m│[90m Int64 [0m[90m Float64 [0m[90m Float64  [0m
──────┼──────────────────────────
    1 │     0  174.706   73.7775
    2 │     0  188.24    96.7004
    3 │     0  182.197  100.019
    4 │     0  177.5     93.7954
    5 │     0  174.714   83.6036
    6 │     0  173.605   76.3505
    7 │     0  170.228   79.9679
    8 │     0  161.179   71.0908
    9 │     0  180.836   84.8204
   10 │     0  181.968   97.1551
   11 │     0  164.506   75.967
  ⋮   │   ⋮       ⋮        ⋮
 7982 │     1  151.228   55.1113
 7983 │     1  154.826   43.4939
 7984 │     1  160.476   64.2119
 7985 │     1  159.097   46.7516
 7986 │     1  157.678   63.0417
 7987 │     1  152.477   44.4034
 7988 │     1  168.079   62.1716
 7989 │     1  162.225   58.3979
 7990 │     1  175.347   74.4784
 7991 │     1  157.338   51.6587
[36m                7970 rows omitted[0m, [1m2009×3 DataFrame[0m
[1m  Row [0m│

In [6]:
tree = DecisionTreeClassifier(max_depth=3)
DecisionTree.fit!(tree, Matrix(train[:,[:H,:W]]), train[:,:G])     

DecisionTreeClassifier
max_depth:                3
min_samples_leaf:         1
min_samples_split:        2
min_purity_increase:      0.0
pruning_purity_threshold: 1.0
n_subfeatures:            0
classes:                  [0, 1]
root:                     Decision Tree
Leaves: 8
Depth:  3

In [9]:
function accuracy(y,pred)
  acc = sum(pred.==y)/length(y)
  return acc
end

accuracy (generic function with 1 method)

In [12]:
pred_train = DecisionTree.predict(tree,Matrix(train[:,[:H,:W]]))
acc_train = accuracy(train.G,pred_train)
print("Accuracy (Train): ",acc_train)

Accuracy (Train): 0.907395820297835

In [15]:
pred_test=DecisionTree.predict(tree,Matrix(test[:,[:H,:W]]))
acc_test = accuracy(test.G,pred_test)
print("Accuracy (Test): ",acc_test)

Accuracy (Test): 0.9118964659034345