## Instalar as bibliotecas `tidyverse` e `e1071`

Se as bibliotecas `tidyverse` e `e1071` não estiverem instaladas, elas podem ser instaladas pelo repositório do CRAN (isto deve ser feito fora do `jupyter`):

```
install.packages("tidyverse")
install.packages("e1071")
```

### Importar as bibliotecas `tidyverse` e `e1071`

In [1]:
library(tidyverse)
library(e1071)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──

✔ ggplot2 3.3.2     ✔ purrr   0.3.4
✔ tibble  3.0.2     ✔ dplyr   1.0.0
✔ tidyr   1.1.0     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.5.0

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()



### Importando os dados

In [2]:
# Load both student datasets (semicolon-delimited files read with
# read_csv2). Every column is kept as character at this stage; numeric
# types are inferred later, at the point of use.
data_mat <- read_csv2(
  file = "../dados/student-mat.csv",
  col_types = cols(.default = col_character())
)
data_port <- read_csv2(
  file = "../dados/student-por.csv",
  col_types = cols(.default = col_character())
)

Using ',' as decimal and '.' as grouping mark. Use read_delim() for more control.

Using ',' as decimal and '.' as grouping mark. Use read_delim() for more control.



### Conferindo o número de amostras

In [3]:
# Number of samples (rows) in each dataset
data_port %>% nrow()
data_mat %>% nrow()

---
### Verificando os dados

In [4]:
# Preview the first rows of each dataset
data_port %>% head()
data_mat %>% head()

school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,⋯,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
GP,F,18,U,GT3,A,4,4,at_home,teacher,⋯,4,3,4,1,1,3,4,0,11,11
GP,F,17,U,GT3,T,1,1,at_home,other,⋯,5,3,3,1,1,3,2,9,11,11
GP,F,15,U,LE3,T,1,1,at_home,other,⋯,4,3,2,2,3,3,6,12,13,12
GP,F,15,U,GT3,T,4,2,health,services,⋯,3,2,2,1,1,5,0,14,14,14
GP,F,16,U,GT3,T,3,3,other,other,⋯,4,3,2,1,2,5,0,11,13,13
GP,M,16,U,LE3,T,4,3,services,other,⋯,5,4,2,1,2,5,6,12,12,13


school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,⋯,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
GP,F,18,U,GT3,A,4,4,at_home,teacher,⋯,4,3,4,1,1,3,6,5,6,6
GP,F,17,U,GT3,T,1,1,at_home,other,⋯,5,3,3,1,1,3,4,5,5,6
GP,F,15,U,LE3,T,1,1,at_home,other,⋯,4,3,2,2,3,3,10,7,8,10
GP,F,15,U,GT3,T,4,2,health,services,⋯,3,2,2,1,1,5,2,15,14,15
GP,F,16,U,GT3,T,3,3,other,other,⋯,4,3,2,1,2,5,4,6,10,10
GP,M,16,U,LE3,T,4,3,services,other,⋯,5,4,2,1,2,5,10,15,15,15


---
### Tabela com estatísticas

#### Português

In [5]:
# Descriptive statistics for the Portuguese dataset.
# The selected numeric columns are stacked into long format (one row per
# feature/value pair) and then summarised feature by feature.
data_port %>%
  type_convert() %>%  # let readr re-guess the column types
  select(age, absences, G1, G2, G3) %>%
  pivot_longer(
    cols = everything(),
    names_to = "Feature",
    values_to = "Value"
  ) %>%
  group_by(Feature) %>%
  summarise(
    Min = min(Value),
    Max = max(Value),
    Range = Max - Min,  # reuses the two summaries computed just above
    Q1 = quantile(Value, probs = 0.25),
    Q2 = quantile(Value, probs = 0.50),
    Q3 = quantile(Value, probs = 0.75),
    Std = sd(Value),
    Mean = mean(Value),
    Var = var(Value),
    Skew = skewness(Value),  # skewness() and kurtosis() come from e1071
    Kurt = kurtosis(Value)
  )

Parsed with column specification:
cols(
  .default = col_character(),
  age = col_double(),
  Medu = col_double(),
  Fedu = col_double(),
  traveltime = col_double(),
  studytime = col_double(),
  failures = col_double(),
  famrel = col_double(),
  freetime = col_double(),
  goout = col_double(),
  Dalc = col_double(),
  Walc = col_double(),
  health = col_double(),
  absences = col_double(),
  G1 = col_double(),
  G2 = col_double(),
  G3 = col_double()
)

See spec(...) for full column specifications.

`summarise()` ungrouping output (override with `.groups` argument)



Feature,Min,Max,Range,Q1,Q2,Q3,Std,Mean,Var,Skew,Kurt
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
absences,0,32,32,0,2,6,4.640759,3.659476,21.536642,2.01136265,5.70053028
age,15,22,7,16,17,18,1.218138,16.744222,1.483859,0.414870724,0.05230014
G1,0,19,19,10,11,13,2.745265,11.399076,7.536481,-0.002760829,0.0178044
G2,0,19,19,10,11,13,2.913639,11.570108,8.48929,-0.358618952,1.62616438
G3,0,19,19,10,12,14,3.230656,11.906009,10.43714,-0.90869377,2.66462641


#### Matemática

In [6]:
# Descriptive statistics for the Mathematics dataset.
# The selected numeric columns are stacked into long format (one row per
# feature/value pair) and then summarised feature by feature.
data_mat %>%
  type_convert() %>%  # let readr re-guess the column types
  select(age, absences, G1, G2, G3) %>%
  pivot_longer(
    cols = everything(),
    names_to = "Feature",
    values_to = "Value"
  ) %>%
  group_by(Feature) %>%
  summarise(
    Min = min(Value),
    Max = max(Value),
    Range = Max - Min,  # reuses the two summaries computed just above
    Q1 = quantile(Value, probs = 0.25),
    Q2 = quantile(Value, probs = 0.50),
    Q3 = quantile(Value, probs = 0.75),
    Std = sd(Value),
    Mean = mean(Value),
    Var = var(Value),
    Skew = skewness(Value),  # skewness() and kurtosis() come from e1071
    Kurt = kurtosis(Value)
  )

Parsed with column specification:
cols(
  .default = col_character(),
  age = col_double(),
  Medu = col_double(),
  Fedu = col_double(),
  traveltime = col_double(),
  studytime = col_double(),
  failures = col_double(),
  famrel = col_double(),
  freetime = col_double(),
  goout = col_double(),
  Dalc = col_double(),
  Walc = col_double(),
  health = col_double(),
  absences = col_double(),
  G1 = col_double(),
  G2 = col_double(),
  G3 = col_double()
)

See spec(...) for full column specifications.

`summarise()` ungrouping output (override with `.groups` argument)



Feature,Min,Max,Range,Q1,Q2,Q3,Std,Mean,Var,Skew,Kurt
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
absences,0,75,75,0,4,8,8.003096,5.708861,64.049541,3.6437406,21.30650507
age,15,22,7,16,17,18,1.276043,16.696203,1.628285,0.4627348,-0.03144581
G1,3,19,16,8,11,13,3.319195,10.908861,11.017053,0.2387889,-0.71185911
G2,0,19,19,9,11,13,3.761505,10.713924,14.148917,-0.4283726,0.58640838
G3,0,20,20,8,11,14,4.581443,10.41519,20.989616,-0.7271171,0.36607243
