In [2]:
import pandas as pd
import numpy as np
# ^^^ pyforest auto-imports - don't write above this line
import numpy as np
import pandas as pd

# Data Cleaning Review

In [25]:
# Toy frame for the cleaning review: `cpf` repeats for 3 and 4, and the last `vlr` is missing.
df = pd.DataFrame(
    {
        'cpf': [1, 2, 3, 3, 4, 4, 5],
        'vlr': [10, 20, 30, 40, 50, 50, np.nan],
    }
)
df

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0,cpf,vlr
0,1,10.0
1,2,20.0
2,3,30.0
3,3,40.0
4,4,50.0
5,4,50.0
6,5,


In [30]:
df.isna().sum()

cpf    0
vlr    1
dtype: int64

In [27]:
# Per-column flag: True for every column that holds at least one missing value.
mask = df.isna().any()
mask

cpf    False
vlr     True
dtype: bool

In [32]:
mask.loc[mask].index

Index(['vlr'], dtype='object')

In [33]:
# `mask` is indexed by column name, so it can select the flagged columns directly.
df.loc[:, mask]

Unnamed: 0,vlr
0,10.0
1,20.0
2,30.0
3,40.0
4,50.0
5,50.0
6,


In [14]:
df.duplicated(subset=['cpf'], keep=False)

0    False
1    False
2     True
3     True
4     True
5     True
dtype: bool

In [10]:
df.drop_duplicates(subset=['cpf'], keep=False)

Unnamed: 0,cpf,vlr
0,1,10
1,2,20
2,3,30
3,3,40
4,4,50


- `isna()`/`isnull()` --> retorna uma máscara booleana que vai ser True onde o valor for missing.
- `fillna()` --> completa os missings com o argumento que você colocar (retorna um dataframe)
- `duplicated()` --> retorna True se for duplicado (por default, True a partir da segunda repetição). Outro argumento importante era o `subset`.

- `drop_duplicates()` --> remove as linhas duplicadas (remove as linhas que o `duplicated()` retorna True)

- Máscaras, na verdade, nada mais são do que Series ou DataFrames booleanos — por isso conseguimos trabalhar com elas da mesma forma, inclusive criando novas máscaras a partir delas — por exemplo, quando usamos:

```python
mask = vehicles.isna()

# mask.mean() is the fraction of missing values per column, indexed by column
# name, so it belongs in the column slot of .loc (keeps columns < 80% missing).
vehicles.loc[:, mask.mean() < 0.8]

```

# DataFrame Calculations

## Read DataFrame

Description: 

http://lib.stat.cmu.edu/datasets/sleep

In [34]:
# Load the tab-separated sleep dataset; read_table uses '\t' as its default separator.
animals = pd.read_table('http://www.statsci.org/data/general/sleep.txt')

<IPython.core.display.Javascript object>

In [35]:
animals.head()

Unnamed: 0,Species,BodyWt,BrainWt,NonDreaming,Dreaming,TotalSleep,LifeSpan,Gestation,Predation,Exposure,Danger
0,Africanelephant,6654.0,5712.0,,,3.3,38.6,645.0,3,5,3
1,Africangiantpouchedrat,1.0,6.6,6.3,2.0,8.3,4.5,42.0,3,1,3
2,ArcticFox,3.385,44.5,,,12.5,14.0,60.0,1,1,1
3,Arcticgroundsquirrel,0.92,5.7,,,16.5,,25.0,5,2,3
4,Asianelephant,2547.0,4603.0,2.1,1.8,3.9,69.0,624.0,3,5,4


In [36]:
animals.describe()

Unnamed: 0,BodyWt,BrainWt,NonDreaming,Dreaming,TotalSleep,LifeSpan,Gestation,Predation,Exposure,Danger
count,62.0,62.0,48.0,50.0,58.0,58.0,58.0,62.0,62.0,62.0
mean,198.789984,283.134194,8.672917,1.972,10.532759,19.877586,142.353448,2.870968,2.419355,2.612903
std,899.158011,930.278942,3.666452,1.442651,4.60676,18.206255,146.805039,1.476414,1.604792,1.441252
min,0.005,0.14,2.1,0.0,2.6,2.0,12.0,1.0,1.0,1.0
25%,0.6,4.25,6.25,0.9,8.05,6.625,35.75,2.0,1.0,1.0
50%,3.3425,17.25,8.35,1.8,10.45,15.1,79.0,3.0,2.0,2.0
75%,48.2025,166.0,11.0,2.55,13.2,27.75,207.5,4.0,4.0,4.0
max,6654.0,5712.0,17.9,6.6,19.9,100.0,645.0,5.0,5.0,5.0


In [38]:
animals['coluna_exemplo'] = 10

In [40]:
animals

Unnamed: 0,Species,BodyWt,BrainWt,NonDreaming,Dreaming,TotalSleep,LifeSpan,Gestation,Predation,Exposure,Danger,coluna_exemplo
0,Africanelephant,6654.000,5712.0,,,3.3,38.6,645.0,3,5,3,10
1,Africangiantpouchedrat,1.000,6.6,6.3,2.0,8.3,4.5,42.0,3,1,3,10
2,ArcticFox,3.385,44.5,,,12.5,14.0,60.0,1,1,1,10
3,Arcticgroundsquirrel,0.920,5.7,,,16.5,,25.0,5,2,3,10
4,Asianelephant,2547.000,4603.0,2.1,1.8,3.9,69.0,624.0,3,5,4,10
...,...,...,...,...,...,...,...,...,...,...,...,...
57,Treehyrax,2.000,12.3,4.9,0.5,5.4,7.5,200.0,3,1,3,10
58,Treeshrew,0.104,2.5,13.2,2.6,15.8,2.3,46.0,3,2,2,10
59,Vervet,4.190,58.0,9.7,0.6,10.3,24.0,210.0,4,3,4,10
60,Wateropossum,3.500,3.9,12.8,6.6,19.4,3.0,14.0,2,1,1,10


# DataFrame Calculations

## Performing calculations to create a new column 

### Using constants

In [41]:
animals['nova_coluna'] = 1

In [42]:
animals

Unnamed: 0,Species,BodyWt,BrainWt,NonDreaming,Dreaming,TotalSleep,LifeSpan,Gestation,Predation,Exposure,Danger,coluna_exemplo,nova_coluna
0,Africanelephant,6654.000,5712.0,,,3.3,38.6,645.0,3,5,3,10,1
1,Africangiantpouchedrat,1.000,6.6,6.3,2.0,8.3,4.5,42.0,3,1,3,10,1
2,ArcticFox,3.385,44.5,,,12.5,14.0,60.0,1,1,1,10,1
3,Arcticgroundsquirrel,0.920,5.7,,,16.5,,25.0,5,2,3,10,1
4,Asianelephant,2547.000,4603.0,2.1,1.8,3.9,69.0,624.0,3,5,4,10,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,Treehyrax,2.000,12.3,4.9,0.5,5.4,7.5,200.0,3,1,3,10,1
58,Treeshrew,0.104,2.5,13.2,2.6,15.8,2.3,46.0,3,2,2,10,1
59,Vervet,4.190,58.0,9.7,0.6,10.3,24.0,210.0,4,3,4,10,1
60,Wateropossum,3.500,3.9,12.8,6.6,19.4,3.0,14.0,2,1,1,10,1


### From lists

In [46]:
# Build one squared counter per row. The length is taken from the frame itself,
# so the assignment cannot fail with a length mismatch if the row count changes.
animals['segunda_coluna'] = [item ** 2 for item in range(len(animals))]

In [49]:
animals

Unnamed: 0,Species,BodyWt,BrainWt,NonDreaming,Dreaming,TotalSleep,LifeSpan,Gestation,Predation,Exposure,Danger,coluna_exemplo,nova_coluna,segunda_coluna
0,Africanelephant,6654.000,5712.0,,,3.3,38.6,645.0,3,5,3,10,1,0
1,Africangiantpouchedrat,1.000,6.6,6.3,2.0,8.3,4.5,42.0,3,1,3,10,1,1
2,ArcticFox,3.385,44.5,,,12.5,14.0,60.0,1,1,1,10,1,4
3,Arcticgroundsquirrel,0.920,5.7,,,16.5,,25.0,5,2,3,10,1,9
4,Asianelephant,2547.000,4603.0,2.1,1.8,3.9,69.0,624.0,3,5,4,10,1,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,Treehyrax,2.000,12.3,4.9,0.5,5.4,7.5,200.0,3,1,3,10,1,3249
58,Treeshrew,0.104,2.5,13.2,2.6,15.8,2.3,46.0,3,2,2,10,1,3364
59,Vervet,4.190,58.0,9.7,0.6,10.3,24.0,210.0,4,3,4,10,1,3481
60,Wateropossum,3.500,3.9,12.8,6.6,19.4,3.0,14.0,2,1,1,10,1,3600


In [53]:
animals.columns

Index(['Species', 'BodyWt', 'BrainWt', 'NonDreaming', 'Dreaming', 'TotalSleep',
       'LifeSpan', 'Gestation', 'Predation', 'Exposure', 'Danger',
       'coluna_exemplo', 'nova_coluna', 'segunda_coluna'],
      dtype='object')

In [56]:
animals.columns

Index(['species', 'bodywt', 'brainwt', 'nondreaming', 'dreaming', 'totalsleep',
       'lifespan', 'gestation', 'predation', 'exposure', 'danger',
       'coluna_exemplo', 'nova_coluna', 'segunda_coluna'],
      dtype='object')

In [54]:
# Normalise every column label to lower case using the vectorised string accessor.
animals.columns = animals.columns.str.lower()

In [58]:
# Rename a single column through a {old: new} mapping. Reassigning the result
# (instead of inplace=True) keeps the operation explicit and chainable.
animals = animals.rename(columns={'bodywt': 'BODYWT'})

In [60]:
# Lower-case the labels again, which turns 'BODYWT' from the rename above back into 'bodywt'.
animals.columns = animals.columns.str.lower()

In [63]:
animals

Unnamed: 0,species,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,danger,coluna_exemplo,nova_coluna,segunda_coluna
0,Africanelephant,6654.000,5712.0,,,3.3,38.6,645.0,3,5,3,10,1,0
1,Africangiantpouchedrat,1.000,6.6,6.3,2.0,8.3,4.5,42.0,3,1,3,10,1,1
2,ArcticFox,3.385,44.5,,,12.5,14.0,60.0,1,1,1,10,1,4
3,Arcticgroundsquirrel,0.920,5.7,,,16.5,,25.0,5,2,3,10,1,9
4,Asianelephant,2547.000,4603.0,2.1,1.8,3.9,69.0,624.0,3,5,4,10,1,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,Treehyrax,2.000,12.3,4.9,0.5,5.4,7.5,200.0,3,1,3,10,1,3249
58,Treeshrew,0.104,2.5,13.2,2.6,15.8,2.3,46.0,3,2,2,10,1,3364
59,Vervet,4.190,58.0,9.7,0.6,10.3,24.0,210.0,4,3,4,10,1,3481
60,Wateropossum,3.500,3.9,12.8,6.6,19.4,3.0,14.0,2,1,1,10,1,3600


In [66]:
animals['bodywt'] * 2.205

0     14672.070000
1         2.205000
2         7.463925
3         2.028600
4      5616.135000
          ...     
57        4.410000
58        0.229320
59        9.238950
60        7.717500
61        8.930250
Name: bodywt, Length: 62, dtype: float64

In [67]:
# from kilograms to pounds (multiplies bodywt by the factor 2.205)
animals['bodywt_pounds'] = animals['bodywt'] * 2.205

In [69]:
animals.head()

Unnamed: 0,species,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,danger,coluna_exemplo,nova_coluna,segunda_coluna,bodywt_pounds
0,Africanelephant,6654.0,5712.0,,,3.3,38.6,645.0,3,5,3,10,1,0,14672.07
1,Africangiantpouchedrat,1.0,6.6,6.3,2.0,8.3,4.5,42.0,3,1,3,10,1,1,2.205
2,ArcticFox,3.385,44.5,,,12.5,14.0,60.0,1,1,1,10,1,4,7.463925
3,Arcticgroundsquirrel,0.92,5.7,,,16.5,,25.0,5,2,3,10,1,9,2.0286
4,Asianelephant,2547.0,4603.0,2.1,1.8,3.9,69.0,624.0,3,5,4,10,1,16,5616.135


In [70]:
# brain weight: grams -> kilograms
animals['brainwt_kg'] = animals['brainwt'].div(1000)

In [71]:
animals.head()

Unnamed: 0,species,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,danger,coluna_exemplo,nova_coluna,segunda_coluna,bodywt_pounds,brainwt_kg
0,Africanelephant,6654.0,5712.0,,,3.3,38.6,645.0,3,5,3,10,1,0,14672.07,5.712
1,Africangiantpouchedrat,1.0,6.6,6.3,2.0,8.3,4.5,42.0,3,1,3,10,1,1,2.205,0.0066
2,ArcticFox,3.385,44.5,,,12.5,14.0,60.0,1,1,1,10,1,4,7.463925,0.0445
3,Arcticgroundsquirrel,0.92,5.7,,,16.5,,25.0,5,2,3,10,1,9,2.0286,0.0057
4,Asianelephant,2547.0,4603.0,2.1,1.8,3.9,69.0,624.0,3,5,4,10,1,16,5616.135,4.603


# Performing calculations using columns

In [73]:
animals['brainwt'] / animals['bodywt']

0      0.858431
1      6.600000
2     13.146233
3      6.195652
4      1.807224
        ...    
57     6.150000
58    24.038462
59    13.842482
60     1.114286
61     4.197531
Length: 62, dtype: float64

In [74]:
# First attempt at the brain-to-body ratio: divides `brainwt` by `bodywt` as stored.
# NOTE: `brainwt_kg` was derived as brainwt / 1000, so `brainwt` is on a different
# scale than the kg column; this ratio is recomputed from `brainwt_kg` further down.
animals['ratio_brain_body'] = animals['brainwt'] / animals['bodywt']

In [76]:
# careful with scales!!
animals.head()

Unnamed: 0,species,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,danger,coluna_exemplo,nova_coluna,segunda_coluna,bodywt_pounds,brainwt_kg,ratio_brain_body
0,Africanelephant,6654.0,5712.0,,,3.3,38.6,645.0,3,5,3,10,1,0,14672.07,5.712,0.858431
1,Africangiantpouchedrat,1.0,6.6,6.3,2.0,8.3,4.5,42.0,3,1,3,10,1,1,2.205,0.0066,6.6
2,ArcticFox,3.385,44.5,,,12.5,14.0,60.0,1,1,1,10,1,4,7.463925,0.0445,13.146233
3,Arcticgroundsquirrel,0.92,5.7,,,16.5,,25.0,5,2,3,10,1,9,2.0286,0.0057,6.195652
4,Asianelephant,2547.0,4603.0,2.1,1.8,3.9,69.0,624.0,3,5,4,10,1,16,5616.135,4.603,1.807224


In [77]:
# Recompute the brain-to-body ratio from `brainwt_kg`, overwriting the earlier
# `ratio_brain_body` values that were computed from the unscaled `brainwt`.
animals['ratio_brain_body'] = animals['brainwt_kg'] / animals['bodywt']

In [78]:
animals.head()

Unnamed: 0,species,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,danger,coluna_exemplo,nova_coluna,segunda_coluna,bodywt_pounds,brainwt_kg,ratio_brain_body
0,Africanelephant,6654.0,5712.0,,,3.3,38.6,645.0,3,5,3,10,1,0,14672.07,5.712,0.000858
1,Africangiantpouchedrat,1.0,6.6,6.3,2.0,8.3,4.5,42.0,3,1,3,10,1,1,2.205,0.0066,0.0066
2,ArcticFox,3.385,44.5,,,12.5,14.0,60.0,1,1,1,10,1,4,7.463925,0.0445,0.013146
3,Arcticgroundsquirrel,0.92,5.7,,,16.5,,25.0,5,2,3,10,1,9,2.0286,0.0057,0.006196
4,Asianelephant,2547.0,4603.0,2.1,1.8,3.9,69.0,624.0,3,5,4,10,1,16,5616.135,4.603,0.001807


In [81]:
# sort values to see results.
animals.sort_values(by='ratio_brain_body', ascending=False)

Unnamed: 0,species,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,danger,coluna_exemplo,nova_coluna,segunda_coluna,bodywt_pounds,brainwt_kg,ratio_brain_body
26,Groundsquirrel,0.101,4.00,10.4,3.4,13.8,9.0,28.0,5,1,3,10,1,676,0.222705,0.00400,0.039604
41,Owlmonkey,0.480,15.50,15.2,1.8,17.0,12.0,140.0,2,2,2,10,1,1681,1.058400,0.01550,0.032292
31,Lessershort-tailedshrew,0.005,0.14,7.7,1.4,9.1,2.6,21.5,5,2,4,10,1,961,0.011025,0.00014,0.028000
49,Rhesusmonkey,6.800,179.00,8.4,1.2,9.6,29.0,164.0,2,3,2,10,1,2401,14.994000,0.17900,0.026324
32,Littlebrownbat,0.010,0.25,17.9,2.0,19.9,24.0,50.0,1,1,1,10,1,1024,0.022050,0.00025,0.025000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,Wateropossum,3.500,3.90,12.8,6.6,19.4,3.0,14.0,2,1,1,10,1,3600,7.717500,0.00390,0.001114
7,Braziliantapir,160.000,169.00,5.2,1.0,6.2,30.4,392.0,4,5,4,10,1,49,352.800000,0.16900,0.001056
44,Pig,192.000,180.00,6.5,1.9,8.4,27.0,115.0,4,4,4,10,1,1936,423.360000,0.18000,0.000937
11,Cow,465.000,423.00,3.2,0.7,3.9,30.0,281.0,5,5,5,10,1,121,1025.325000,0.42300,0.000910


In [91]:
animals[['bodywt','brainwt_kg']].sum()

bodywt        12324.97900
brainwt_kg       17.55432
dtype: float64

## Performing calculations row-wise 

### Calculate the sum of weights for each animal

In [96]:
animals[['bodywt','brainwt_kg']].sum(axis=1)

0     6659.7120
1        1.0066
2        3.4295
3        0.9257
4     2551.6030
        ...    
57       2.0123
58       0.1065
59       4.2480
60       3.5039
61       4.0670
Length: 62, dtype: float64

In [93]:
# Row-wise total: axis=1 adds the two selected columns for each animal.
# DataFrame.sum skips NaN by default, so a missing operand contributes 0 here.
animals['total_wt'] = animals[['bodywt','brainwt_kg']].sum(axis=1)

In [94]:
animals

Unnamed: 0,species,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,danger,coluna_exemplo,nova_coluna,segunda_coluna,bodywt_pounds,brainwt_kg,ratio_brain_body,total_wt
0,Africanelephant,6654.000,5712.0,,,3.3,38.6,645.0,3,5,3,10,1,0,14672.070000,5.7120,0.000858,6659.7120
1,Africangiantpouchedrat,1.000,6.6,6.3,2.0,8.3,4.5,42.0,3,1,3,10,1,1,2.205000,0.0066,0.006600,1.0066
2,ArcticFox,3.385,44.5,,,12.5,14.0,60.0,1,1,1,10,1,4,7.463925,0.0445,0.013146,3.4295
3,Arcticgroundsquirrel,0.920,5.7,,,16.5,,25.0,5,2,3,10,1,9,2.028600,0.0057,0.006196,0.9257
4,Asianelephant,2547.000,4603.0,2.1,1.8,3.9,69.0,624.0,3,5,4,10,1,16,5616.135000,4.6030,0.001807,2551.6030
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,Treehyrax,2.000,12.3,4.9,0.5,5.4,7.5,200.0,3,1,3,10,1,3249,4.410000,0.0123,0.006150,2.0123
58,Treeshrew,0.104,2.5,13.2,2.6,15.8,2.3,46.0,3,2,2,10,1,3364,0.229320,0.0025,0.024038,0.1065
59,Vervet,4.190,58.0,9.7,0.6,10.3,24.0,210.0,4,3,4,10,1,3481,9.238950,0.0580,0.013842,4.2480
60,Wateropossum,3.500,3.9,12.8,6.6,19.4,3.0,14.0,2,1,1,10,1,3600,7.717500,0.0039,0.001114,3.5039


In [98]:
# Same total through element-wise addition of the two Series
# (a missing operand gives NaN, unlike .sum(axis=1)).
animals['totalwt'] = animals['bodywt'].add(animals['brainwt_kg'])

In [99]:
animals

Unnamed: 0,species,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,danger,coluna_exemplo,nova_coluna,segunda_coluna,bodywt_pounds,brainwt_kg,ratio_brain_body,total_wt,totalwt
0,Africanelephant,6654.000,5712.0,,,3.3,38.6,645.0,3,5,3,10,1,0,14672.070000,5.7120,0.000858,6659.7120,6659.7120
1,Africangiantpouchedrat,1.000,6.6,6.3,2.0,8.3,4.5,42.0,3,1,3,10,1,1,2.205000,0.0066,0.006600,1.0066,1.0066
2,ArcticFox,3.385,44.5,,,12.5,14.0,60.0,1,1,1,10,1,4,7.463925,0.0445,0.013146,3.4295,3.4295
3,Arcticgroundsquirrel,0.920,5.7,,,16.5,,25.0,5,2,3,10,1,9,2.028600,0.0057,0.006196,0.9257,0.9257
4,Asianelephant,2547.000,4603.0,2.1,1.8,3.9,69.0,624.0,3,5,4,10,1,16,5616.135000,4.6030,0.001807,2551.6030,2551.6030
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,Treehyrax,2.000,12.3,4.9,0.5,5.4,7.5,200.0,3,1,3,10,1,3249,4.410000,0.0123,0.006150,2.0123,2.0123
58,Treeshrew,0.104,2.5,13.2,2.6,15.8,2.3,46.0,3,2,2,10,1,3364,0.229320,0.0025,0.024038,0.1065,0.1065
59,Vervet,4.190,58.0,9.7,0.6,10.3,24.0,210.0,4,3,4,10,1,3481,9.238950,0.0580,0.013842,4.2480,4.2480
60,Wateropossum,3.500,3.9,12.8,6.6,19.4,3.0,14.0,2,1,1,10,1,3600,7.717500,0.0039,0.001114,3.5039,3.5039


# Conditional Calculations

## Assign values based on conditions

In [101]:
animals['danger']

0     3
1     3
2     1
3     3
4     4
     ..
57    3
58    2
59    4
60    1
61    1
Name: danger, Length: 62, dtype: int64

In [104]:
animals['danger']

0     3
1     3
2     1
3     3
4     4
     ..
57    3
58    2
59    4
60    1
61    1
Name: danger, Length: 62, dtype: int64

In [107]:
# Flag the rows where danger >= 4. 'flag_extincao' does not exist yet, so .loc
# creates it and every non-matching row is left as NaN (see the output below).
animals.loc[animals['danger'] >= 4, 'flag_extincao'] = 1

In [108]:
animals

Unnamed: 0,species,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,danger,coluna_exemplo,nova_coluna,segunda_coluna,bodywt_pounds,brainwt_kg,ratio_brain_body,total_wt,totalwt,flag_extincao
0,Africanelephant,6654.000,5712.0,,,3.3,38.6,645.0,3,5,3,10,1,0,14672.070000,5.7120,0.000858,6659.7120,6659.7120,
1,Africangiantpouchedrat,1.000,6.6,6.3,2.0,8.3,4.5,42.0,3,1,3,10,1,1,2.205000,0.0066,0.006600,1.0066,1.0066,
2,ArcticFox,3.385,44.5,,,12.5,14.0,60.0,1,1,1,10,1,4,7.463925,0.0445,0.013146,3.4295,3.4295,
3,Arcticgroundsquirrel,0.920,5.7,,,16.5,,25.0,5,2,3,10,1,9,2.028600,0.0057,0.006196,0.9257,0.9257,
4,Asianelephant,2547.000,4603.0,2.1,1.8,3.9,69.0,624.0,3,5,4,10,1,16,5616.135000,4.6030,0.001807,2551.6030,2551.6030,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,Treehyrax,2.000,12.3,4.9,0.5,5.4,7.5,200.0,3,1,3,10,1,3249,4.410000,0.0123,0.006150,2.0123,2.0123,
58,Treeshrew,0.104,2.5,13.2,2.6,15.8,2.3,46.0,3,2,2,10,1,3364,0.229320,0.0025,0.024038,0.1065,0.1065,
59,Vervet,4.190,58.0,9.7,0.6,10.3,24.0,210.0,4,3,4,10,1,3481,9.238950,0.0580,0.013842,4.2480,4.2480,1.0
60,Wateropossum,3.500,3.9,12.8,6.6,19.4,3.0,14.0,2,1,1,10,1,3600,7.717500,0.0039,0.001114,3.5039,3.5039,


In [109]:
# Fill the complement: rows that do NOT satisfy danger >= 4 (`~` negates the mask) get 0.
animals.loc[~(animals['danger'] >= 4), 'flag_extincao'] = 0
# Alternative spelling of the same condition, without the negation:
# animals.loc[(animals['danger'] < 4), 'flag_extincao'] = 0

In [110]:
animals

Unnamed: 0,species,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,danger,coluna_exemplo,nova_coluna,segunda_coluna,bodywt_pounds,brainwt_kg,ratio_brain_body,total_wt,totalwt,flag_extincao
0,Africanelephant,6654.000,5712.0,,,3.3,38.6,645.0,3,5,3,10,1,0,14672.070000,5.7120,0.000858,6659.7120,6659.7120,0.0
1,Africangiantpouchedrat,1.000,6.6,6.3,2.0,8.3,4.5,42.0,3,1,3,10,1,1,2.205000,0.0066,0.006600,1.0066,1.0066,0.0
2,ArcticFox,3.385,44.5,,,12.5,14.0,60.0,1,1,1,10,1,4,7.463925,0.0445,0.013146,3.4295,3.4295,0.0
3,Arcticgroundsquirrel,0.920,5.7,,,16.5,,25.0,5,2,3,10,1,9,2.028600,0.0057,0.006196,0.9257,0.9257,0.0
4,Asianelephant,2547.000,4603.0,2.1,1.8,3.9,69.0,624.0,3,5,4,10,1,16,5616.135000,4.6030,0.001807,2551.6030,2551.6030,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,Treehyrax,2.000,12.3,4.9,0.5,5.4,7.5,200.0,3,1,3,10,1,3249,4.410000,0.0123,0.006150,2.0123,2.0123,0.0
58,Treeshrew,0.104,2.5,13.2,2.6,15.8,2.3,46.0,3,2,2,10,1,3364,0.229320,0.0025,0.024038,0.1065,0.1065,0.0
59,Vervet,4.190,58.0,9.7,0.6,10.3,24.0,210.0,4,3,4,10,1,3481,9.238950,0.0580,0.013842,4.2480,4.2480,1.0
60,Wateropossum,3.500,3.9,12.8,6.6,19.4,3.0,14.0,2,1,1,10,1,3600,7.717500,0.0039,0.001114,3.5039,3.5039,0.0


In [111]:
animals

Unnamed: 0,species,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,danger,coluna_exemplo,nova_coluna,segunda_coluna,bodywt_pounds,brainwt_kg,ratio_brain_body,total_wt,totalwt,flag_extincao
0,Africanelephant,6654.000,5712.0,,,3.3,38.6,645.0,3,5,3,10,1,0,14672.070000,5.7120,0.000858,6659.7120,6659.7120,0.0
1,Africangiantpouchedrat,1.000,6.6,6.3,2.0,8.3,4.5,42.0,3,1,3,10,1,1,2.205000,0.0066,0.006600,1.0066,1.0066,0.0
2,ArcticFox,3.385,44.5,,,12.5,14.0,60.0,1,1,1,10,1,4,7.463925,0.0445,0.013146,3.4295,3.4295,0.0
3,Arcticgroundsquirrel,0.920,5.7,,,16.5,,25.0,5,2,3,10,1,9,2.028600,0.0057,0.006196,0.9257,0.9257,0.0
4,Asianelephant,2547.000,4603.0,2.1,1.8,3.9,69.0,624.0,3,5,4,10,1,16,5616.135000,4.6030,0.001807,2551.6030,2551.6030,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,Treehyrax,2.000,12.3,4.9,0.5,5.4,7.5,200.0,3,1,3,10,1,3249,4.410000,0.0123,0.006150,2.0123,2.0123,0.0
58,Treeshrew,0.104,2.5,13.2,2.6,15.8,2.3,46.0,3,2,2,10,1,3364,0.229320,0.0025,0.024038,0.1065,0.1065,0.0
59,Vervet,4.190,58.0,9.7,0.6,10.3,24.0,210.0,4,3,4,10,1,3481,9.238950,0.0580,0.013842,4.2480,4.2480,1.0
60,Wateropossum,3.500,3.9,12.8,6.6,19.4,3.0,14.0,2,1,1,10,1,3600,7.717500,0.0039,0.001114,3.5039,3.5039,0.0


In [113]:
# Mean and median of every numeric feature, split by the extinction flag.
# Numeric columns are selected first: on pandas >= 2.0, aggregating 'mean' over
# the string column `species` raises TypeError instead of silently dropping it.
(
    animals
    .select_dtypes(include='number')
    .groupby(by='flag_extincao')
    .agg(['mean', 'median'])
    .T
)

Unnamed: 0,flag_extincao,0.0,1.0
bodywt,mean,170.050674,263.831579
bodywt,median,1.41,35.0
brainwt,mean,211.964651,444.202105
brainwt,median,11.4,169.0
nondreaming,mean,9.655882,6.285714
nondreaming,median,9.05,6.95
dreaming,mean,2.447059,0.9625
dreaming,median,2.15,0.85
totalsleep,mean,11.978571,6.7375
totalsleep,median,11.6,7.2


In [118]:
animals['bodywt'].mean()

198.78998387096775

In [119]:
avg_bodywt = animals['bodywt'].mean()
avg_bodywt

198.78998387096775

In [122]:
# Flag animals heavier than the mean body weight (`avg_bodywt`).
# One vectorised comparison fills every row at once and gives a bool column,
# avoiding the intermediate NaN-filled column of a two-step .loc assignment.
animals['flag_overweight'] = animals['bodywt'] > avg_bodywt

In [123]:
# Average of each numeric feature for overweight vs. non-overweight animals.
# numeric_only=True is required on pandas >= 2.0, where the mean of a string
# column (e.g. `species`) raises TypeError instead of being dropped.
animals.groupby(by='flag_overweight').mean(numeric_only=True).T

flag_overweight,False,True
bodywt,20.945073,1596.142857
brainwt,83.369455,1852.714286
nondreaming,9.086667,2.466667
dreaming,2.088889,0.92
totalsleep,11.035849,5.2
lifespan,17.223529,39.214286
gestation,103.5,425.428571
predation,2.745455,3.857143
exposure,2.109091,4.857143
danger,2.436364,4.0


In [114]:
# Plain string literal: nothing is interpolated, so no f-prefix is needed.
print('😅')

😅


In [115]:
animals['emoji'] = '😅'

In [116]:
animals

Unnamed: 0,species,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,...,coluna_exemplo,nova_coluna,segunda_coluna,bodywt_pounds,brainwt_kg,ratio_brain_body,total_wt,totalwt,flag_extincao,emoji
0,Africanelephant,6654.000,5712.0,,,3.3,38.6,645.0,3,5,...,10,1,0,14672.070000,5.7120,0.000858,6659.7120,6659.7120,0.0,😅
1,Africangiantpouchedrat,1.000,6.6,6.3,2.0,8.3,4.5,42.0,3,1,...,10,1,1,2.205000,0.0066,0.006600,1.0066,1.0066,0.0,😅
2,ArcticFox,3.385,44.5,,,12.5,14.0,60.0,1,1,...,10,1,4,7.463925,0.0445,0.013146,3.4295,3.4295,0.0,😅
3,Arcticgroundsquirrel,0.920,5.7,,,16.5,,25.0,5,2,...,10,1,9,2.028600,0.0057,0.006196,0.9257,0.9257,0.0,😅
4,Asianelephant,2547.000,4603.0,2.1,1.8,3.9,69.0,624.0,3,5,...,10,1,16,5616.135000,4.6030,0.001807,2551.6030,2551.6030,1.0,😅
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,Treehyrax,2.000,12.3,4.9,0.5,5.4,7.5,200.0,3,1,...,10,1,3249,4.410000,0.0123,0.006150,2.0123,2.0123,0.0,😅
58,Treeshrew,0.104,2.5,13.2,2.6,15.8,2.3,46.0,3,2,...,10,1,3364,0.229320,0.0025,0.024038,0.1065,0.1065,0.0,😅
59,Vervet,4.190,58.0,9.7,0.6,10.3,24.0,210.0,4,3,...,10,1,3481,9.238950,0.0580,0.013842,4.2480,4.2480,1.0,😅
60,Wateropossum,3.500,3.9,12.8,6.6,19.4,3.0,14.0,2,1,...,10,1,3600,7.717500,0.0039,0.001114,3.5039,3.5039,0.0,😅


### Quantiles
*Touching statistics*

![Data](images/data_quantiles.png)

![Percentiles](images/data_quantiles_q10.png)

![Quartiles](images/quartiles.jpg)

> Percentiles:
> - q50 means that 50% of your dataset is below that number. That is the median value.
> - q90 means that 90% of your dataset is below that number.
> - q10 means that 10% of your dataset is below that number.

> Calculating the q50 

In [124]:
animals['bodywt'].median()

3.3425

In [125]:
# Rows whose body weight is strictly below the median (q50).
animals[animals['bodywt'].lt(animals['bodywt'].median())]

Unnamed: 0,species,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,...,nova_coluna,segunda_coluna,bodywt_pounds,brainwt_kg,ratio_brain_body,total_wt,totalwt,flag_extincao,emoji,flag_overweight
1,Africangiantpouchedrat,1.0,6.6,6.3,2.0,8.3,4.5,42.0,3,1,...,1,1,2.205,0.0066,0.0066,1.0066,1.0066,0.0,😅,False
3,Arcticgroundsquirrel,0.92,5.7,,,16.5,,25.0,5,2,...,1,9,2.0286,0.0057,0.006196,0.9257,0.9257,0.0,😅,False
6,Bigbrownbat,0.023,0.3,15.8,3.9,19.7,19.0,35.0,1,1,...,1,36,0.050715,0.0003,0.013043,0.0233,0.0233,0.0,😅,False
8,Cat,3.3,25.6,10.9,3.6,14.5,28.0,63.0,1,2,...,1,64,7.2765,0.0256,0.007758,3.3256,3.3256,0.0,😅,False
10,Chinchilla,0.425,6.4,11.0,1.5,12.5,7.0,112.0,5,4,...,1,100,0.937125,0.0064,0.015059,0.4314,0.4314,1.0,😅,False
12,Deserthedgehog,0.55,2.4,7.6,2.7,10.3,,,2,1,...,1,144,1.21275,0.0024,0.004364,0.5524,0.5524,0.0,😅,False
14,EasternAmericanmole,0.075,1.2,6.3,2.1,8.4,3.5,42.0,1,1,...,1,196,0.165375,0.0012,0.016,0.0762,0.0762,0.0,😅,False
15,Echidna,3.0,25.0,8.6,0.0,8.6,50.0,28.0,2,2,...,1,225,6.615,0.025,0.008333,3.025,3.025,0.0,😅,False
16,Europeanhedgehog,0.785,3.5,6.6,4.1,10.7,6.0,42.0,2,2,...,1,256,1.730925,0.0035,0.004459,0.7885,0.7885,0.0,😅,False
17,Galago,0.2,5.0,9.5,1.2,10.7,10.4,120.0,2,2,...,1,289,0.441,0.005,0.025,0.205,0.205,0.0,😅,False


In [126]:
animals.shape

(62, 22)

In [128]:
# Remove the rows labelled 3 and 4. errors='ignore' keeps the cell re-runnable:
# without it, a second execution raises KeyError because the labels are already gone.
animals = animals.drop(index=[3, 4], errors='ignore')
animals.shape

(60, 22)

In [135]:
animals['bodywt'].quantile(q=0.1)

0.0735

0     False
1     False
2     False
5     False
6      True
7     False
8     False
9     False
10    False
11    False
12    False
13    False
14    False
15    False
16    False
17    False
18    False
19    False
20    False
21    False
22    False
23    False
24    False
25    False
26    False
27    False
28    False
29    False
30    False
31     True
32     True
33    False
34    False
35    False
36     True
37     True
38    False
39    False
40    False
41    False
42    False
43    False
44    False
45    False
46    False
47    False
48    False
49    False
50    False
51    False
52    False
53    False
54    False
55     True
56    False
57    False
58    False
59    False
60    False
61    False
Name: bodywt, dtype: bool

In [141]:
# Boolean mask: True for animals lighter than the 10th percentile of bodywt.
mask = animals['bodywt'].lt(animals['bodywt'].quantile(q=0.1))

In [142]:
animals.loc[mask, :]

Unnamed: 0,species,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,...,nova_coluna,segunda_coluna,bodywt_pounds,brainwt_kg,ratio_brain_body,total_wt,totalwt,flag_extincao,emoji,flag_overweight
6,Bigbrownbat,0.023,0.3,15.8,3.9,19.7,19.0,35.0,1,1,...,1,36,0.050715,0.0003,0.013043,0.0233,0.0233,0.0,😅,False
31,Lessershort-tailedshrew,0.005,0.14,7.7,1.4,9.1,2.6,21.5,5,2,...,1,961,0.011025,0.00014,0.028,0.00514,0.00514,1.0,😅,False
32,Littlebrownbat,0.01,0.25,17.9,2.0,19.9,24.0,50.0,1,1,...,1,1024,0.02205,0.00025,0.025,0.01025,0.01025,0.0,😅,False
36,Mouse,0.023,0.4,11.9,1.3,13.2,3.2,19.0,4,1,...,1,1296,0.050715,0.0004,0.017391,0.0234,0.0234,0.0,😅,False
37,Muskshrew,0.048,0.33,10.8,2.0,12.8,2.0,30.0,4,1,...,1,1369,0.10584,0.00033,0.006875,0.04833,0.04833,0.0,😅,False
55,Starnosedmole,0.06,1.0,8.1,2.2,10.3,3.5,,3,1,...,1,3025,0.1323,0.001,0.016667,0.061,0.061,0.0,😅,False


In [138]:
q90 = animals['bodywt'].quantile(0.9)

In [143]:
q90

193.50000000000003

In [144]:
animals['bodywt'] < q90

0     False
1      True
2      True
5      True
6      True
7      True
8      True
9      True
10     True
11    False
12     True
13     True
14     True
15     True
16     True
17     True
18     True
19     True
20    False
21     True
22     True
23    False
24     True
25     True
26     True
27     True
28    False
29     True
30     True
31     True
32     True
33     True
34     True
35     True
36     True
37     True
38     True
39     True
40    False
41     True
42     True
43     True
44     True
45     True
46     True
47     True
48     True
49     True
50     True
51     True
52     True
53     True
54     True
55     True
56     True
57     True
58     True
59     True
60     True
61     True
Name: bodywt, dtype: bool

In [145]:
(animals['bodywt'] < q90).mean()

0.9

In [146]:
# using numpy
np.quantile(animals['bodywt'], q=0.9)

<IPython.core.display.Javascript object>

193.50000000000003

In [147]:
animals['bodywt'].quantile(q=0.9)

193.50000000000003

In [148]:
animals['bodywt'].quantile(q=[0.1, 0.5,0.9])

0.1      0.0735
0.5      3.3425
0.9    193.5000
Name: bodywt, dtype: float64

## Binning (categorizing) data

In [149]:
animals['bodywt']

0     6654.000
1        1.000
2        3.385
5       10.550
6        0.023
7      160.000
8        3.300
9       52.160
10       0.425
11     465.000
12       0.550
13     187.100
14       0.075
15       3.000
16       0.785
17       0.200
18       1.410
19      60.000
20     529.000
21      27.660
22       0.120
23     207.000
24      85.000
25      36.330
26       0.101
27       1.040
28     521.000
29     100.000
30      35.000
31       0.005
32       0.010
33      62.000
34       0.122
35       1.350
36       0.023
37       0.048
38       1.700
39       3.500
40     250.000
41       0.480
42      10.000
43       1.620
44     192.000
45       2.500
46       4.288
47       0.280
48       4.235
49       6.800
50       0.750
51       3.600
52      14.830
53      55.500
54       1.400
55       0.060
56       0.900
57       2.000
58       0.104
59       4.190
60       3.500
61       4.050
Name: bodywt, dtype: float64

In [152]:
# Quartile-based size classes: the explicit edges 0, .25, .5, .75, 1 are the
# same four equal-count bins that q=4 produces.
pd.qcut(
    animals['bodywt'],
    q=[0, 0.25, 0.5, 0.75, 1],
    labels=['PP', 'P', 'M', 'G'],
)

<IPython.core.display.Javascript object>

0      G
1      P
2      M
5      M
6     PP
7      G
8      P
9      G
10    PP
11     G
12     P
13     G
14    PP
15     P
16     P
17    PP
18     P
19     G
20     G
21     M
22    PP
23     G
24     G
25     M
26    PP
27     P
28     G
29     G
30     M
31    PP
32    PP
33     G
34    PP
35     P
36    PP
37    PP
38     P
39     M
40     G
41    PP
42     M
43     P
44     G
45     P
46     M
47    PP
48     M
49     M
50     P
51     M
52     M
53     G
54     P
55    PP
56     P
57     P
58    PP
59     M
60     M
61     M
Name: bodywt, dtype: category
Categories (4, object): [PP < P < M < G]

In [154]:
# pd.cut with an integer `bins` splits the value *range* into equal-width
# intervals (qcut, by contrast, makes equal-count bins). bodywt is so skewed
# that nearly every animal lands in 'A' (see the output below).
pd.cut(animals['bodywt'], bins=2, labels=['A','B'])

<IPython.core.display.Javascript object>

0     B
1     A
2     A
5     A
6     A
7     A
8     A
9     A
10    A
11    A
12    A
13    A
14    A
15    A
16    A
17    A
18    A
19    A
20    A
21    A
22    A
23    A
24    A
25    A
26    A
27    A
28    A
29    A
30    A
31    A
32    A
33    A
34    A
35    A
36    A
37    A
38    A
39    A
40    A
41    A
42    A
43    A
44    A
45    A
46    A
47    A
48    A
49    A
50    A
51    A
52    A
53    A
54    A
55    A
56    A
57    A
58    A
59    A
60    A
61    A
Name: bodywt, dtype: category
Categories (2, object): [A < B]

In [156]:
pd.qcut(animals['bodywt'], q=2, labels=['LEVE', 'SOBREPESO']).value_counts()

<IPython.core.display.Javascript object>

SOBREPESO    30
LEVE         30
Name: bodywt, dtype: int64

> Calculating the threshold for different quantiles

In [157]:
# say that animals above quantile 0.95 (percentile 95%) are super-overweight
animals.bodywt.quantile(0.95)

467.79999999999984

In [158]:
q95 = animals.bodywt.quantile(0.95)

In [159]:
# say that animals below quantile 0.1 (percentile 10%) are super-skinny
q10 = animals.bodywt.quantile(0.1)

In [160]:
q10

0.0735

In [161]:
# Decile classes labelled 'a'..'j', computed on the frame without rows 1 and 2.
pd.qcut(
    animals.drop(labels=[1, 2])['bodywt'],
    q=10,
    labels=list('abcdefghij'),
)

<IPython.core.display.Javascript object>

0     j
5     g
6     a
7     i
8     e
9     h
10    c
11    j
12    c
13    i
14    b
15    e
16    c
17    b
18    d
19    h
20    j
21    h
22    b
23    j
24    i
25    h
26    b
27    d
28    j
29    i
30    h
31    a
32    a
33    i
34    b
35    d
36    a
37    a
38    e
39    f
40    j
41    c
42    g
43    e
44    i
45    e
46    g
47    c
48    f
49    g
50    c
51    f
52    g
53    h
54    d
55    a
56    d
57    e
58    b
59    f
60    f
61    f
Name: bodywt, dtype: category
Categories (10, object): [a < b < c < d ... g < h < i < j]

> Selecting (and counting) the observations that are above (or below) that threshold

In [163]:
q10

0.0735

In [164]:
f'bodywt < {q10}'

'bodywt < 0.0735'

In [165]:
# Select animals below the 10th-percentile threshold. `@q10` makes query() read
# the Python variable directly, rather than formatting its value into the
# expression string; this also works for values that are not plain numbers.
animals.query('bodywt < @q10')

Unnamed: 0,species,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,...,nova_coluna,segunda_coluna,bodywt_pounds,brainwt_kg,ratio_brain_body,total_wt,totalwt,flag_extincao,emoji,flag_overweight
6,Bigbrownbat,0.023,0.3,15.8,3.9,19.7,19.0,35.0,1,1,...,1,36,0.050715,0.0003,0.013043,0.0233,0.0233,0.0,😅,False
31,Lessershort-tailedshrew,0.005,0.14,7.7,1.4,9.1,2.6,21.5,5,2,...,1,961,0.011025,0.00014,0.028,0.00514,0.00514,1.0,😅,False
32,Littlebrownbat,0.01,0.25,17.9,2.0,19.9,24.0,50.0,1,1,...,1,1024,0.02205,0.00025,0.025,0.01025,0.01025,0.0,😅,False
36,Mouse,0.023,0.4,11.9,1.3,13.2,3.2,19.0,4,1,...,1,1296,0.050715,0.0004,0.017391,0.0234,0.0234,0.0,😅,False
37,Muskshrew,0.048,0.33,10.8,2.0,12.8,2.0,30.0,4,1,...,1,1369,0.10584,0.00033,0.006875,0.04833,0.04833,0.0,😅,False
55,Starnosedmole,0.06,1.0,8.1,2.2,10.3,3.5,,3,1,...,1,3025,0.1323,0.001,0.016667,0.061,0.061,0.0,😅,False


## Numpy where

In [166]:
import numpy as np

In [167]:
# 95th-percentile body weight, used below as the cut-off for the heaviest animals
q95 = animals['bodywt'].quantile(q=0.95)

> `np.where(condition, value_if_true, value_if_false)` — for each element, returns the first value where the condition is true and the second value otherwise

In [None]:
# Show the 95th-percentile body-weight threshold stored in q95
q95

In [168]:
# Element-wise labelling: 'SUPER-OVERWEIGHT' where body weight exceeds q95, 'NORMAL' otherwise
np.where(animals['bodywt'].gt(q95), 'SUPER-OVERWEIGHT', 'NORMAL')

array(['SUPER-OVERWEIGHT', 'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL',
       'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL',
       'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL',
       'NORMAL', 'SUPER-OVERWEIGHT', 'NORMAL', 'NORMAL', 'NORMAL',
       'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL', 'SUPER-OVERWEIGHT',
       'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL',
       'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL',
       'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL',
       'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL',
       'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL',
       'NORMAL', 'NORMAL', 'NORMAL'], dtype='<U16')

In [169]:
# Store the two-way weight label in the 'class' column of the frame
animals['class'] = np.where(
    animals['bodywt'].gt(q95),
    'SUPER-OVERWEIGHT',
    'NORMAL',
)

In [170]:
# Display the frame to check the 'class' column assigned in the previous cell
animals

Unnamed: 0,species,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,...,segunda_coluna,bodywt_pounds,brainwt_kg,ratio_brain_body,total_wt,totalwt,flag_extincao,emoji,flag_overweight,class
0,Africanelephant,6654.0,5712.0,,,3.3,38.6,645.0,3,5,...,0,14672.07,5.712,0.000858,6659.712,6659.712,0.0,😅,True,SUPER-OVERWEIGHT
1,Africangiantpouchedrat,1.0,6.6,6.3,2.0,8.3,4.5,42.0,3,1,...,1,2.205,0.0066,0.0066,1.0066,1.0066,0.0,😅,False,NORMAL
2,ArcticFox,3.385,44.5,,,12.5,14.0,60.0,1,1,...,4,7.463925,0.0445,0.013146,3.4295,3.4295,0.0,😅,False,NORMAL
5,Baboon,10.55,179.5,9.1,0.7,9.8,27.0,180.0,4,4,...,25,23.26275,0.1795,0.017014,10.7295,10.7295,1.0,😅,False,NORMAL
6,Bigbrownbat,0.023,0.3,15.8,3.9,19.7,19.0,35.0,1,1,...,36,0.050715,0.0003,0.013043,0.0233,0.0233,0.0,😅,False,NORMAL
7,Braziliantapir,160.0,169.0,5.2,1.0,6.2,30.4,392.0,4,5,...,49,352.8,0.169,0.001056,160.169,160.169,1.0,😅,False,NORMAL
8,Cat,3.3,25.6,10.9,3.6,14.5,28.0,63.0,1,2,...,64,7.2765,0.0256,0.007758,3.3256,3.3256,0.0,😅,False,NORMAL
9,Chimpanzee,52.16,440.0,8.3,1.4,9.7,50.0,230.0,1,1,...,81,115.0128,0.44,0.008436,52.6,52.6,0.0,😅,False,NORMAL
10,Chinchilla,0.425,6.4,11.0,1.5,12.5,7.0,112.0,5,4,...,100,0.937125,0.0064,0.015059,0.4314,0.4314,1.0,😅,False,NORMAL
11,Cow,465.0,423.0,3.2,0.7,3.9,30.0,281.0,5,5,...,121,1025.325,0.423,0.00091,465.423,465.423,1.0,😅,True,NORMAL


In [171]:
# 85th-percentile and median body weights: the two lower cut-offs for the tiered labels
q85, q50 = (animals['bodywt'].quantile(level) for level in (0.85, 0.50))

In [172]:
# Assign a four-tier weight label to every animal.
# np.select evaluates the conditions in order and takes the first one that is true,
# so each label covers the band between its own threshold and the one above it;
# rows matching no condition (including missing body weights) get the default.
animals['class'] = np.select(
    condlist=[
        animals['bodywt'] > q95,
        animals['bodywt'] > q85,
        animals['bodywt'] > q50,
    ],
    choicelist=[
        'SUPER-OVERWEIGHT',
        'ALMOST-SUPER-OVERWEIGHT',
        'QUASI-ALMOST-SUPER-OVERWEIGHT',
    ],
    default='NORMAL',
)

# Bonus: 

## Correlation

*Touching statistics*

In [174]:
# Display the frame again, now with the multi-tier 'class' labels, before computing correlations
animals

Unnamed: 0,species,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,...,segunda_coluna,bodywt_pounds,brainwt_kg,ratio_brain_body,total_wt,totalwt,flag_extincao,emoji,flag_overweight,class
0,Africanelephant,6654.0,5712.0,,,3.3,38.6,645.0,3,5,...,0,14672.07,5.712,0.000858,6659.712,6659.712,0.0,😅,True,SUPER-OVERWEIGHT
1,Africangiantpouchedrat,1.0,6.6,6.3,2.0,8.3,4.5,42.0,3,1,...,1,2.205,0.0066,0.0066,1.0066,1.0066,0.0,😅,False,NORMAL
2,ArcticFox,3.385,44.5,,,12.5,14.0,60.0,1,1,...,4,7.463925,0.0445,0.013146,3.4295,3.4295,0.0,😅,False,QUASI-ALMOST-SUPER-OVERWEIGHT
5,Baboon,10.55,179.5,9.1,0.7,9.8,27.0,180.0,4,4,...,25,23.26275,0.1795,0.017014,10.7295,10.7295,1.0,😅,False,QUASI-ALMOST-SUPER-OVERWEIGHT
6,Bigbrownbat,0.023,0.3,15.8,3.9,19.7,19.0,35.0,1,1,...,36,0.050715,0.0003,0.013043,0.0233,0.0233,0.0,😅,False,NORMAL
7,Braziliantapir,160.0,169.0,5.2,1.0,6.2,30.4,392.0,4,5,...,49,352.8,0.169,0.001056,160.169,160.169,1.0,😅,False,ALMOST-SUPER-OVERWEIGHT
8,Cat,3.3,25.6,10.9,3.6,14.5,28.0,63.0,1,2,...,64,7.2765,0.0256,0.007758,3.3256,3.3256,0.0,😅,False,NORMAL
9,Chimpanzee,52.16,440.0,8.3,1.4,9.7,50.0,230.0,1,1,...,81,115.0128,0.44,0.008436,52.6,52.6,0.0,😅,False,QUASI-ALMOST-SUPER-OVERWEIGHT
10,Chinchilla,0.425,6.4,11.0,1.5,12.5,7.0,112.0,5,4,...,100,0.937125,0.0064,0.015059,0.4314,0.4314,1.0,😅,False,NORMAL
11,Cow,465.0,423.0,3.2,0.7,3.9,30.0,281.0,5,5,...,121,1025.325,0.423,0.00091,465.423,465.423,1.0,😅,True,ALMOST-SUPER-OVERWEIGHT


In [175]:
# Pairwise correlation matrix of the numeric columns.
# The frame also holds text columns (species, emoji, class). pandas >= 2.0 no longer
# drops those silently and DataFrame.corr() raises on them, so the numeric columns
# are selected explicitly first. NOTE(review): include='number' also leaves out
# boolean columns — confirm that is the intended set.
animals.select_dtypes(include='number').corr()

Unnamed: 0,bodywt,brainwt,nondreaming,dreaming,totalsleep,lifespan,gestation,predation,exposure,danger,coluna_exemplo,nova_coluna,segunda_coluna,bodywt_pounds,brainwt_kg,ratio_brain_body,total_wt,totalwt,flag_extincao
bodywt,1.0,0.971521,-0.458833,-0.2763,-0.262101,0.206426,0.596126,0.064842,0.290797,0.09916,,,-0.180755,1.0,0.971521,-0.181723,1.0,1.0,-0.019918
brainwt,0.971521,1.0,-0.37879,-0.250603,-0.30801,0.392712,0.673801,0.041275,0.310061,0.09136,,,-0.202929,0.971521,1.0,-0.16766,0.971571,0.971571,-0.002271
nondreaming,-0.458833,-0.37879,1.0,0.527002,0.961543,-0.317273,-0.561653,-0.325218,-0.507802,-0.464829,,,0.094614,-0.458833,-0.37879,0.321054,-0.459127,-0.459127,-0.385593
dreaming,-0.2763,-0.250603,0.527002,1.0,0.740153,-0.314249,-0.527033,-0.447414,-0.550263,-0.582487,,,0.012392,-0.2763,-0.250603,-0.059691,-0.276448,-0.276448,-0.492179
totalsleep,-0.262101,-0.30801,0.961543,0.740153,1.0,-0.372411,-0.617528,-0.450279,-0.631634,-0.59496,,,0.074316,-0.262101,-0.30801,0.254819,-0.262147,-0.262147,-0.48715
lifespan,0.206426,0.392712,-0.317273,-0.314249,-0.372411,1.0,0.543378,-0.114718,0.313579,0.018572,,,-0.315208,0.206426,0.392712,-0.083439,0.206594,0.206594,0.138719
gestation,0.596126,0.673801,-0.561653,-0.527033,-0.617528,0.543378,1.0,0.247007,0.623227,0.371002,,,-0.198241,0.596126,0.673801,-0.288792,0.596209,0.596209,0.334716
predation,0.064842,0.041275,-0.325218,-0.447414,-0.450279,-0.114718,0.247007,1.0,0.647078,0.931898,,,0.086524,0.064842,0.041275,-0.081949,0.064823,0.064823,0.795784
exposure,0.290797,0.310061,-0.507802,-0.550263,-0.631634,0.313579,0.623227,0.647078,1.0,0.78651,,,-0.135675,0.290797,0.310061,-0.323672,0.290821,0.290821,0.829872
danger,0.09916,0.09136,-0.464829,-0.582487,-0.59496,0.018572,0.371002,0.931898,0.78651,1.0,,,0.009953,0.09916,0.09136,-0.203743,0.099156,0.099156,0.870771


# Today:

- Creating columns based on:
   - mathematical calculations
   - column-based calculations
   - conditions
   - variable conditions
- quantiles
- correlations