## Eixos

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the drinks dataset from a CSV file given by a relative path.
file = 'drinks.csv'
drinks = pd.read_csv(file)
# Show the column labels that were read from the file.
drinks.columns

Index(['country', 'beer_servings', 'spirit_servings', 'wine_servings',
       'total_litres_of_pure_alcohol', 'population', 'continent'],
      dtype='object')

In [3]:
# Preview the first rows (up to 20) of the dataset.
drinks.head(20)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,population,continent
0,Afghanistan,0,0,0,0.0,34.660.000,Asia
1,Albania,89,132,54,4.9,2.876.000,Europe
2,Algeria,25,0,14,0.7,40.061.000,Africa
3,Andorra,245,138,312,12.4,77.281.000,Europe
4,Angola,217,57,45,5.9,28.081.000,Africa
5,Antigua & Barbuda,102,128,45,4.9,86.295,North America
6,Argentina,193,25,221,8.3,43.085.000,South America
7,Armenia,21,179,11,3.8,2.925.000,Europe
8,Australia,261,72,212,10.4,24.013.000,Oceania
9,Austria,279,75,191,9.7,8.725.111,Europe


In [4]:
# Drop a column TEMPORARILY: axis=1 targets columns, and the result is only
# displayed, not assigned back, so `drinks` itself is left unchanged.
drinks.drop('continent', axis=1).head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,population
0,Afghanistan,0,0,0,0.0,34.660.000
1,Albania,89,132,54,4.9,2.876.000
2,Algeria,25,0,14,0.7,40.061.000
3,Andorra,245,138,312,12.4,77.281.000
4,Angola,217,57,45,5.9,28.081.000


In [5]:
# Confirm that `drinks` still contains the 'continent' column.
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,population,continent
0,Afghanistan,0,0,0,0.0,34.660.000,Asia
1,Albania,89,132,54,4.9,2.876.000,Europe
2,Algeria,25,0,14,0.7,40.061.000,Africa
3,Andorra,245,138,312,12.4,77.281.000,Europe
4,Angola,217,57,45,5.9,28.081.000,Africa


In [6]:
# Drop a row temporarily: axis=0 targets rows, so 2 is looked up as an index label.
# The result is not assigned back, so `drinks` is not modified.
drinks.drop(2, axis=0).head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,population,continent
0,Afghanistan,0,0,0,0.0,34.660.000,Asia
1,Albania,89,132,54,4.9,2.876.000,Europe
3,Andorra,245,138,312,12.4,77.281.000,Europe
4,Angola,217,57,45,5.9,28.081.000,Africa
5,Antigua & Barbuda,102,128,45,4.9,86.295,North America


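Quando <b>referenciamos linhas e colunas</b> com o parâmetro axis:
<ul>
    <li><b>axis 0</b> (ou <code>'index'</code>) = linhas</li>
    <li><b>axis 1</b> (ou <code>'columns'</code>) = colunas</li>
</ul>
Em <code>drop</code>, o eixo indica o que será removido. Em agregações como <code>mean</code> e <code>sum</code>, o eixo indica a direção percorrida pelo cálculo: <b>axis 0</b> percorre as linhas e devolve um valor por coluna, enquanto <b>axis 1</b> percorre as colunas e devolve um valor por linha.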

In [7]:
# Mean of each numeric column (one value per column).
# numeric_only=True skips the object columns (country, population, continent);
# without it, pandas 2.0+ raises a TypeError instead of silently ignoring them.
drinks.mean(numeric_only=True)

beer_servings                   106.160622
spirit_servings                  80.994819
wine_servings                    49.450777
total_litres_of_pure_alcohol      4.717098
dtype: float64

In [9]:
# The same column-wise mean with the axis made explicit: axis=0 aggregates down the rows.
# numeric_only=True skips the object columns, which pandas 2.0+ would otherwise
# reject with a TypeError.
drinks.mean(axis=0, numeric_only=True)

beer_servings                   106.160622
spirit_servings                  80.994819
wine_servings                    49.450777
total_litres_of_pure_alcohol      4.717098
dtype: float64

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
drinks.describe()

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
count,193.0,193.0,193.0,193.0
mean,106.160622,80.994819,49.450777,4.717098
std,101.143103,88.284312,79.697598,3.773298
min,0.0,0.0,0.0,0.0
25%,20.0,4.0,1.0,1.3
50%,76.0,56.0,8.0,4.2
75%,188.0,128.0,59.0,7.2
max,376.0,438.0,370.0,14.4


In [11]:
# Mean of each row: axis=1 aggregates across the columns, giving one value per row.
# numeric_only=True restricts the calculation to the numeric columns; mixing in the
# object columns makes pandas 2.0+ raise a TypeError.
drinks.mean(axis=1, numeric_only=True).head()

0      0.000
1     69.975
2      9.925
3    176.850
4     81.225
dtype: float64

In [12]:
# 'index' is an alias for axis 0, so this is again the column-wise mean.
# numeric_only=True skips the object columns, which pandas 2.0+ would otherwise
# reject with a TypeError.
drinks.mean(axis='index', numeric_only=True)

beer_servings                   106.160622
spirit_servings                  80.994819
wine_servings                    49.450777
total_litres_of_pure_alcohol      4.717098
dtype: float64

In [14]:
# 'columns' is an alias for axis 1, so this is the row-wise mean.
# numeric_only=True restricts the calculation to the numeric columns; mixing in the
# object columns makes pandas 2.0+ raise a TypeError.
drinks.mean(axis='columns', numeric_only=True).head()

0      0.000
1     69.975
2      9.925
3    176.850
4     81.225
dtype: float64

In [15]:
# Load a second, small dataset used to illustrate the axis parameter.
# Note that `file` is rebound here and no longer points at drinks.csv.
file = 'eixos.csv'
eixos = pd.read_csv(file)
# Show the column labels that were read from the file.
eixos.columns

Index(['A', 'B', 'C'], dtype='object')

## Trabalhando com as linhas - axis = 0

### O comportamento padrão da função mean() é o eixo 0 - axis = 0

In [16]:
# With no axis argument, mean() uses axis=0: one mean per column.
eixos.mean()

A    10.0
B     5.0
C    10.0
dtype: float64

In [17]:
# Passing axis=0 explicitly: the mean is taken down the rows of each column.
eixos.mean(axis=0)

A    10.0
B     5.0
C    10.0
dtype: float64

In [18]:
# Column totals: axis=0 sums down the rows of each column.
eixos.sum(axis=0)

A    30
B    15
C    30
dtype: int64

## Trabalhando com as colunas - axis = 1

In [19]:
# axis=1 aggregates across the columns, producing one mean per row.
eixos.mean(axis=1)

0     6.666667
1     8.333333
2    10.000000
dtype: float64

In [20]:
# Row totals: axis=1 sums across the columns of each row.
eixos.sum(axis=1)

0    20
1    25
2    30
dtype: int64

In [21]:
# Inspect the dtype of each column; population shows up as object, not as a number.
drinks.dtypes

country                          object
beer_servings                     int64
spirit_servings                   int64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
population                       object
continent                        object
dtype: object

In [23]:
# Minimal demo of Python's str.replace on a plain string: it returns a new string
# with 'R' swapped for 'P'; `A` itself is not modified.
A = 'RATO'
A.replace('R','P')

'PATO'

In [24]:
# The population column uses '.' as a thousands separator (e.g. '34.660.000'),
# so it cannot be read as an int or float. Remove the dots with str.replace.
# regex=False makes '.' a literal dot on every pandas version: the default for
# `regex` changed in pandas 2.0, and as a regular expression '.' matches any character.
# The result is still dtype object (strings without dots); despite its name,
# `populacao_int` does not hold integers yet.
populacao_int = drinks['population'] = drinks.population.str.replace('.', '', regex=False)
populacao_int.head()

0    34660000
1     2876000
2    40061000
3    77281000
4    28081000
Name: population, dtype: object

In [26]:
# Convert every population string to a Python int.
# The dots are stripped again here so this cell gives the same result whether or
# not the column was already cleaned. A comprehension replaces the append loop
# and avoids leaving throwaway globals behind in the kernel namespace.
inteiros = [int(valor.replace('.', '')) for valor in drinks.population]

In [28]:
# Spot-check one converted value (position 3).
inteiros[3]

77281000

In [30]:
# Confirm that the converted element is a Python int.
type(inteiros[3])

int

In [31]:
# Add the integers as a new column. pd.Series(inteiros) gets a fresh 0..n-1 index
# and column assignment aligns on index labels, so this relies on `drinks` still
# having its default integer index (as the previews above show).
drinks['pop_total'] = pd.Series(inteiros)

In [32]:
# Check the new pop_total column next to the string population column.
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,population,continent,pop_total
0,Afghanistan,0,0,0,0.0,34660000,Asia,34660000
1,Albania,89,132,54,4.9,2876000,Europe,2876000
2,Algeria,25,0,14,0.7,40061000,Africa,40061000
3,Andorra,245,138,312,12.4,77281000,Europe,77281000
4,Angola,217,57,45,5.9,28081000,Africa,28081000


In [34]:
# Verify the dtypes: pop_total is int64 while population is still object.
drinks.dtypes

country                          object
beer_servings                     int64
spirit_servings                   int64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
population                       object
continent                        object
pop_total                         int64
dtype: object

In [36]:
# Build a new frame without the string 'population' column (axis=1 targets columns).
# The result is bound to a new name, so `drinks` keeps both population columns.
new_drinks = drinks.drop('population', axis=1)
new_drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent,pop_total
0,Afghanistan,0,0,0,0.0,Asia,34660000
1,Albania,89,132,54,4.9,Europe,2876000
2,Algeria,25,0,14,0.7,Africa,40061000
3,Andorra,245,138,312,12.4,Europe,77281000
4,Angola,217,57,45,5.9,Africa,28081000


In [44]:
# Rename the integer column back to 'population'.
# The result is reassigned instead of using inplace=True: the resulting frame is
# the same, but the step stays explicit and chainable.
new_drinks = new_drinks.rename(columns={'pop_total': 'population'})

In [45]:
# Preview the frame after the rename; the last column is now called 'population'.
new_drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent,population
0,Afghanistan,0,0,0,0.0,Asia,34660000
1,Albania,89,132,54,4.9,Europe,2876000
2,Algeria,25,0,14,0.7,Africa,40061000
3,Andorra,245,138,312,12.4,Europe,77281000
4,Angola,217,57,45,5.9,Africa,28081000


In [46]:
# Confirm that 'population' is now an int64 column.
new_drinks.dtypes

country                          object
beer_servings                     int64
spirit_servings                   int64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
population                        int64
dtype: object

In [47]:
# With population stored as integers, describe() now includes it in the summary.
new_drinks.describe()

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,population
count,193.0,193.0,193.0,193.0,193.0
mean,106.160622,80.994819,49.450777,4.717098,38490830.0
std,101.143103,88.284312,79.697598,3.773298,139657000.0
min,0.0,0.0,0.0,0.0,1190.0
25%,20.0,4.0,1.0,1.3,1978300.0
50%,76.0,56.0,8.0,4.2,8412000.0
75%,188.0,128.0,59.0,7.2,28037900.0
max,376.0,438.0,370.0,14.4,1372470000.0
