# **Python para análise de dados (Pandas)**

In [33]:
#importando a biblioteca pandas
import pandas as pd

In [34]:
# Load the Gapminder dataset from a semicolon-separated CSV file.
# Malformed rows are still skipped, but on_bad_lines="warn" makes pandas report
# each skipped line instead of dropping it silently, so any data loss at load
# time is visible when this cell runs.
df = pd.read_csv("../datasets/Gapminder.csv", sep=";", on_bad_lines="warn")

In [35]:
# Preview the first 5 rows (head() returns 5 rows when called without an argument)
df.head()

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
1,Afghanistan,Asia,1957,30.332,9240934,820.85303
2,Afghanistan,Asia,1962,31.997,10267083,853.10071
3,Afghanistan,Asia,1967,34.02,11537966,836.197138
4,Afghanistan,Asia,1972,36.088,13079460,739.981106


In [36]:
# Relabel the original English column names with the Portuguese labels
# used by the cells that follow.
df = df.rename(
    {
        "country": "Pais",
        "continent": "continente",
        "year": "Ano",
        "lifeExp": "Expectativa de vida",
        "pop": "Pop Total",
        "gdpPercap": "PIB",
    },
    axis="columns",
)

In [37]:
# Round the two floating-point columns to two decimal places;
# columns not named in the mapping are left as they are.
df = df.round(
    decimals={
        "Expectativa de vida": 2,
        "PIB": 2,
    }
)

In [38]:
# Preview the first 10 rows of df
df.head(10)

Unnamed: 0,Pais,continente,Ano,Expectativa de vida,Pop Total,PIB
0,Afghanistan,Asia,1952,28.8,8425333,779.45
1,Afghanistan,Asia,1957,30.33,9240934,820.85
2,Afghanistan,Asia,1962,32.0,10267083,853.1
3,Afghanistan,Asia,1967,34.02,11537966,836.2
4,Afghanistan,Asia,1972,36.09,13079460,739.98
5,Afghanistan,Asia,1977,38.44,14880372,786.11
6,Afghanistan,Asia,1982,39.85,12881816,978.01
7,Afghanistan,Asia,1987,40.82,13867957,852.4
8,Afghanistan,Asia,1992,41.67,16317921,649.34
9,Afghanistan,Asia,1997,41.76,22227415,635.34


In [39]:
# Dimensions of the DataFrame as a (rows, columns) tuple
df.shape

(3312, 6)

In [40]:
# List the labels of all columns in the DataFrame
df.columns

Index(['Pais', 'continente', 'Ano', 'Expectativa de vida', 'Pop Total', 'PIB'], dtype='object')

In [41]:
# Show the data type (dtype) stored in each column of the DataFrame
df.dtypes

Pais                    object
continente              object
Ano                      int64
Expectativa de vida    float64
Pop Total                int64
PIB                    float64
dtype: object

In [42]:
# Show the last 15 rows of the DataFrame (tail() reads from the end; row order is unchanged)
df.tail(15)

Unnamed: 0,Pais,continente,Ano,Expectativa de vida,Pop Total,PIB
3297,Zambia,Africa,1997,40.24,9417789,1071.35
3298,Zambia,Africa,2002,39.19,10595811,1071.61
3299,Zambia,Africa,2007,42.38,11746035,1271.21
3300,Zimbabwe,Africa,1952,48.45,3080907,406.88
3301,Zimbabwe,Africa,1957,50.47,3646340,518.76
3302,Zimbabwe,Africa,1962,52.36,4277736,527.27
3303,Zimbabwe,Africa,1967,54.0,4995432,569.8
3304,Zimbabwe,Africa,1972,55.64,5861135,799.36
3305,Zimbabwe,Africa,1977,57.67,6642107,685.59
3306,Zimbabwe,Africa,1982,60.36,7636524,788.86


In [43]:
# Summary statistics (count, mean, std, min, quartiles, max) for the DataFrame;
# by default describe() covers only the numeric columns
df.describe()

Unnamed: 0,Ano,Expectativa de vida,Pop Total,PIB
count,3312.0,3312.0,3312.0,3312.0
mean,1980.30163,65.246881,31614890.0,11317.115821
std,16.927294,11.768389,104119300.0,11369.14271
min,1950.0,23.6,59412.0,241.17
25%,1967.0,58.3375,2678572.0,2514.6275
50%,1982.0,69.61,7557218.0,7838.505
75%,1996.0,73.66,19585220.0,17357.8825
max,2007.0,82.67,1318683000.0,113523.13


In [44]:
# Return only the distinct values of the "continente" column
# (unique() also reports NaN when the column has missing entries)
df["continente"].unique()

array(['Asia', 'Europe', 'Africa', 'Americas', nan, 'FSU', 'Oceania'],
      dtype=object)

In [53]:
# Keep only the rows whose continent is exactly "Oceania" in the variable
# Oceania, then preview its first five rows.
Oceania = df[df["continente"] == "Oceania"]
Oceania.head(n=5)

Unnamed: 0,Pais,continente,Ano,Expectativa de vida,Pop Total,PIB
975,Fiji,Oceania,1962,56.49,421869,2039.39
976,Fiji,Oceania,1967,58.61,485000,2170.07
977,Fiji,Oceania,1972,60.64,544000,2797.76
978,Fiji,Oceania,1977,62.67,599339,3182.57
979,Fiji,Oceania,1982,64.68,658906,3209.26


In [46]:
# Sanity check: the filtered frame should contain "Oceania" as its only continent value
Oceania["continente"].unique()

array(['Oceania'], dtype=object)

In [47]:
# Group the rows by continent and count the distinct countries in each group
# with nunique() (how many countries exist per continent).
# Rows whose "continente" is NaN are left out: groupby() drops NaN keys by default.
df.groupby("continente")["Pais"].nunique()

continente
Africa      51
Americas    25
Asia        41
Europe      35
FSU          6
Oceania      3
Name: Pais, dtype: int64

In [62]:
# Mean life expectancy for each year, computed with mean() and rounded to two decimals.
# NOTE(review): the displayed means are markedly lower for years ending in 2 and 7
# (1952, 1957, ...) — presumably more countries report in those years, so the set of
# countries being averaged differs by year; confirm before reading this as a trend.
df.groupby("Ano")["Expectativa de vida"].mean().round(2)

Ano
1950    62.00
1951    65.90
1952    49.21
1953    66.67
1954    67.46
1955    67.81
1956    67.95
1957    51.61
1958    68.82
1959    68.23
1960    68.47
1961    68.86
1962    54.04
1963    69.60
1964    70.06
1965    70.26
1966    70.45
1967    56.26
1968    70.69
1969    70.65
1970    70.96
1971    71.10
1972    58.47
1973    71.50
1974    71.78
1975    71.94
1976    72.16
1977    60.43
1978    72.72
1979    73.02
1980    73.06
1981    73.34
1982    62.37
1983    73.79
1984    74.10
1985    74.11
1986    74.45
1987    63.98
1988    74.76
1989    74.92
1990    74.28
1991    74.37
1992    65.01
1993    74.32
1994    74.46
1995    74.55
1996    75.03
1997    65.87
1998    75.57
1999    75.70
2000    76.03
2001    76.26
2002    66.84
2003    76.59
2004    76.92
2005    76.72
2006    77.89
2007    67.87
Name: Expectativa de vida, dtype: float64

In [66]:
# mean() returns the arithmetic mean of the column; here the "PIB" column, rounded to two decimals
df["PIB"].mean().round(2)

11317.12

In [67]:
# sum() adds up all the values of the selected column (here "PIB", not every column), rounded to two decimals
df["PIB"].sum().round(2)

37482287.6