<a href="https://colab.research.google.com/github/tmabgdata/data_analysis/blob/master/pandas_introduction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pandas

Pandas is a Python library used for working with data sets.

It has functions for analyzing, cleaning, exploring, and manipulating data.

The name "Pandas" has a reference to both "Panel Data", and "Python Data Analysis" and was created by Wes McKinney in 2008. 

By [W3Schools](https://www.w3schools.com/python/pandas/pandas_intro.asp#:~:text=Pandas%20is%20a%20Python%20library,by%20Wes%20McKinney%20in%202008.).


# Pokémon Dataset Analysis

## Access Data

In [1]:
import pandas as pd

In [2]:
# Using csv file translated by the professor of the Uninter University, Leony Gomes
df = pd.read_csv('https://raw.githubusercontent.com/leonhgomes/Pandas/main/pokemon_data.csv')

In [3]:
# checking column name
df.columns.values.tolist()

['#',
 'Nome',
 'Tipo 1',
 'Tipo 2',
 'Vida',
 'Ataque',
 'Defensa',
 'Atq Esp',
 'Def Esp',
 'Velocidade',
 'Geracao',
 'Lendario']

In [4]:
# checking data types
df.dtypes

#              int64
Nome          object
Tipo 1        object
Tipo 2        object
Vida           int64
Ataque         int64
Defensa        int64
Atq Esp        int64
Def Esp        int64
Velocidade     int64
Geracao        int64
Lendario        bool
dtype: object

In [5]:
# replace column names
df.rename(columns={'#':'id_pokédex', 'Nome':'Name', 'Tipo 1':'Type 1', 'Tipo 2':'Type 2',
                   'Vida':'Life', 'Ataque':'Attack', 'Defesa':'Defense', 'Atq Esp':'Spec Att',
                   'Def Esp':'Spec Def', 'Velocidade':'Speed', 'Geracao':'Generation', 'Lendario':'Legend'}, inplace=True)
df.columns.values.tolist()

['id_pokédex',
 'Name',
 'Type 1',
 'Type 2',
 'Life',
 'Attack',
 'Defensa',
 'Spec Att',
 'Spec Def',
 'Speed',
 'Generation',
 'Legend']

In [6]:
# showing the top 3
df.head(3)

Unnamed: 0,id_pokédex,Name,Type 1,Type 2,Life,Attack,Defensa,Spec Att,Spec Def,Speed,Generation,Legend
0,1,Bulbasaur,Planta,Venenoso,45,49,49,65,65,45,1,False
1,2,Ivysaur,Planta,Venenoso,60,62,63,80,80,60,1,False
2,3,Venusaur,Planta,Venenoso,80,82,83,100,100,80,1,False


In [7]:
# showing the last 3
df.tail(3)

Unnamed: 0,id_pokédex,Name,Type 1,Type 2,Life,Attack,Defensa,Spec Att,Spec Def,Speed,Generation,Legend
797,720,HoopaHoopa Confined,Psiquico,Fantasma,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psiquico,Noturno,80,160,60,170,130,80,6,True
799,721,Volcanion,Fogo,Agua,80,110,120,130,90,70,6,True


In [8]:
# checking data
df['Type 1'].unique()

array(['Planta', 'Fogo', 'Agua', 'Inseto', 'Normal', 'Venenoso',
       'Electric', 'Terra', 'Fada', 'Lutador', 'Psiquico', 'Pedra',
       'Fantasma', 'Gelo', 'Dragao', 'Noturno', 'Metalico', 'Voador'],
      dtype=object)

In [9]:
# translating to english
df['Type 1'] = df['Type 1'].replace(['Planta','Fogo','Agua','Inseto','Normal','Venenoso',
                      'Eletric','Terra','Fada','Lutador','Psiquico','Pedra',
                      'Fantasma','Gelo','Dragao','Noturno','Metalico','Voador'],
                     ['Grass','Fire','Water','Bug','Normal','Poison',
                      'Eletric','Ground','Fairy','Fighting','Psychic','Rock',
                      'Ghost','Ice','Dragon','Dark','Steel','Flying'])

In [10]:
# checking
df['Type 1'].unique()

array(['Grass', 'Fire', 'Water', 'Bug', 'Normal', 'Poison', 'Electric',
       'Ground', 'Fairy', 'Fighting', 'Psychic', 'Rock', 'Ghost', 'Ice',
       'Dragon', 'Dark', 'Steel', 'Flying'], dtype=object)

In [11]:
# translating to english
df['Type 2'] = df['Type 2'].replace(['Planta','Fogo','Agua','Inseto','Normal','Venenoso',
                      'Eletric','Terra','Fada','Lutador','Psiquico','Pedra',
                      'Fantasma','Gelo','Dragao','Noturno','Metalico','Voador'],
                     ['Grass','Fire','Water','Bug','Normal','Poison',
                      'Eletric','Ground','Fairy','Fighting','Psychic','Rock',
                      'Ghost','Ice','Dragon','Dark','Steel','Flying'])

In [12]:
# checking
df['Type 2'].unique()

array(['Poison', nan, 'Flying', 'Dragon', 'Ground', 'Fairy', 'Grass',
       'Fighting', 'Psychic', 'Steel', 'Ice', 'Rock', 'Dark', 'Water',
       'Electric', 'Fire', 'Ghost', 'Bug', 'Normal'], dtype=object)

In [13]:
# replacing null values to 'Normal' value
df['Type 2'].fillna('Normal', inplace = True)

In [14]:
# Select by attribute 
df.loc[df['Type 1'] == 'Psychic']

Unnamed: 0,id_pokédex,Name,Type 1,Type 2,Life,Attack,Defensa,Spec Att,Spec Def,Speed,Generation,Legend
68,63,Abra,Psychic,Normal,25,20,15,105,55,90,1,False
69,64,Kadabra,Psychic,Normal,40,35,30,120,70,105,1,False
70,65,Alakazam,Psychic,Normal,55,50,45,135,95,120,1,False
71,65,AlakazamMega Alakazam,Psychic,Normal,55,50,65,175,95,150,1,False
104,96,Drowzee,Psychic,Normal,60,48,45,43,90,42,1,False
105,97,Hypno,Psychic,Normal,85,73,70,73,115,67,1,False
131,122,Mr. Mime,Psychic,Fairy,40,45,65,100,120,90,1,False
162,150,Mewtwo,Psychic,Normal,106,110,90,154,90,130,1,True
163,150,MewtwoMega Mewtwo X,Psychic,Fighting,106,190,100,154,100,130,1,True
164,150,MewtwoMega Mewtwo Y,Psychic,Normal,106,150,70,194,120,140,1,True


In [15]:
# some statistics
df.describe()

Unnamed: 0,id_pokédex,Life,Attack,Defensa,Spec Att,Spec Def,Speed,Generation
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,362.81375,69.25875,79.00125,73.8425,72.82,71.9025,68.2775,3.32375
std,208.343798,25.534669,32.457366,31.183501,32.722294,27.828916,29.060474,1.66129
min,1.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0
25%,184.75,50.0,55.0,50.0,49.75,50.0,45.0,2.0
50%,364.5,65.0,75.0,70.0,65.0,70.0,65.0,3.0
75%,539.25,80.0,100.0,90.0,95.0,90.0,90.0,5.0
max,721.0,255.0,190.0,230.0,194.0,230.0,180.0,6.0


## Ordination

In [16]:
# sort alphabetically | to invert, use the 'ascending' parameter = False
df.sort_values(by=['Name'], ascending = True, ignore_index = True)
df.head(10)

Unnamed: 0,id_pokédex,Name,Type 1,Type 2,Life,Attack,Defensa,Spec Att,Spec Def,Speed,Generation,Legend
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,Normal,39,52,43,60,50,65,1,False
5,5,Charmeleon,Fire,Normal,58,64,58,80,65,80,1,False
6,6,Charizard,Fire,Flying,78,84,78,109,85,100,1,False
7,6,CharizardMega Charizard X,Fire,Dragon,78,130,111,130,85,100,1,False
8,6,CharizardMega Charizard Y,Fire,Flying,78,104,78,159,115,100,1,False
9,7,Squirtle,Water,Normal,44,48,65,50,64,43,1,False


In [17]:
# creating 'Total Power' column, with the sum of all the data:
df['Total Pw'] = df.iloc[:,4:10].sum(axis = 1)
df.head(5)

Unnamed: 0,id_pokédex,Name,Type 1,Type 2,Life,Attack,Defensa,Spec Att,Spec Def,Speed,Generation,Legend,Total Pw
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False,318
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False,405
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False,525
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False,625
4,4,Charmander,Fire,Normal,39,52,43,60,50,65,1,False,309


In [18]:
# reorder columns
cols = list(df.columns.values)
df = df [cols[0:4] + [cols[-1]] + cols[4:12]]
df.head(5)

Unnamed: 0,id_pokédex,Name,Type 1,Type 2,Total Pw,Life,Attack,Defensa,Spec Att,Spec Def,Speed,Generation,Legend
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,Normal,309,39,52,43,60,50,65,1,False


# Save File

## Complete Data processing


In [19]:
df.to_csv('pokemon_complete_data_processing.csv')