In [1]:
import pandas as pd

In [2]:
# Load the raw Pokémon table from the CSV file and preview its first five rows.
pokemon = pd.read_csv('Pokemon.csv')
pokemon.head(5)

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


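Rename the original columns: `#` becomes `id`, and every other name is converted to lowercase snake_case (spaces → underscores, dots removed)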

In [3]:
# Normalise the column labels in two passes:
#   1. '#' is not a valid identifier, so give it an explicit name ('id');
#   2. every label is lower-cased, dots are dropped and spaces become underscores
#      (e.g. 'Sp. Atk' -> 'sp_atk', 'Type 1' -> 'type_1').
pokemon = (
    pokemon
    .rename(columns={'#': 'id'})
    .rename(columns=lambda label: label.lower().replace('.', '').replace(' ', '_'))
)

In [4]:
# Display the column labels to confirm the renaming above took effect.
pokemon.columns

Index(['id', 'name', 'type_1', 'type_2', 'total', 'hp', 'attack', 'defense',
       'sp_atk', 'sp_def', 'speed', 'generation', 'legendary'],
      dtype='object')

Count how many legendary and how many non-legendary Pokémon there are in each generation

In [5]:
# Tally legendary vs. non-legendary Pokémon inside every generation.
# The result is a one-column DataFrame indexed by (generation, legendary).
legends = (
    pokemon
    .groupby('generation')['legendary']  # the 'legendary' flag, split by generation
    .value_counts()                      # occurrences of each flag value per generation
    .to_frame()                          # Series -> DataFrame, keeping the MultiIndex
)
legends

Unnamed: 0_level_0,Unnamed: 1_level_0,count
generation,legendary,Unnamed: 2_level_1
1,False,160
1,True,6
2,False,101
2,True,5
3,False,142
3,True,18
4,False,108
4,True,13
5,False,150
5,True,15


In [6]:
# Replace the auto-generated 'count' label with a self-describing column name.
legends = legends.rename({'count': 'legendary_count'}, axis='columns')

In [7]:
# Peek at the first three rows to check the renamed column.
legends.head(n=3)

Unnamed: 0_level_0,Unnamed: 1_level_0,legendary_count
generation,legendary,Unnamed: 2_level_1
1,False,160
1,True,6
2,False,101


In [8]:
# Pivot the inner 'legendary' index level into columns, leaving one row per
# generation with the False / True counts side by side.
legends_unstacked = legends.unstack(level='legendary')
legends_unstacked.head(5)

Unnamed: 0_level_0,legendary_count,legendary_count
legendary,False,True
generation,Unnamed: 1_level_2,Unnamed: 2_level_2
1,160,6
2,101,5
3,142,18
4,108,13
5,150,15


Now let’s find out which combination of generation and primary type (`type_1`) has the most legendary Pokémon

In [9]:
# Find the (generation, primary type) pair that contains the most legendary Pokémon.
#
# Only legendary rows are kept, then the rows in each (generation, type_1) group
# are counted and the label of the largest group is returned as a tuple.
# This deliberately avoids selecting the ('count', True) column of an unstacked
# value_counts() frame: the 'count' label is auto-generated and only exists in
# pandas >= 2.0, so that lookup breaks on older versions.
# Groups come out sorted by key and idxmax() returns the first maximum, so ties
# are resolved in favour of the earliest generation / alphabetically first type.
(pokemon.query('legendary == True')           # keep legendary Pokémon only
        .groupby(['generation', 'type_1'])    # one group per (generation, primary type)
        .size()                               # number of legendaries in each group
        .idxmax()                             # label of the largest group
)

(3, 'Dragon')

A dataset of superheroes in wide format

In [10]:
# Load the hero-by-power matrix (one row per hero, one True/False column per power)
# and preview its first five rows.
superheroes = pd.read_csv('superheroes_power_matrix.csv')
superheroes.head(5)

Unnamed: 0,Name,Agility,Accelerated Healing,Lantern Power Ring,Dimensional Awareness,Cold Resistance,Durability,Stealth,Energy Absorption,Flight,...,Web Creation,Reality Warping,Odin Force,Symbiote Costume,Speed Force,Phoenix Force,Molecular Dissipation,Vision - Cryo,Omnipresent,Omniscient
0,3-D Man,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,A-Bomb,False,True,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,Abe Sapien,True,True,False,False,True,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,Abin Sur,False,False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,Abomination,False,True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


Convert the data to a long format where the only identifier is the hero’s name (*Name*)

In [11]:
# Reshape wide -> long: each (hero, power) pair becomes its own row.
# 'Name' stays as the identifier, the former column labels go into 'superpower'
# and the True/False cells land in the default 'value' column.
superheroes_long = pd.melt(superheroes, id_vars='Name', var_name='superpower')
superheroes_long.head(n=3)

Unnamed: 0,Name,superpower,value
0,3-D Man,Agility,True
1,A-Bomb,Agility,False
2,Abe Sapien,Agility,True


In [12]:
# Collapse the long table into one row per hero holding the list of powers
# that hero actually has.
superheroes_powers = (
    superheroes_long
    .loc[lambda frame: frame['value'] == True]  # keep only the powers a hero possesses
    .groupby('Name')['superpower']              # powers grouped by hero
    .apply(list)                                # gather each hero's powers into a list
    .reset_index()                              # 'Name' back to a column -> DataFrame
)
superheroes_powers.head(5)

Unnamed: 0,Name,superpower
0,3-D Man,"[Agility, Super Strength, Stamina, Super Speed]"
1,A-Bomb,"[Accelerated Healing, Durability, Longevity, S..."
2,Abe Sapien,"[Agility, Accelerated Healing, Cold Resistance..."
3,Abin Sur,[Lantern Power Ring]
4,Abomination,"[Accelerated Healing, Intelligence, Super Stre..."
