# Advanced Transformations Core

Kris Barbier

In [1]:
#Imports
import pandas as pd
import numpy as np
import json

In [2]:
#Load in superhero_info data
df_heroes = pd.read_csv('Data/superhero_info - superhero_info.csv')

#Preview data
df_heroes.head()

Unnamed: 0,Hero|Publisher,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements
0,A-Bomb|Marvel Comics,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
1,Abe Sapien|Dark Horse Comics,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}"
2,Abin Sur|DC Comics,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}"
3,Abomination|Marvel Comics,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
4,Absorbing Man|Marvel Comics,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}"


In [3]:
#Preview info
df_heroes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 463 entries, 0 to 462
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Hero|Publisher  463 non-null    object
 1   Gender          463 non-null    object
 2   Race            463 non-null    object
 3   Alignment       463 non-null    object
 4   Hair color      463 non-null    object
 5   Eye color       463 non-null    object
 6   Skin color      463 non-null    object
 7   Measurements    463 non-null    object
dtypes: object(8)
memory usage: 29.1+ KB


In [4]:
#Load in superhero_powers data
df_powers = pd.read_csv('Data/superhero_powers - superhero_powers.csv')

#Preview data
df_powers.head()

Unnamed: 0,hero_names,Powers
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed"
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super..."
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du..."
3,Abin Sur,Lantern Power Ring
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt..."


In [5]:
#Preview info
df_powers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 667 entries, 0 to 666
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   hero_names  667 non-null    object
 1   Powers      667 non-null    object
dtypes: object(2)
memory usage: 10.5+ KB


- Tasks to complete:
    - Split Hero|Publisher column into Hero and Publisher
    - Split Measurements column into Height and Weight
    - One-Hot encode all powers from the Powers column

In [6]:
#Split hero/publisher
df_heroes[['Hero', 'Publisher']] = df_heroes['Hero|Publisher'].str.split('|', expand = True)
#Verify the split
df_heroes.head(2)

Unnamed: 0,Hero|Publisher,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher
0,A-Bomb|Marvel Comics,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics
1,Abe Sapien|Dark Horse Comics,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics


In [7]:
#Drop Hero|Publisher
df_heroes = df_heroes.drop(columns = 'Hero|Publisher')
#Verify the drop
df_heroes.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher
0,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics
1,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics
2,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}",Abin Sur,DC Comics
3,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",Abomination,Marvel Comics
4,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}",Absorbing Man,Marvel Comics


In [8]:
##Split measurements into height and weight
#Use str.replace to replace single quotes
df_heroes['Measurements'] = df_heroes['Measurements'].str.replace("'",'"')
#Apply json.loads to whole column
df_heroes['Measurements'] = df_heroes['Measurements'].apply(json.loads)
#Verify changes
df_heroes['Measurements'].head(2)

0    {'Height': '203.0 cm', 'Weight': '441.0 kg'}
1     {'Height': '191.0 cm', 'Weight': '65.0 kg'}
Name: Measurements, dtype: object

In [9]:
#Check a single value after transformation
test_measurement = df_heroes.loc[0, 'Measurements']
print(type(test_measurement))
test_measurement

<class 'dict'>


{'Height': '203.0 cm', 'Weight': '441.0 kg'}

In [10]:
#Create series of height and weight values
height_weight = df_heroes['Measurements'].apply(pd.Series)
height_weight

Unnamed: 0,Height,Weight
0,203.0 cm,441.0 kg
1,191.0 cm,65.0 kg
2,185.0 cm,90.0 kg
3,203.0 cm,441.0 kg
4,193.0 cm,122.0 kg
...,...,...
458,183.0 cm,83.0 kg
459,165.0 cm,52.0 kg
460,66.0 cm,17.0 kg
461,170.0 cm,57.0 kg


In [11]:
#Split columns to obtain only numeric values
height_weight[['Height (cm)', 'cm']] = height_weight['Height'].str.split(' ', expand = True)
height_weight[['Weight (kg)', 'kg']] = height_weight['Weight'].str.split(' ', expand = True)

#Verify changes
height_weight

Unnamed: 0,Height,Weight,Height (cm),cm,Weight (kg),kg
0,203.0 cm,441.0 kg,203.0,cm,441.0,kg
1,191.0 cm,65.0 kg,191.0,cm,65.0,kg
2,185.0 cm,90.0 kg,185.0,cm,90.0,kg
3,203.0 cm,441.0 kg,203.0,cm,441.0,kg
4,193.0 cm,122.0 kg,193.0,cm,122.0,kg
...,...,...,...,...,...,...
458,183.0 cm,83.0 kg,183.0,cm,83.0,kg
459,165.0 cm,52.0 kg,165.0,cm,52.0,kg
460,66.0 cm,17.0 kg,66.0,cm,17.0,kg
461,170.0 cm,57.0 kg,170.0,cm,57.0,kg


In [12]:
#Drop height, weight, cm, and kg columns
height_weight = height_weight.drop(columns = ['Height', 'Weight', 'cm', 'kg'])

#Verify the changes
height_weight.head()

Unnamed: 0,Height (cm),Weight (kg)
0,203.0,441.0
1,191.0,65.0
2,185.0,90.0
3,203.0,441.0
4,193.0,122.0


In [13]:
#Concatenate frames
df_heroes = pd.concat((df_heroes, height_weight), axis = 1)
df_heroes.head(2)

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher,Height (cm),Weight (kg)
0,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics,203.0,441.0
1,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics,191.0,65.0


In [14]:
#Drop measurements column
df_heroes = df_heroes.drop(columns = 'Measurements')
df_heroes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 463 entries, 0 to 462
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Gender       463 non-null    object
 1   Race         463 non-null    object
 2   Alignment    463 non-null    object
 3   Hair color   463 non-null    object
 4   Eye color    463 non-null    object
 5   Skin color   463 non-null    object
 6   Hero         463 non-null    object
 7   Publisher    463 non-null    object
 8   Height (cm)  463 non-null    object
 9   Weight (kg)  463 non-null    object
dtypes: object(10)
memory usage: 36.3+ KB


In [15]:
#Change height and weight columns to numeric types
df_heroes[['Height (cm)', 'Weight (kg)']] = df_heroes[['Height (cm)', 'Weight (kg)']].astype('float')
df_heroes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 463 entries, 0 to 462
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Gender       463 non-null    object 
 1   Race         463 non-null    object 
 2   Alignment    463 non-null    object 
 3   Hair color   463 non-null    object 
 4   Eye color    463 non-null    object 
 5   Skin color   463 non-null    object 
 6   Hero         463 non-null    object 
 7   Publisher    463 non-null    object 
 8   Height (cm)  463 non-null    float64
 9   Weight (kg)  463 non-null    float64
dtypes: float64(2), object(8)
memory usage: 36.3+ KB


In [16]:
df_powers.loc[0, 'Powers']

'Agility,Super Strength,Stamina,Super Speed'

In [17]:
df_powers['Powers'].value_counts()

Intelligence                                                                                                                                                                                                                                                         8
Durability,Super Strength                                                                                                                                                                                                                                            5
Agility,Stealth,Marksmanship,Weapons Master,Stamina                                                                                                                                                                                                                  4
Marksmanship                                                                                                                                                                                                       

In [18]:
col_list = list(df_powers['Powers'])
print(col_list)

['Agility,Super Strength,Stamina,Super Speed', 'Accelerated Healing,Durability,Longevity,Super Strength,Stamina,Camouflage,Self-Sustenance', 'Agility,Accelerated Healing,Cold Resistance,Durability,Underwater breathing,Marksmanship,Weapons Master,Longevity,Intelligence,Super Strength,Telepathy,Stamina,Immortality,Reflexes,Enhanced Sight,Sub-Mariner', 'Lantern Power Ring', 'Accelerated Healing,Intelligence,Super Strength,Stamina,Super Speed,Invulnerability,Animation,Super Breath', 'Dimensional Awareness,Flight,Intelligence,Super Strength,Size Changing,Super Speed,Teleportation,Magic,Dimensional Travel,Immortality,Invulnerability,Molecular Manipulation,Energy Manipulation,Power Cosmic', 'Cold Resistance,Durability,Energy Absorption,Super Strength,Invulnerability,Elemental Transmogrification,Fire Resistance,Natural Armor,Molecular Manipulation,Heat Resistance,Matter Absorption', 'Accelerated Healing,Immortality,Regeneration', 'Durability,Stealth,Flight,Marksmanship,Weapons Master,Intellige

In [19]:
#Create new column where powers are separated by a space, not a comma
df_powers['Powers_Sep'] = df_powers['Powers'].str.replace("'",'"')
df_powers['Powers_Sep'].head()

0           Agility,Super Strength,Stamina,Super Speed
1    Accelerated Healing,Durability,Longevity,Super...
2    Agility,Accelerated Healing,Cold Resistance,Du...
3                                   Lantern Power Ring
4    Accelerated Healing,Intelligence,Super Strengt...
Name: Powers_Sep, dtype: object

In [20]:
# Apply json.loads to entire column
#df_powers['Powers_Sep'] = df_powers['Powers_Sep'].apply(json.loads)
# check results
#df_powers['Powers_Sep'].head()

In [21]:
#Explode columns
exploded = df_powers.explode('Powers_Sep')
exploded[['hero_names', 'Powers', 'Powers_Sep']].head()

Unnamed: 0,hero_names,Powers,Powers_Sep
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed","Agility,Super Strength,Stamina,Super Speed"
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super...","Accelerated Healing,Durability,Longevity,Super..."
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du...","Agility,Accelerated Healing,Cold Resistance,Du..."
3,Abin Sur,Lantern Power Ring,Lantern Power Ring
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt...","Accelerated Healing,Intelligence,Super Strengt..."


In [22]:
#Saving the unique values from the exploded column
cols_to_make = exploded['Powers_Sep'].dropna().unique()
cols_to_make

array(['Agility,Super Strength,Stamina,Super Speed',
       'Accelerated Healing,Durability,Longevity,Super Strength,Stamina,Camouflage,Self-Sustenance',
       'Agility,Accelerated Healing,Cold Resistance,Durability,Underwater breathing,Marksmanship,Weapons Master,Longevity,Intelligence,Super Strength,Telepathy,Stamina,Immortality,Reflexes,Enhanced Sight,Sub-Mariner',
       'Lantern Power Ring',
       'Accelerated Healing,Intelligence,Super Strength,Stamina,Super Speed,Invulnerability,Animation,Super Breath',
       'Dimensional Awareness,Flight,Intelligence,Super Strength,Size Changing,Super Speed,Teleportation,Magic,Dimensional Travel,Immortality,Invulnerability,Molecular Manipulation,Energy Manipulation,Power Cosmic',
       'Cold Resistance,Durability,Energy Absorption,Super Strength,Invulnerability,Elemental Transmogrification,Fire Resistance,Natural Armor,Molecular Manipulation,Heat Resistance,Matter Absorption',
       'Accelerated Healing,Immortality,Regeneration',
       'D

In [23]:
for col in cols_to_make:
    df_powers[col] = df_powers['Powers'].str.contains(col)
df_powers.head()

  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['P

  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['P

  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['Powers'].str.contains(col)
  df_powers[col] = df_powers['P

Unnamed: 0,hero_names,Powers,Powers_Sep,"Agility,Super Strength,Stamina,Super Speed","Accelerated Healing,Durability,Longevity,Super Strength,Stamina,Camouflage,Self-Sustenance","Agility,Accelerated Healing,Cold Resistance,Durability,Underwater breathing,Marksmanship,Weapons Master,Longevity,Intelligence,Super Strength,Telepathy,Stamina,Immortality,Reflexes,Enhanced Sight,Sub-Mariner",Lantern Power Ring,"Accelerated Healing,Intelligence,Super Strength,Stamina,Super Speed,Invulnerability,Animation,Super Breath","Dimensional Awareness,Flight,Intelligence,Super Strength,Size Changing,Super Speed,Teleportation,Magic,Dimensional Travel,Immortality,Invulnerability,Molecular Manipulation,Energy Manipulation,Power Cosmic","Cold Resistance,Durability,Energy Absorption,Super Strength,Invulnerability,Elemental Transmogrification,Fire Resistance,Natural Armor,Molecular Manipulation,Heat Resistance,Matter Absorption",...,"Durability,Flight,Longevity,Super Strength,Energy Blasts,Size Changing,Stamina,Super Speed,Reflexes,Invulnerability,Self-Sustenance","Accelerated Healing,Durability,Flight,Marksmanship,Weapons Master,Longevity,Intelligence,Super Strength,Telepathy,Stamina,Super Speed,Animal Oriented Powers,Weapon-based Powers,Enhanced Senses,Dimensional Travel,Enhanced Memory,Reflexes,Force Fields,Fire Resistance,Enhanced Hearing,Hypnokinesis,Enhanced Smell,Vision - Telescopic,Toxin and Disease Resistance,Magic Resistance,Vision - Microscopic,Vision - Night,Vision - Infrared,Vision - X-Ray,Vision - Thermal","Agility,Accelerated Healing,Durability,Stealth,Marksmanship,Longevity,Super Strength,Stamina,Jump,Reflexes,Enhanced Hearing,Enhanced Sight,Natural Weapons,Enhanced Smell,Vision - Telescopic,Toxin and Disease Resistance,Vision - Night","Flight,Telepathy,Astral Travel,Teleportation,Telekinesis,Phasing,Astral Projection,Psionic Powers,Mind Control,Intangibility,Illusions","Size Changing,Animal Oriented Powers","Flight,Energy Blasts,Size Changing","Cold Resistance,Durability,Longevity,Super Strength,Cryokinesis,Immortality","Agility,Stealth,Danger Sense,Marksmanship,Weapons Master,Longevity,Intelligence,Telepathy,Energy Blasts,Stamina,Super Speed,Telekinesis,Jump,Reflexes,Force Fields,Empathy,Precognition,Cloaking,The Force","Cryokinesis,Telepathy,Magic,Fire Control,Probability Manipulation,Water Control,Terrakinesis,Weather Control","Super Speed,Intangibility,Time Travel,Time Manipulation"
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed","Agility,Super Strength,Stamina,Super Speed",True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super...","Accelerated Healing,Durability,Longevity,Super...",False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du...","Agility,Accelerated Healing,Cold Resistance,Du...",False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,Abin Sur,Lantern Power Ring,Lantern Power Ring,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt...","Accelerated Healing,Intelligence,Super Strengt...",False,False,False,False,True,False,False,...,False,False,False,False,False,False,False,False,False,False
