# Advanced Transformation Core
Susan Shin

In [33]:
import ast

import numpy as np
import pandas as pd

In [34]:
## Load the raw superhero attribute table and preview its first rows
info = pd.read_csv(filepath_or_buffer='Data/superhero_info - superhero_info.csv')
info.head(5)

Unnamed: 0,Hero|Publisher,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements
0,A-Bomb|Marvel Comics,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
1,Abe Sapien|Dark Horse Comics,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}"
2,Abin Sur|DC Comics,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}"
3,Abomination|Marvel Comics,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
4,Absorbing Man|Marvel Comics,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}"


In [35]:
## Break the combined "Hero|Publisher" text apart at the pipe character,
## store the two pieces as their own columns, then discard the combined column
name_and_publisher = info['Hero|Publisher'].str.split('|', expand=True)
info[['hero_names','publisher']] = name_and_publisher
info = info.drop('Hero|Publisher', axis='columns')
info.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,hero_names,publisher
0,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics
1,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics
2,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}",Abin Sur,DC Comics
3,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",Abomination,Marvel Comics
4,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}",Absorbing Man,Marvel Comics


In [36]:
## Pull the first row's Measurements entry to inspect its type and contents
ms = info.at[0, "Measurements"]
print(type(ms))
ms

<class 'str'>


"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"

In [37]:
## Swap every single quote in the sample for a double quote
ms = ms.translate(str.maketrans("'", '"'))
ms

'{"Height": "203.0 cm", "Weight": "441.0 kg"}'

In [38]:
import json
## Decode the double-quoted sample text into a Python object and confirm its type
ms_fixed = json.loads(ms)
print(ms_fixed.__class__)
ms_fixed

<class 'dict'>


{'Height': '203.0 cm', 'Weight': '441.0 kg'}

In [39]:
## Parse each Measurements string (a Python dict literal such as
## "{'Height': '203.0 cm', 'Weight': '441.0 kg'}") into a real dict.
## ast.literal_eval reads the single-quoted literal directly, so no quote
## swapping is needed -- a blanket ' -> " replacement would corrupt any value
## that itself contains an apostrophe or a double quote.
## Entries that are not strings (e.g. already parsed on a previous run of this
## cell) are passed through unchanged, so re-running the cell is harmless.
info['Measurements'] = info['Measurements'].apply(
    lambda raw: ast.literal_eval(raw) if isinstance(raw, str) else raw
)
info['Measurements'].head()

0    {'Height': '203.0 cm', 'Weight': '441.0 kg'}
1     {'Height': '191.0 cm', 'Weight': '65.0 kg'}
2     {'Height': '185.0 cm', 'Weight': '90.0 kg'}
3    {'Height': '203.0 cm', 'Weight': '441.0 kg'}
4    {'Height': '193.0 cm', 'Weight': '122.0 kg'}
Name: Measurements, dtype: object

In [40]:
## Expand the column of measurement dicts into one column per dict key.
## Building the frame once from a list of dicts avoids constructing a separate
## pd.Series for every row (what .apply(pd.Series) does); passing info's index
## keeps the rows aligned with info.
height_weight = pd.DataFrame(info['Measurements'].tolist(), index=info.index)
height_weight

Unnamed: 0,Height,Weight
0,203.0 cm,441.0 kg
1,191.0 cm,65.0 kg
2,185.0 cm,90.0 kg
3,203.0 cm,441.0 kg
4,193.0 cm,122.0 kg
...,...,...
458,183.0 cm,83.0 kg
459,165.0 cm,52.0 kg
460,66.0 cm,17.0 kg
461,170.0 cm,57.0 kg


In [41]:
## Attach the expanded measurement columns to the right of the existing columns
info = pd.concat([info, height_weight], axis='columns')
info.head(n=2)

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,hero_names,publisher,Height,Weight
0,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics,203.0 cm,441.0 kg
1,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics,191.0 cm,65.0 kg


In [42]:
## Remove the dict-valued Measurements column and preview the result
info = info.drop('Measurements', axis='columns')
info.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,hero_names,publisher,Height,Weight
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,203.0 cm,441.0 kg
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,191.0 cm,65.0 kg
2,Male,Ungaran,good,No Hair,blue,red,Abin Sur,DC Comics,185.0 cm,90.0 kg
3,Male,Human / Radiation,bad,No Hair,green,Unknown,Abomination,Marvel Comics,203.0 cm,441.0 kg
4,Male,Human,bad,No Hair,blue,Unknown,Absorbing Man,Marvel Comics,193.0 cm,122.0 kg


In [43]:
## Split each measurement at the space into its numeric text and its unit label,
## saving the two pieces as new columns in the dataframe
for source_col, value_col, unit_col in (
    ('Height', 'Height (in CM)', 'cm'),
    ('Weight', 'Weight (in KG)', 'kg'),
):
    info[[value_col, unit_col]] = info[source_col].str.split(' ', expand=True)
info.head(2)

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,hero_names,publisher,Height,Weight,Height (in CM),cm,Weight (in KG),kg
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,203.0 cm,441.0 kg,203.0,cm,441.0,kg
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,191.0 cm,65.0 kg,191.0,cm,65.0,kg


In [44]:
## Remove the original text columns along with the unit-only helper columns
info = info.drop(['Height', 'Weight', 'cm', 'kg'], axis='columns')
info.head(2)

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,hero_names,publisher,Height (in CM),Weight (in KG)
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,203.0,441.0
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,191.0,65.0


In [45]:
## Cast both measurement columns to floating point
for numeric_col in ('Height (in CM)', 'Weight (in KG)'):
    info[numeric_col] = info[numeric_col].astype(float)
info.head(2)

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,hero_names,publisher,Height (in CM),Weight (in KG)
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,203.0,441.0
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,191.0,65.0


In [46]:
## Print the column dtypes and non-null counts of the info dataframe
info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 463 entries, 0 to 462
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Gender          463 non-null    object 
 1   Race            463 non-null    object 
 2   Alignment       463 non-null    object 
 3   Hair color      463 non-null    object 
 4   Eye color       463 non-null    object 
 5   Skin color      463 non-null    object 
 6   hero_names      463 non-null    object 
 7   publisher       463 non-null    object 
 8   Height (in CM)  463 non-null    float64
 9   Weight (in KG)  463 non-null    float64
dtypes: float64(2), object(8)
memory usage: 36.3+ KB


In [47]:
## Load the superhero powers table and preview its first rows
powers = pd.read_csv(filepath_or_buffer='Data/superhero_powers - superhero_powers.csv')
powers.head(5)

Unnamed: 0,hero_names,Powers
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed"
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super..."
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du..."
3,Abin Sur,Lantern Power Ring
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt..."


In [48]:
## Join powers to info on hero_names; the inner join keeps only the names
## that appear in both tables
df = powers.merge(info, how='inner', on='hero_names')
df.head()

Unnamed: 0,hero_names,Powers,Gender,Race,Alignment,Hair color,Eye color,Skin color,publisher,Height (in CM),Weight (in KG)
0,A-Bomb,"Accelerated Healing,Durability,Longevity,Super...",Male,Human,good,No Hair,yellow,Unknown,Marvel Comics,203.0,441.0
1,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du...",Male,Icthyo Sapien,good,No Hair,blue,blue,Dark Horse Comics,191.0,65.0
2,Abin Sur,Lantern Power Ring,Male,Ungaran,good,No Hair,blue,red,DC Comics,185.0,90.0
3,Abomination,"Accelerated Healing,Intelligence,Super Strengt...",Male,Human / Radiation,bad,No Hair,green,Unknown,Marvel Comics,203.0,441.0
4,Absorbing Man,"Cold Resistance,Durability,Energy Absorption,S...",Male,Human,bad,No Hair,blue,Unknown,Marvel Comics,193.0,122.0


In [49]:
## Print the column dtypes and non-null counts of the merged dataframe
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 463 entries, 0 to 462
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   hero_names      463 non-null    object 
 1   Powers          463 non-null    object 
 2   Gender          463 non-null    object 
 3   Race            463 non-null    object 
 4   Alignment       463 non-null    object 
 5   Hair color      463 non-null    object 
 6   Eye color       463 non-null    object 
 7   Skin color      463 non-null    object 
 8   publisher       463 non-null    object 
 9   Height (in CM)  463 non-null    float64
 10  Weight (in KG)  463 non-null    float64
dtypes: float64(2), object(9)
memory usage: 43.4+ KB


In [50]:
## Look at the first row's Powers entry to see how it is stored
pwrs = df.at[0, "Powers"]
print(type(pwrs))
pwrs

<class 'str'>


'Accelerated Healing,Durability,Longevity,Super Strength,Stamina,Camouflage,Self-Sustenance'

In [51]:
## Split each Powers string at the commas, placing every piece in its own column
pwrs = (
    df["Powers"]
    .str.split(pat=",", expand=True)
)
pwrs

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,34,35,36,37,38,39,40,41,42,43
0,Accelerated Healing,Durability,Longevity,Super Strength,Stamina,Camouflage,Self-Sustenance,,,,...,,,,,,,,,,
1,Agility,Accelerated Healing,Cold Resistance,Durability,Underwater breathing,Marksmanship,Weapons Master,Longevity,Intelligence,Super Strength,...,,,,,,,,,,
2,Lantern Power Ring,,,,,,,,,,...,,,,,,,,,,
3,Accelerated Healing,Intelligence,Super Strength,Stamina,Super Speed,Invulnerability,Animation,Super Breath,,,...,,,,,,,,,,
4,Cold Resistance,Durability,Energy Absorption,Super Strength,Invulnerability,Elemental Transmogrification,Fire Resistance,Natural Armor,Molecular Manipulation,Heat Resistance,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
458,Size Changing,Animal Oriented Powers,,,,,,,,,...,,,,,,,,,,
459,Flight,Energy Blasts,Size Changing,,,,,,,,...,,,,,,,,,,
460,Agility,Stealth,Danger Sense,Marksmanship,Weapons Master,Longevity,Intelligence,Telepathy,Energy Blasts,Stamina,...,,,,,,,,,,
461,Cryokinesis,Telepathy,Magic,Fire Control,Probability Manipulation,Water Control,Terrakinesis,Weather Control,,,...,,,,,,,,,,


Compare the average weight of superheroes who have Super Speed to those who do not.
What is the average height of heroes for each publisher?