In [1]:
%matplotlib inline

import csv
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Apply the 'fivethirtyeight' style sheet, then override the default
# figure size (width, height) for every figure created afterwards.
plt.style.use('fivethirtyeight')
plt.rcParams['figure.figsize'] = (5.0, 2.0)

# Read the semicolon-delimited hero records into `heroes`, a list of rows where
# each row is a list of string fields.
# newline='' hands newline handling to the csv module, as its documentation
# requires; without it, newlines embedded in quoted fields can be misread.
with open('../data/heroes.csv', 'r', newline='') as heroes_file:
    heroes_reader = csv.reader(heroes_file, delimiter=';', quotechar='"')
    # Skip the first row (presumably the column header - confirm against the
    # file). The None default keeps an empty file from raising StopIteration.
    next(heroes_reader, None)
    heroes = list(heroes_reader)

In [2]:
# Column 0 of every row is used as the hero's name (it becomes the Series index below).
names = [row[0] for row in heroes]
# Column 7 is parsed as an integer year; empty fields become None.
# NOTE(review): column 7 is presumably the first-appearance year, judging by
# the name of the Series built from it - confirm against the CSV header.
years = [None if not row[7] else int(row[7]) for row in heroes]

In [3]:
# Series of years labelled by hero name. The None entries in `years` are
# stored as NaN, which is why the resulting dtype is float64.
first_appearance_s = pd.Series(data=years, index=names)
first_appearance_s.head(5)

A-Bomb         2008.0
Abraxas           NaN
Abomination       NaN
Adam Monroe       NaN
Agent 13          NaN
dtype: float64

In [4]:
# Label-based lookup: fetch the value stored under the index label 'Wonder Woman'.
first_appearance_s.loc['Wonder Woman']

1941.0

In [5]:
# Position-based lookup: fetch the element at integer position 128 (0-based),
# regardless of its label.
first_appearance_s.iloc[128]

1992.0

In [6]:
# Label-based slice: with .loc, BOTH endpoints ('Wonder Girl' and 'Wonder Woman')
# are included in the result.
first_appearance_s.loc['Wonder Girl':'Wonder Woman']

Wonder Girl     1996.0
Wonder Woman    1941.0
dtype: float64

In [7]:
# Position-based slice: follows Python slicing rules, so the stop position (128)
# is excluded and only positions 126 and 127 are returned.
first_appearance_s.iloc[126:128]

Spider-Woman II       NaN
Spider-Woman       1977.0
dtype: float64

In [8]:
# A list of integer positions selects those elements, returned in the order listed.
first_appearance_s.iloc[[42, 24, 6]]

Warbird         NaN
X-23            NaN
Agent Bob    2007.0
dtype: float64

In [9]:
# A list of labels selects those entries, returned in the order listed.
first_appearance_s.loc[['Rogue', 'A-Bomb']]

Rogue     1981.0
A-Bomb    2008.0
dtype: float64

In [10]:
# Boolean-mask selection: keep only entries whose year is below 1975
# (NaN compares as False, so missing years are dropped), then preview them.
(
    first_appearance_s
    .loc[first_appearance_s.lt(1975)]
    .head()
)

Abin Sur         1959.0
Animal Man       1965.0
Absorbing Man    1964.0
Wonder Woman     1941.0
Watcher          1963.0
dtype: float64

In [11]:
# Count how many heroes share each year; value_counts() orders the result
# by descending count, so the preview shows the most frequent years.
(
    first_appearance_s
    .value_counts()
    .head()
)

1964.0    18
1963.0    18
1965.0    14
2004.0    11
1976.0    10
dtype: int64

In [12]:
# Absolute frequency of each year, re-ordered chronologically (ascending index)
# instead of by count, so that label slices over year ranges are possible.
first_app_absfreq = (
    first_appearance_s
    .value_counts()
    .sort_index()
)
first_app_absfreq.head()

1933.0    1
1939.0    1
1940.0    9
1941.0    7
1943.0    2
dtype: int64

In [13]:
# Total count for index labels from 1960 onwards (label slice, start included).
# Series.sum() adds the values in one vectorized call instead of iterating the
# Series in Python; int() keeps the displayed result a plain integer.
int(first_app_absfreq.loc[1960:].sum())

330

In [14]:
# Total count for index labels between 1940 and 1966 (label slice, both ends
# included). Series.sum() adds the values in one vectorized call instead of
# iterating the Series in Python; int() keeps the displayed result a plain integer.
int(first_app_absfreq.loc[1940:1966].sum())

106

In [15]:
# Total count for index labels up to and including 1971 (label slice, end
# included). Series.sum() adds the values in one vectorized call instead of
# iterating the Series in Python; int() keeps the displayed result a plain integer.
int(first_app_absfreq.loc[:1971].sum())

130

In [16]:
# Columns 4 and 5 are parsed as floats; empty fields become None (NaN in the Series).
# NOTE(review): presumably height in centimetres and weight in kilograms - confirm
# against the CSV header.
heroes_height = [None if not row[4] else float(row[4]) for row in heroes]
heroes_weight = [None if not row[5] else float(row[5]) for row in heroes]
height = pd.Series(data=heroes_height, index=names)
weight = pd.Series(data=heroes_weight, index=names)

# Arithmetic on a Series is applied element-wise: every height divided by 100.
height.div(100).head()

A-Bomb         2.0321
Abraxas           NaN
Abomination    2.0304
Adam Monroe       NaN
Agent 13       1.7341
dtype: float64

In [17]:
# Series.apply calls the function once per element: here each height is
# divided by 100 and squared. NaN entries stay NaN.
(
    height
    .apply(lambda value: (value / 100) ** 2)
    .head()
)

A-Bomb         4.129430
Abraxas             NaN
Abomination    4.122524
Adam Monroe         NaN
Agent 13       3.007103
dtype: float64

In [18]:
# imc = weight / (height / 100)^2, computed element-wise and aligned on the hero-name index.
# NOTE(review): presumably a body-mass index (kg / m^2) - confirm the units of the columns.
# Plain Series arithmetic replaces the per-element `apply(lambda ...)` callback:
# it is vectorized and matches the form used for `standard_imc`.
imc = weight / (height / 100) ** 2
# Show the five largest values (NaN entries sort last).
imc.sort_values(ascending=False).head()

Utgard-Loki    2501.321629
Giganta        1607.124545
Red Hulk        137.611973
Darkseid        114.366701
Machine Man     114.083519
dtype: float64

In [19]:
# Keep only weights strictly between 40 and 100 and heights strictly between 120 and 210.
standard_weight = weight[weight.gt(40) & weight.lt(100)]
standard_height = height[height.gt(120) & height.lt(210)]
# The two filtered Series are aligned on their labels before dividing: a hero
# present in only one of them gets NaN in the result.
standard_imc = standard_weight / (standard_height / 100) ** 2
standard_imc.head()

A-Bomb                 NaN
Abe Sapien       17.868501
Abin Sur         26.410852
Abomination            NaN
Absorbing Man          NaN
dtype: float64