In [1]:
%matplotlib inline

import csv
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Global plotting defaults: apply the 'fivethirtyeight' style sheet first,
# then override the default figure size (width 5.0, height 2.0).
plt.style.use('fivethirtyeight')
plt.rcParams['figure.figsize'] = (5.0, 2.0)

In [2]:
# Load the hero table (semicolon-separated, first column used as the index)
# and keep only the rows whose 'First appearance' value is below 2020.
# Rows with a missing 'First appearance' fail the comparison, so they are
# dropped as well.
heroes = (
    pd.read_csv('../data/heroes.csv', sep=';', index_col=0)
      .loc[lambda table: table['First appearance'] < 2020]
)

In [3]:
# Number of heroes per publisher, most frequent publisher first.
heroes.loc[:, 'Publisher'].value_counts()

Marvel Comics        205
DC Comics            121
Dark Horse Comics     12
George Lucas          11
ABC Studios            4
Image Comics           3
Rebellion              1
Hanna-Barbera          1
Star Trek              1
Universal Studios      1
Name: Publisher, dtype: int64

In [4]:
# Absolute frequencies of 'Publisher': a one-column cross tabulation whose
# single column is labelled 'Abs. frequence'; colnames=[''] blanks the name
# of the column index.
publisher_abs_freq = pd.crosstab(
    heroes['Publisher'], 'Abs. frequence', colnames=[''])

# Show the table as the cell output.
publisher_abs_freq

Unnamed: 0_level_0,Abs. frequence
Publisher,Unnamed: 1_level_1
ABC Studios,4
DC Comics,121
Dark Horse Comics,12
George Lucas,11
Hanna-Barbera,1
Image Comics,3
Marvel Comics,205
Rebellion,1
Star Trek,1
Universal Studios,1


In [5]:
# Relative frequencies of 'Publisher': normalize=True divides every count by
# the grand total, so the single column sums to 1.
# The column is labelled 'Rel. frequence' so that the header describes its
# content (it previously reused the label of the absolute-frequency table).
publisher_rel_freq = pd.crosstab(
    index=heroes['Publisher'],
    columns='Rel. frequence',
    colnames=[''],
    normalize=True)

# Display rounded to two decimals; the stored table keeps full precision.
publisher_rel_freq.round(2)

Unnamed: 0_level_0,Abs. frequence
Publisher,Unnamed: 1_level_1
ABC Studios,0.01
DC Comics,0.34
Dark Horse Comics,0.03
George Lucas,0.03
Hanna-Barbera,0.0
Image Comics,0.01
Marvel Comics,0.57
Rebellion,0.0
Star Trek,0.0
Universal Studios,0.0


In [6]:
# Smallest and largest 'First appearance' value, cast to int for printing.
first_appearance_years = heroes['First appearance']
print(int(first_appearance_years.min()), int(first_appearance_years.max()))

(1933, 2016)


In [7]:
# Cumulative absolute frequencies of 'First appearance': count the heroes per
# value, then take the running total down the table.
first_app_cumsum = pd.crosstab(
    heroes['First appearance'], ['Cum. frequence'], colnames=['']
).cumsum()

# First ten rows of the cumulative table.
first_app_cumsum.head(10)

Unnamed: 0_level_0,Cum. frequence
First appearance,Unnamed: 1_level_1
1933.0,1
1939.0,2
1940.0,11
1941.0,18
1943.0,20
1944.0,22
1945.0,23
1947.0,25
1948.0,26
1950.0,27


In [8]:
# Heroes whose first appearance is later than 1980: the last cumulative count
# (all heroes) minus the cumulative count reached at the 1980.0 row.
(first_app_cumsum['Cum. frequence'].iloc[-1]
 - first_app_cumsum['Cum. frequence'].loc[1980.0])

172

In [9]:
# Cumulative relative frequencies of 'First appearance': normalize=True turns
# the per-value counts into fractions of the total before the running sum,
# so the last row reaches 1.
first_app_rel_cumsum = pd.crosstab(
    heroes['First appearance'], ['Cum. frequence'], colnames=[''],
    normalize=True
).cumsum()

# Last ten rows of the cumulative table.
first_app_rel_cumsum.tail(10)

Unnamed: 0_level_0,Cum. frequence
First appearance,Unnamed: 1_level_1
2006.0,0.923497
2007.0,0.937158
2008.0,0.953552
2009.0,0.956284
2010.0,0.967213
2011.0,0.980874
2012.0,0.986339
2013.0,0.989071
2015.0,0.994536
2016.0,1.0


In [10]:
# Joint absolute frequencies: 'Intelligence' values as rows, 'Gender' values
# as columns.
intelligence = pd.crosstab(heroes['Intelligence'], heroes['Gender'])

# Show the table as the cell output.
intelligence

Gender,F,M
Intelligence,Unnamed: 1_level_1,Unnamed: 2_level_1
average,22,67
good,37,97
high,13,63
low,0,9
moderate,11,22


In [11]:
# Show the rows in an explicit order instead of the table's default sorted
# order; all columns are kept.
intelligence.loc[['low', 'moderate', 'average', 'good', 'high']]

Gender,F,M
Intelligence,Unnamed: 1_level_1,Unnamed: 2_level_1
low,0,9
moderate,11,22
average,22,67
good,37,97
high,13,63


In [12]:
# Joint absolute frequencies with totals: margins=True adds the 'All'
# row and column to the Intelligence x Gender table.
intelligence = pd.crosstab(
    heroes['Intelligence'], heroes['Gender'], margins=True)

# Rows in an explicit order; the 'All' row is not part of this selection,
# so only the 'All' column of totals is displayed.
intelligence.loc[['low', 'moderate', 'average', 'good', 'high']]

Gender,F,M,All
Intelligence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
low,0,9,9
moderate,11,22,33
average,22,67,89
good,37,97,134
high,13,63,76


In [13]:
# Joint relative frequencies: normalize='all' divides every cell by the grand
# total, and margins=True adds the 'All' row and column.
intelligence = pd.crosstab(
    heroes['Intelligence'], heroes['Gender'], margins=True, normalize='all')

# Rows in an explicit order with the 'All' row last, rounded to three
# decimals for display only.
intelligence.loc[['low', 'moderate', 'average', 'good', 'high', 'All']].round(3)

Gender,F,M,All
Intelligence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
low,0.0,0.026,0.026
moderate,0.032,0.065,0.097
average,0.065,0.196,0.261
good,0.109,0.284,0.393
high,0.038,0.185,0.223
All,0.243,0.757,1.0


In [14]:
# Gender distribution within each 'Intelligence' value: normalize='index'
# scales every row so that it sums to 1.
intelligence = pd.crosstab(
    heroes['Intelligence'], heroes['Gender'], margins=True, normalize='index')

# Rows in an explicit order (the 'All' row is not selected), rounded to three
# decimals for display only.
intelligence.loc[['low', 'moderate', 'average', 'good', 'high']].round(3)

Gender,F,M
Intelligence,Unnamed: 1_level_1,Unnamed: 2_level_1
low,0.0,1.0
moderate,0.333,0.667
average,0.247,0.753
good,0.276,0.724
high,0.171,0.829


In [15]:
# 'Intelligence' distribution within each 'Gender' value: normalize='columns'
# scales every column so that it sums to 1; with margins=True the table also
# carries an 'All' column.
intelligence = pd.crosstab(
    heroes['Intelligence'], heroes['Gender'], margins=True, normalize='columns')

# Rows in an explicit order, rounded to three decimals for display only.
intelligence.loc[['low', 'moderate', 'average', 'good', 'high']].round(3)

Gender,F,M,All
Intelligence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
low,0.0,0.035,0.026
moderate,0.133,0.085,0.097
average,0.265,0.26,0.261
good,0.446,0.376,0.393
high,0.157,0.244,0.223
