In [125]:
import pandas as pd
import numpy as np
df = pd.read_csv('resources/dataset_test.csv', index_col=0)

In [126]:
df.dtypes

Hogwarts House                   float64
First Name                        object
Last Name                         object
Birthday                          object
Best Hand                         object
Arithmancy                       float64
Astronomy                        float64
Herbology                        float64
Defense Against the Dark Arts    float64
Divination                       float64
Muggle Studies                   float64
Ancient Runes                    float64
History of Magic                 float64
Transfiguration                  float64
Potions                          float64
Care of Magical Creatures        float64
Charms                           float64
Flying                           float64
dtype: object

In [131]:
import math

def count(f):
    n = 0
    for c in f:
        n += not math.isnan(c)
    return n

def mean(f):
    f = list(filter(lambda x: not math.isnan(x), f))
    if len(f) > 0:
        return sum(f) / count(f)
    return float('nan')

def std(f):
    f_mean = mean(f)
    return mean([(c - f_mean) ** 2 for c in f]) ** .5

In [133]:
columns = []
statistics = {'count':[],
              'mean': [],
              'std': [],
              'min': [],
              '25%': [],
              '50%': [],
              '75%': [],
              'max': []}
for c, d in zip(df.dtypes.index, df.dtypes):
    d = str(d)
    if d.startswith('int') or d.startswith("float"):
        columns.append(c)
        
        feature = df[c]
        statistics['count'].append(count(feature))
        feature = [c for c in feature if not math.isnan(c)]
        feature = sorted(feature)
        n = len(feature)
        
        if n > 0:
            f_min = feature[0]
            f_max = feature[-1]
            f_25 = feature[int(n * .25)]
            f_50 = feature[int(n * .5)]
            f_75 = feature[int(n * .75)]
        else:
            f_min = f_max = f_25 = f_50 = f_75 = float('nan')
        
        statistics['mean'].append(mean(feature))
        statistics['std'].append(std(feature))
        statistics['min'].append(f_min)
        statistics['25%'].append(f_25)
        statistics['50%'].append(f_50)
        statistics['75%'].append(f_75)
        statistics['max'].append(f_max)

pd.DataFrame(statistics, index=columns).T

Unnamed: 0,Hogwarts House,Arithmancy,Astronomy,Herbology,Defense Against the Dark Arts,Divination,Muggle Studies,Ancient Runes,History of Magic,Transfiguration,Potions,Care of Magical Creatures,Charms,Flying
count,0.0,387.0,387.0,389.0,392.0,394.0,390.0,392.0,389.0,389.0,390.0,392.0,400.0,400.0
mean,,50088.971576,48.155326,1.385517,-0.537843,3.411071,-220.169594,495.937543,2.829816,1030.885777,5.77686,0.022985,-243.181109,23.36745
std,,15452.744803,511.875159,5.048793,5.112229,3.88625,497.030335,101.52767,4.311657,45.288575,3.213497,1.017705,8.716274,95.066887
min,,4536.0,-802.72518,-9.687662,-8.700635,-8.183,-1041.323658,319.36025,-7.18909,906.93205,-1.16206,-2.871119,-259.86678,-143.52
25%,,40142.0,-485.300284,-4.183731,-5.205897,3.527,-580.802421,400.888868,2.227669,1028.499974,3.226146,-0.645976,-250.14318,-38.44
50%,,49238.0,292.108738,3.63049,-2.91022,4.717,-407.494571,486.508729,4.290164,1047.648405,5.733112,0.063305,-244.74546,-2.26
75%,,60182.0,511.393084,5.405685,4.785424,5.573,225.573185,591.524186,5.662488,1060.367924,8.137329,0.720617,-232.77461,45.85
max,,99744.0,870.063498,9.678462,8.027252,8.604,939.317135,667.674165,10.366995,1099.966073,13.390013,3.205525,-226.00382,282.43


In [18]:
df.describe()

Unnamed: 0,Arithmancy,Astronomy,Herbology,Defense Against the Dark Arts,Divination,Muggle Studies,Ancient Runes,History of Magic,Transfiguration,Potions,Care of Magical Creatures,Charms,Flying
count,1566.0,1568.0,1567.0,1569.0,1561.0,1565.0,1565.0,1557.0,1566.0,1570.0,1560.0,1600.0,1600.0
mean,49634.570243,39.797131,1.14102,-0.387863,3.15391,-224.589915,495.74797,2.963095,1030.096946,5.950373,-0.053427,-243.374409,21.958012
std,16679.806036,520.298268,5.219682,5.212794,4.155301,486.34484,106.285165,4.425775,44.125116,3.147854,0.971457,8.78364,97.631602
min,-24370.0,-966.740546,-10.295663,-10.162119,-8.727,-1086.496835,283.869609,-8.858993,906.62732,-4.697484,-3.313676,-261.04892,-181.47
25%,38511.5,-489.551387,-4.308182,-5.259095,3.099,-577.580096,397.511047,2.218653,1026.209993,3.646785,-0.671606,-250.6526,-41.87
50%,49013.5,260.289446,3.469012,-2.589342,4.624,-419.164294,463.918305,4.378176,1045.506996,5.874837,-0.044811,-244.867765,-2.515
75%,60811.25,524.771949,5.419183,4.90468,5.667,254.994857,597.49223,5.825242,1058.43641,8.248173,0.589919,-232.552305,50.56
max,104956.0,1016.21194,11.612895,9.667405,10.032,1092.388611,745.39622,11.889713,1098.958201,13.536762,3.056546,-225.42814,279.07
