In [1]:
import pandas as pd
import numpy as np

In [2]:
# Small demo frame with missing values, built column by column:
# rows are labelled 'a'..'d'; row 'c' is entirely NaN.
df = pd.DataFrame(
    {
        'one': [1.4, 7.1, np.nan, 0.75],
        'two': [np.nan, -4.5, np.nan, -1.3],
    },
    index=['a', 'b', 'c', 'd'],
)
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [3]:
# Summing down the rows (the default axis) yields a Series with one total
# per column; NaN entries are left out of each total.
df.sum(axis=0)

one    9.25
two   -5.80
dtype: float64

In [4]:
# axis=1 (equivalently axis='columns') adds up the values within each row
# instead; a row holding only NaN comes out as 0.
df.sum(axis=1)

a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64

In [6]:
# Reductions skip NA values by default. Passing skipna=False switches that
# off, so any row that contains at least one NA produces NaN.
df.mean(axis=1, skipna=False)

a      NaN
b    1.300
c      NaN
d   -0.275
dtype: float64

In [7]:
# idxmin / idxmax report *where* the extreme occurs rather than the extreme
# itself: for each column, the index label of its largest value.
df.idxmax(axis=0)

one    b
two    d
dtype: object

In [8]:
# Accumulation methods return a frame of the same shape: here, the running
# total down each column. NaN cells stay NaN but do not reset the total.
df.cumsum(axis=0)

Unnamed: 0,one,two
a,1.4,
b,8.5,-4.5
c,,
d,9.25,-5.8


In [9]:
# describe() computes several summary statistics per column in one call:
# non-NA count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,one,two
count,3.0,2.0
mean,3.083333,-2.9
std,3.493685,2.262742
min,0.75,-4.5
25%,1.075,-3.7
50%,1.4,-2.9
75%,4.25,-2.1
max,7.1,-1.3


In [11]:
# For non-numeric data describe() switches to a different summary:
# count, number of unique values, most frequent value and its frequency.
obj = pd.Series(list('aabc') * 4)
obj.describe()

count     16
unique     3
top        a
freq       8
dtype: object