In [1]:
import numpy as np
import pandas as pd

In [62]:
# Three columns of standard-normal draws on overlapping but different row
# labels. The DataFrame constructor aligns them on the union of the labels,
# so a column is NaN wherever it has no value for a row.
col_one = pd.Series(np.random.randn(3), index=['a', 'b', 'c'])
col_two = pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd'])
col_three = pd.Series(np.random.randn(3), index=['b', 'c', 'd'])
df = pd.DataFrame({'one': col_one, 'two': col_two, 'three': col_three})

# Independent copy of the frame, displayed as the cell result.
df2 = df.copy()
df2

Unnamed: 0,one,two,three
a,-0.92633,0.37367,
b,0.075582,-0.42308,1.069148
c,-0.612162,-1.232719,-0.262003
d,,0.075448,0.270596


## Boolean Comparisons

In [63]:
# Element-wise "greater than". Evaluated, but not displayed because it is not
# the last expression of the cell.
df > df2
# Element-wise "not equal"; this is the result the cell displays.
df2 != df

# np.nan == np.nan is False, so positions where both frames hold NaN show up
# as True in the "not equal" result.

Unnamed: 0,one,two,three
a,False,False,True
b,False,False,False
c,False,False,False
d,True,False,False


In [64]:
# Boolean mask of strictly positive entries, reduced three ways.
positive = df.gt(0)
positive.all()         # per column: is every entry positive?
positive.any()         # per column: is at least one entry positive?
positive.any().any()   # whole frame: is any entry positive? (displayed)

True

In [65]:
# Reduce single-element boolean containers to a plain Python bool.
# Series.bool() / DataFrame.bool() are deprecated since pandas 2.1 (they emit
# a FutureWarning and are scheduled for removal), so item() is used instead.
# Like bool(), item() raises ValueError unless exactly one element is present;
# unlike bool(), it does not check that the element is boolean.
pd.Series([True]).item()
pd.Series([False]).item()
# DataFrame has no item(), so go through its 1x1 ndarray. Displays True.
pd.DataFrame([[True]]).to_numpy().item()

True

## Object Comparisons

In [66]:
# A Series of plain strings; the displayed repr reports its dtype.
pd.Series(data=['foo', 'bar', 'baz'])

0    foo
1    bar
2    baz
dtype: object

In [67]:
words = pd.Series(['foo', 'bar', 'baz'])
# Scalar on the right: compared against every element (not displayed).
words == 'foo'
# Equal-length Index on the right: compared position by position (displayed).
words == pd.Index(['foo', 'bar', 'qux'])

0     True
1     True
2    False
dtype: bool

In [70]:
doubled_by_sum = df + df
doubled_by_mul = df * 2
# Element-wise == followed by a full reduction (not displayed).
(doubled_by_sum == doubled_by_mul).all().all()
# Whole-object comparison via equals(); this is the displayed result.
doubled_by_sum.equals(doubled_by_mul)

True

## Descriptive Statistics

In [72]:
df.mean(axis='index')     # one mean per column (the default axis); not displayed
df.mean(axis='columns')   # one mean per row; this is the displayed result

a   -0.276330
b    0.240550
c   -0.702295
d    0.173022
dtype: float64

In [75]:
# Standardise each column: subtract the column mean, divide by the column
# standard deviation. The Series operands align on the column labels.
col_mean = df.mean()
col_std = df.std()
ts_stand = df.sub(col_mean).div(col_std)
# Check: each standardised column should report a standard deviation of 1.
ts_stand.std()

one      1.0
two      1.0
three    1.0
dtype: float64

In [80]:
# 1000 standard-normal draws with every other value (positions 0, 2, 4, ...)
# blanked out, so describe() reports a count of 500.
draws = np.random.randn(1000)
draws[::2] = np.nan
series = pd.Series(draws)
series.describe()

count    500.000000
mean      -0.055059
std        0.996863
min       -2.982020
25%       -0.731319
50%       -0.053809
75%        0.618111
max        2.575541
dtype: float64

In [91]:
# 1000 x 5 standard-normal draws with every other row (0, 2, 4, ...) set to
# NaN, so describe() reports a count of 500 for every column.
samples = np.random.randn(1000, 5)
samples[::2] = np.nan
frame = pd.DataFrame(samples, columns=['a', 'b', 'c', 'd', 'e'])
frame.describe()

Unnamed: 0,a,b,c,d,e
count,500.0,500.0,500.0,500.0,500.0
mean,-0.060638,-0.015821,0.064653,-0.080959,0.00342
std,0.993974,1.018144,0.980136,1.014138,0.970389
min,-2.76334,-3.15984,-2.764524,-3.712428,-2.732238
25%,-0.778595,-0.692851,-0.593954,-0.820893,-0.629339
50%,-0.055475,-0.068801,0.040544,-0.075277,0.043308
75%,0.609831,0.677304,0.743581,0.527007,0.642687
max,2.908487,3.274045,2.936665,3.277804,2.878577


In [96]:
# String data with one missing value. For non-numeric data describe()
# reports count / unique / top / freq, and the NaN is excluded from the count.
s = pd.Series([
    'a', 'a', 'b', 'b', 'a',
    'a', np.nan, 'c', 'd', 'a',
])
s.describe()

count     9
unique    4
top       a
freq      5
dtype: object

In [100]:
# Five standard-normal draws on the default integer index.
s1 = pd.Series(data=np.random.randn(5))
s1

0    0.658560
1   -1.388871
2    1.169117
3    0.464620
4   -0.376023
dtype: float64

In [102]:
# Index labels of the smallest and largest values, shown together as a tuple.
label_of_min = s1.idxmin()
label_of_max = s1.idxmax()
label_of_min, label_of_max

(1, 2)

In [105]:
# 5 x 3 frame of standard-normal draws with columns A, B, C.
df1 = pd.DataFrame(
    data=np.random.randn(5, 3),
    columns=['A', 'B', 'C'],
)
df1

Unnamed: 0,A,B,C
0,0.411611,-1.149591,1.702377
1,-1.094111,-0.823944,1.355576
2,-0.421896,0.406863,0.020997
3,-1.755119,-0.474278,-2.387165
4,-1.978347,1.343691,-1.078091


In [107]:
# Per column: the row label holding the minimum (default axis).
row_of_col_min = df1.idxmin(axis='index')
# Per row: the column label holding the maximum.
col_of_row_max = df1.idxmax(axis='columns')
row_of_col_min, col_of_row_max

(A    4
 B    0
 C    3
 dtype: int64,
 0    C
 1    C
 2    B
 3    B
 4    B
 dtype: object)

In [113]:
# NOTE: rebinds `df` to a new, unrelated two-column frame.
df = pd.DataFrame(
    data={'col1': np.random.randn(3), 'col2': np.random.rand(3)},
    index=['a', 'b', 'c'],
)

# Iterating over a DataFrame walks its column labels.
for col in df.columns:
    print(col)

col1
col2


In [118]:
# NOTE: rebinds `df` to a small frame of strings.
df = pd.DataFrame(
    data={
        'a': ['a', 'b', 'c'],
        'b': ['a', 'b', 'c'],
    }
)
# items() yields one (column label, column Series) pair per column.
for label, ser in df.items():
    print(label, ser, sep='\n')

a
0    a
1    b
2    c
Name: a, dtype: object
b
0    a
1    b
2    c
Name: b, dtype: object


In [121]:
# iterrows() yields (index label, row as a Series) pairs; print the label
# and then the row on the following lines.
for row_index, row in df.iterrows():
    print(row_index)
    print(row)

0
a    a
b    a
Name: 0, dtype: object
1
a    b
b    b
Name: 1, dtype: object
2
a    c
b    c
Name: 2, dtype: object


In [122]:
# itertuples() yields one namedtuple per row; the defaults are spelled out:
# the index is included as the first field and the tuple type is "Pandas".
for row in df.itertuples(index=True, name='Pandas'):
    print(row)

Pandas(Index=0, a='a', b='a')
Pandas(Index=1, a='b', b='b')
Pandas(Index=2, a='c', b='c')
