In [2]:
import pandas as pd
import numpy as np

In [3]:
# Small 4x2 frame with deliberate NaN gaps: row "a" lacks "two", row "c" lacks both.
data = [
    [1.4, np.nan],
    [7.1, -4.5],
    [np.nan, np.nan],
    [0.75, -1.3],
]
df = pd.DataFrame(data, index=list("abcd"), columns=["one", "two"])

In [4]:
# Show the frame just built, NaN gaps included.
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [5]:
# Column totals: collapse the row index. NaN cells are skipped by default.
df.sum(axis="index")

one    9.25
two   -5.80
dtype: float64

In [6]:
# Row totals: collapse the columns. An all-NaN row sums to 0.0.
df.sum(axis="columns")

a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64

In [7]:
# Total of a single column, selected by label.
df.loc[:, "one"].sum()

9.25

In [8]:
# Total of a single row, selected by label (float rounding shows in the raw scalar).
df.loc["b", :].sum()

2.5999999999999996

In [12]:
# Row means with NaN propagation switched on: any row containing a NaN yields NaN.
df.mean(skipna=False, axis="columns")

a      NaN
b    1.300
c      NaN
d   -0.275
dtype: float64

In [13]:
# Mean of column "one" (NaN skipped); used later as that column's fill value.
one_mean = df["one"].mean()

In [14]:
# Minimum of column "two" (NaN skipped); used later as that column's fill value.
two_min = df["two"].min()

In [15]:
# Impute the gaps column by column: "one" falls back to its mean, "two" to its minimum.
# This overwrites the columns of df, so the NaN cells are gone from here on.
df["one"] = df["one"].fillna(one_mean)
df["two"] = df["two"].fillna(two_min)

In [16]:
# Re-display df to confirm that no NaN cells remain after the fills.
df

Unnamed: 0,one,two
a,1.4,-4.5
b,7.1,-4.5
c,3.083333,-4.5
d,0.75,-1.3


In [17]:
# Fix the global NumPy seed so this draw, and later np.random.* calls executed
# in order, repeat exactly under Restart & Run All.
# NOTE: the outputs recorded in this notebook came from an unseeded run, so
# their numbers will differ until the cells are re-executed.
np.random.seed(42)

# 6 consecutive days x 4 columns of standard-normal noise.
df2 = pd.DataFrame(np.random.randn(6, 4),
                   columns=["A", "B", "C", "D"],
                   index=pd.date_range("20160701", periods=6))

In [18]:
# Inspect the random frame: six dated rows by columns A-D.
df2

Unnamed: 0,A,B,C,D
2016-07-01,0.058176,-1.016855,-0.986832,0.902385
2016-07-02,1.275798,-0.333652,-0.503585,0.546276
2016-07-03,0.662635,1.916435,2.033523,-0.195869
2016-07-04,-0.612766,0.393838,1.30573,0.348704
2016-07-05,-1.064835,-0.200526,0.204298,-0.533571
2016-07-06,0.325616,-0.205028,-0.31655,-1.241466


In [20]:
# Pairwise correlation between two columns (Pearson is the default method).
df2["A"].corr(df2["B"], method="pearson")

0.14832192970024605

In [21]:
# Pairwise covariance between two columns.
df2["B"].cov(other=df2["C"])

1.0892756980307048

In [22]:
# Full correlation matrix across all columns (Pearson is the default method).
df2.corr(method="pearson")

Unnamed: 0,A,B,C,D
A,1.0,0.148322,-0.133954,0.17596
B,0.148322,1.0,0.940111,-0.244076
C,-0.133954,0.940111,1.0,-0.170005
D,0.17596,-0.244076,-0.170005,1.0


In [23]:
# Full covariance matrix across all columns; no minimum-observation threshold.
df2.cov(min_periods=None)

Unnamed: 0,A,B,C,D
A,0.723251,0.126192,-0.131939,0.117708
B,0.126192,1.000841,1.089276,-0.192068
C,-0.131939,1.089276,1.341381,-0.154876
D,0.117708,-0.192068,-0.154876,0.618718


In [25]:
# Shuffle the row order and rearrange the columns to D, B, C, A.
# This rebinds df2, so re-running the cell reshuffles the already-shuffled frame.
dates = df2.index
random_dates = np.random.permutation(dates)
df2 = df2.reindex(columns=list("DBCA"), index=random_dates)

In [26]:
# df2 after the reindex: rows in shuffled date order, columns as D, B, C, A.
df2

Unnamed: 0,D,B,C,A
2016-07-05,-0.533571,-0.200526,0.204298,-1.064835
2016-07-01,0.902385,-1.016855,-0.986832,0.058176
2016-07-04,0.348704,0.393838,1.30573,-0.612766
2016-07-06,-1.241466,-0.205028,-0.31655,0.325616
2016-07-03,-0.195869,1.916435,2.033523,0.662635
2016-07-02,0.546276,-0.333652,-0.503585,1.275798


In [27]:
# Sort rows by their index labels (dates ascending); returns a new frame.
df2.sort_index(axis="index")

Unnamed: 0,D,B,C,A
2016-07-01,0.902385,-1.016855,-0.986832,0.058176
2016-07-02,0.546276,-0.333652,-0.503585,1.275798
2016-07-03,-0.195869,1.916435,2.033523,0.662635
2016-07-04,0.348704,0.393838,1.30573,-0.612766
2016-07-05,-0.533571,-0.200526,0.204298,-1.064835
2016-07-06,-1.241466,-0.205028,-0.31655,0.325616


In [29]:
# Sort columns by their labels (A..D); the row order is left as it is.
df2.sort_index(axis="columns")

Unnamed: 0,A,B,C,D
2016-07-05,-1.064835,-0.200526,0.204298,-0.533571
2016-07-01,0.058176,-1.016855,-0.986832,0.902385
2016-07-04,-0.612766,0.393838,1.30573,0.348704
2016-07-06,0.325616,-0.205028,-0.31655,-1.241466
2016-07-03,0.662635,1.916435,2.033523,-0.195869
2016-07-02,1.275798,-0.333652,-0.503585,0.546276


In [30]:
# Sort rows by index label, newest date first.
df2.sort_index(ascending=False, axis="index")

Unnamed: 0,D,B,C,A
2016-07-06,-1.241466,-0.205028,-0.31655,0.325616
2016-07-05,-0.533571,-0.200526,0.204298,-1.064835
2016-07-04,0.348704,0.393838,1.30573,-0.612766
2016-07-03,-0.195869,1.916435,2.033523,0.662635
2016-07-02,0.546276,-0.333652,-0.503585,1.275798
2016-07-01,0.902385,-1.016855,-0.986832,0.058176


In [31]:
# Sort columns by label in reverse order (D..A).
df2.sort_index(ascending=False, axis="columns")

Unnamed: 0,D,C,B,A
2016-07-05,-0.533571,0.204298,-0.200526,-1.064835
2016-07-01,0.902385,-0.986832,-1.016855,0.058176
2016-07-04,0.348704,1.30573,0.393838,-0.612766
2016-07-06,-1.241466,-0.31655,-0.205028,0.325616
2016-07-03,-0.195869,2.033523,1.916435,0.662635
2016-07-02,0.546276,-0.503585,-0.333652,1.275798


In [32]:
# The sort_index calls return new frames; df2 itself still has the shuffled order.
df2

Unnamed: 0,D,B,C,A
2016-07-05,-0.533571,-0.200526,0.204298,-1.064835
2016-07-01,0.902385,-1.016855,-0.986832,0.058176
2016-07-04,0.348704,0.393838,1.30573,-0.612766
2016-07-06,-1.241466,-0.205028,-0.31655,0.325616
2016-07-03,-0.195869,1.916435,2.033523,0.662635
2016-07-02,0.546276,-0.333652,-0.503585,1.275798


In [36]:
# Order rows by the values in column D, smallest first.
df2.sort_values(by="D", ascending=True)

Unnamed: 0,D,B,C,A
2016-07-06,-1.241466,-0.205028,-0.31655,0.325616
2016-07-05,-0.533571,-0.200526,0.204298,-1.064835
2016-07-03,-0.195869,1.916435,2.033523,0.662635
2016-07-04,0.348704,0.393838,1.30573,-0.612766
2016-07-02,0.546276,-0.333652,-0.503585,1.275798
2016-07-01,0.902385,-1.016855,-0.986832,0.058176


In [37]:
# Order rows by the values in column B, smallest first.
df2.sort_values(by="B", ascending=True)

Unnamed: 0,D,B,C,A
2016-07-01,0.902385,-1.016855,-0.986832,0.058176
2016-07-02,0.546276,-0.333652,-0.503585,1.275798
2016-07-06,-1.241466,-0.205028,-0.31655,0.325616
2016-07-05,-0.533571,-0.200526,0.204298,-1.064835
2016-07-04,0.348704,0.393838,1.30573,-0.612766
2016-07-03,-0.195869,1.916435,2.033523,0.662635


In [38]:
# Add two columns to df2 in place: E holds random integers drawn from 0..5
# (the upper bound 6 is exclusive), F holds a fixed string label per row.
df2["E"] = np.random.randint(low=0, high=6, size=6)
df2["F"] = ["alpha", "beta", "gamma", "gamma", "alpha", "gamma"]

In [39]:
# df2 now carries the extra E (integer) and F (label) columns.
df2

Unnamed: 0,D,B,C,A,E,F
2016-07-05,-0.533571,-0.200526,0.204298,-1.064835,3,alpha
2016-07-01,0.902385,-1.016855,-0.986832,0.058176,1,beta
2016-07-04,0.348704,0.393838,1.30573,-0.612766,1,gamma
2016-07-06,-1.241466,-0.205028,-0.31655,0.325616,2,gamma
2016-07-03,-0.195869,1.916435,2.033523,0.662635,5,alpha
2016-07-02,0.546276,-0.333652,-0.503585,1.275798,1,gamma


In [40]:
# Multi-key sort: order by E first, then break ties on F (both ascending).
df2.sort_values(by=["E", "F"], ascending=[True, True])

Unnamed: 0,D,B,C,A,E,F
2016-07-01,0.902385,-1.016855,-0.986832,0.058176,1,beta
2016-07-04,0.348704,0.393838,1.30573,-0.612766,1,gamma
2016-07-02,0.546276,-0.333652,-0.503585,1.275798,1,gamma
2016-07-06,-1.241466,-0.205028,-0.31655,0.325616,2,gamma
2016-07-05,-0.533571,-0.200526,0.204298,-1.064835,3,alpha
2016-07-03,-0.195869,1.916435,2.033523,0.662635,5,alpha


In [41]:
# Distinct labels in F, in order of first appearance.
df2.loc[:, "F"].unique()

array(['alpha', 'beta', 'gamma'], dtype=object)

In [42]:
# Frequency of each label in F, most common first.
df2["F"].value_counts(sort=True, ascending=False)

gamma    3
alpha    2
beta     1
Name: F, dtype: int64

In [43]:
# Boolean mask: True where the row's F label is one of the listed values.
df2["F"].isin(values=["alpha", "beta"])

2016-07-05     True
2016-07-01     True
2016-07-04    False
2016-07-06    False
2016-07-03     True
2016-07-02    False
Name: F, dtype: bool

In [45]:
# Keep only the rows whose F label is "alpha" or "beta" (all columns retained).
df2[df2["F"].isin(["alpha", "beta"])]

Unnamed: 0,D,B,C,A,E,F
2016-07-05,-0.533571,-0.200526,0.204298,-1.064835,3,alpha
2016-07-01,0.902385,-1.016855,-0.986832,0.058176,1,beta
2016-07-03,-0.195869,1.916435,2.033523,0.662635,5,alpha


In [46]:
# 4x3 frame of standard-normal draws, one row per city.
df3 = pd.DataFrame(
    np.random.randn(4, 3),
    index=["Seoul", "Incheon", "Busan", "Daegu"],
    columns=list("bde"),
)

In [47]:
# Inspect df3: four city rows by columns b, d, e.
df3

Unnamed: 0,b,d,e
Seoul,0.215799,-0.636351,0.242144
Incheon,-0.657669,0.653206,1.810914
Busan,0.160315,0.434453,1.319166
Daegu,-1.218617,0.627084,-0.11448


In [48]:
def func(x):
    """Return the spread of ``x``: its maximum minus its minimum.

    ``x`` is any object exposing ``.max()`` and ``.min()`` (for example a
    pandas Series or a NumPy array). Written as a named ``def`` rather than a
    lambda bound to a name, so it has a real ``__name__`` in tracebacks and
    can carry this docstring.
    """
    return x.max() - x.min()

In [49]:
# Apply func to each column (one value per column label).
df3.apply(func, axis="index")

b    1.434416
d    1.289557
e    1.925393
dtype: float64

In [50]:
# Apply func to each row (one value per city).
df3.apply(func, axis="columns")

Seoul      0.878495
Incheon    2.468582
Busan      1.158850
Daegu      1.845701
dtype: float64