In [1]:
import pandas as pd
import numpy as np

In [8]:
# Sample frame for the aggregation examples: two numeric columns with
# missing values (NaN) so that the NaN handling of sum()/mean() is visible.
data = [
    [1.4, np.nan],
    [7.1, -4.5],
    [np.nan, np.nan],
    [0.75, -1.3],
]
df = pd.DataFrame(data=data, index=list("abcd"), columns=["one", "two"])

In [9]:
# Display the frame; the blank cells in the output are the NaN entries.
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [13]:
# axis=0 aggregates down the rows, i.e. it returns one total per column.
# sum() excludes NaN by default.
df.sum(axis=0)

one    9.25
two   -5.80
dtype: float64

In [14]:
# axis=1 aggregates across the columns, i.e. it returns one total per row.
# Row "c" holds only NaN, so with NaN skipped its total is 0.0.
df.sum(axis = 1)

a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64

In [15]:
# Total of a single column, selected by its column label.
df["one"].sum()

9.25

In [17]:
# Use .loc to select a row by its index label, then sum across that row.
# The result prints as 2.5999999999999996 rather than 2.6 because of binary
# floating-point rounding.
df.loc["b"].sum()

2.5999999999999996

In [19]:
# axis=1: average across the columns (one mean per row).
# skipna=False turns off NaN skipping, so any row containing a NaN yields NaN.
df.mean(axis = 1, skipna=False)

a      NaN
b    1.300
c      NaN
d   -0.275
dtype: float64

In [20]:
# Mean of column "one" (NaN skipped); used as the fill value for that column.
one_mean = df["one"].mean()

In [21]:
# Minimum of column "two" (NaN skipped); used as the fill value for that column.
two_min = df["two"].min()

In [22]:
# Replace the missing entries of column "one" with the column mean.
df["one"] = df["one"].fillna(one_mean)

In [23]:
# Replace the missing entries of column "two" with the column minimum.
df["two"] = df["two"].fillna(two_min)

In [24]:
# Display the frame again: the NaN entries have been replaced by the fill values.
df

Unnamed: 0,one,two
a,1.4,-4.5
b,7.1,-4.5
c,3.083333,-4.5
d,0.75,-1.3


In [25]:
# Six consecutive days of standard-normal samples in four columns.
# The generator is seeded so the table is identical on every
# Restart & Run All; the unseeded np.random.randn call produced different
# numbers on each run, so stored outputs could never be reproduced.
df2 = pd.DataFrame(
    np.random.default_rng(42).standard_normal((6, 4)),
    columns=["A", "B", "C", "D"],
    index=pd.date_range("20160701", periods=6),
)

In [26]:
# Display the random frame (6 dated rows x 4 columns).
df2

Unnamed: 0,A,B,C,D
2016-07-01,0.179054,-0.393727,-1.227297,-0.470906
2016-07-02,0.795337,0.390497,1.852094,0.897029
2016-07-03,1.523164,-1.251221,0.389532,1.461071
2016-07-04,-0.516295,-1.415513,1.242721,-0.514151
2016-07-05,2.071471,-1.833716,0.006836,-0.07233
2016-07-06,-0.681095,0.597763,0.987452,-1.546358


In [29]:
# Correlation coefficient between columns A and B.
df2["A"].corr(df2["B"])

-0.5300255829948751

In [30]:
# Covariance between columns B and C.
df2["B"].cov(df2["C"])

0.3308561648143835

In [33]:
# Pairwise correlation coefficients between all columns of df2.
df2.corr()

Unnamed: 0,A,B,C,D
A,1.0,-0.530026,-0.214223,0.698256
B,-0.530026,1.0,0.30435,-0.279774
C,-0.214223,0.30435,1.0,0.13299
D,0.698256,-0.279774,0.13299,1.0


In [34]:
# Pairwise covariances between all columns of df2.
df2.cov()

Unnamed: 0,A,B,C,D
A,1.222288,-0.58913,-0.256089,0.832291
B,-0.58913,1.010776,0.330856,-0.303255
C,-0.256089,0.330856,1.169169,0.155035
D,0.832291,-0.303255,0.155035,1.162378


In [35]:
# Shuffle the row order and rearrange the columns so the sorting examples
# have something to sort. The permutation is drawn from a seeded generator
# so the shuffled order is the same on every Restart & Run All (the
# unseeded np.random.permutation gave a different order on each run).
dates = df2.index
random_dates = np.random.default_rng(0).permutation(dates.to_numpy())
df2 = df2.reindex(index=random_dates, columns=["D", "B", "C", "A"])

In [36]:
# Display the frame after reindexing: rows shuffled, columns in D, B, C, A order.
df2

Unnamed: 0,D,B,C,A
2016-07-06,-1.546358,0.597763,0.987452,-0.681095
2016-07-03,1.461071,-1.251221,0.389532,1.523164
2016-07-02,0.897029,0.390497,1.852094,0.795337
2016-07-05,-0.07233,-1.833716,0.006836,2.071471
2016-07-04,-0.514151,-1.415513,1.242721,-0.516295
2016-07-01,-0.470906,-0.393727,-1.227297,0.179054


In [37]:
# Sort the rows by their index labels in ascending order.
# The result is not assigned, so df2 itself keeps its shuffled order.
df2.sort_index(axis=0)

Unnamed: 0,D,B,C,A
2016-07-01,-0.470906,-0.393727,-1.227297,0.179054
2016-07-02,0.897029,0.390497,1.852094,0.795337
2016-07-03,1.461071,-1.251221,0.389532,1.523164
2016-07-04,-0.514151,-1.415513,1.242721,-0.516295
2016-07-05,-0.07233,-1.833716,0.006836,2.071471
2016-07-06,-1.546358,0.597763,0.987452,-0.681095


In [38]:
# Sort the columns by their labels (A, B, C, D); the row order is unchanged.
df2.sort_index(axis=1)

Unnamed: 0,A,B,C,D
2016-07-06,-0.681095,0.597763,0.987452,-1.546358
2016-07-03,1.523164,-1.251221,0.389532,1.461071
2016-07-02,0.795337,0.390497,1.852094,0.897029
2016-07-05,2.071471,-1.833716,0.006836,-0.07233
2016-07-04,-0.516295,-1.415513,1.242721,-0.514151
2016-07-01,0.179054,-0.393727,-1.227297,-0.470906


In [39]:
# Sort the rows by their index labels in descending order.
df2.sort_index(axis=0, ascending=False)

Unnamed: 0,D,B,C,A
2016-07-06,-1.546358,0.597763,0.987452,-0.681095
2016-07-05,-0.07233,-1.833716,0.006836,2.071471
2016-07-04,-0.514151,-1.415513,1.242721,-0.516295
2016-07-03,1.461071,-1.251221,0.389532,1.523164
2016-07-02,0.897029,0.390497,1.852094,0.795337
2016-07-01,-0.470906,-0.393727,-1.227297,0.179054


In [40]:
# Sort the rows by the values in column D, smallest first.
df2.sort_values(by="D")

Unnamed: 0,D,B,C,A
2016-07-06,-1.546358,0.597763,0.987452,-0.681095
2016-07-04,-0.514151,-1.415513,1.242721,-0.516295
2016-07-01,-0.470906,-0.393727,-1.227297,0.179054
2016-07-05,-0.07233,-1.833716,0.006836,2.071471
2016-07-02,0.897029,0.390497,1.852094,0.795337
2016-07-03,1.461071,-1.251221,0.389532,1.523164


In [41]:
# Sort the rows by the values in column B, smallest first.
df2.sort_values(by = "B")

Unnamed: 0,D,B,C,A
2016-07-05,-0.07233,-1.833716,0.006836,2.071471
2016-07-04,-0.514151,-1.415513,1.242721,-0.516295
2016-07-03,1.461071,-1.251221,0.389532,1.523164
2016-07-01,-0.470906,-0.393727,-1.227297,0.179054
2016-07-02,0.897029,0.390497,1.852094,0.795337
2016-07-06,-1.546358,0.597763,0.987452,-0.681095


In [42]:
# Four cities x three columns of standard-normal samples for the apply()
# examples. The generator is seeded so the values are reproducible on every
# Restart & Run All (the unseeded np.random.randn call was not).
df3 = pd.DataFrame(
    np.random.default_rng(7).standard_normal((4, 3)),
    columns=["b", "d", "e"],
    index=["Seoul", "Incheon", "Busan", "Daegu"],
)

In [43]:
# Display the city frame (4 rows x 3 columns).
df3

Unnamed: 0,b,d,e
Seoul,-2.861788,2.937316,0.051103
Incheon,0.869424,0.279148,-0.806276
Busan,0.42433,1.699941,0.031476
Daegu,1.863072,-0.28493,2.609074


In [46]:
def func(x):
    """Return the range of ``x``: its maximum minus its minimum.

    Parameters
    ----------
    x : object exposing ``.max()`` and ``.min()``
        For example a pandas Series or a NumPy array. When passed to
        ``DataFrame.apply`` this is one column (axis=0) or one row (axis=1).

    Returns
    -------
    The value of ``x.max() - x.min()``.
    """
    # A named def replaces the original lambda assignment so the function
    # carries a real name in tracebacks and can hold a docstring.
    return x.max() - x.min()

In [48]:
# Apply func down the rows (axis=0): each column is passed to func in turn,
# giving one max-min range per column.
df3.apply(func, axis = 0)

b    4.724860
d    3.222246
e    3.415349
dtype: float64

In [49]:
# Apply func across the columns (axis=1): each row is passed to func in turn,
# giving one max-min range per row.
df3.apply(func, axis=1)

Seoul      5.799104
Incheon    1.675700
Busan      1.668465
Daegu      2.894004
dtype: float64