In [1]:
import pandas as pd
# Eight consecutive calendar days beginning 2000-01-01; this index labels the
# rows of the frames built in the following cells.
index = pd.date_range(start='1/1/2000', periods=8)
print(index)

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08'],
              dtype='datetime64[ns]', freq='D')


In [2]:
import numpy as np

# Draw an 8-row by 3-column block of uniform random numbers with numpy and wrap
# it in a DataFrame, labelling the rows with `index` and the columns A, B, C.

df = pd.DataFrame(
    data=np.random.rand(8, 3),
    index=index,
    columns=list('ABC'),
)
df

Unnamed: 0,A,B,C
2000-01-01,0.031597,0.650216,0.7474
2000-01-02,0.385274,0.689356,0.813032
2000-01-03,0.68467,0.62721,0.899527
2000-01-04,0.233603,0.722432,0.507518
2000-01-05,0.672389,0.051918,0.93603
2000-01-06,0.416998,0.662172,0.836519
2000-01-07,0.137413,0.323375,0.833934
2000-01-08,0.530207,0.458762,0.861724


In [3]:
# Picking out a single row or column yields a Series: an index paired with a
# one-dimensional array of values.

print(df.loc[:, 'B'])

2000-01-01    0.650216
2000-01-02    0.689356
2000-01-03    0.627210
2000-01-04    0.722432
2000-01-05    0.051918
2000-01-06    0.662172
2000-01-07    0.323375
2000-01-08    0.458762
Freq: D, Name: B, dtype: float64


In [4]:
# A mask holds True/False per row, depending on whether the condition is met.
print(df['B'].gt(0.4))

2000-01-01     True
2000-01-02     True
2000-01-03     True
2000-01-04     True
2000-01-05    False
2000-01-06     True
2000-01-07    False
2000-01-08     True
Freq: D, Name: B, dtype: bool


In [5]:
# Filter with the mask: only rows whose B value exceeds 0.4 are kept.

df2 = df.loc[df['B'].gt(0.4)]
df2

Unnamed: 0,A,B,C
2000-01-01,0.031597,0.650216,0.7474
2000-01-02,0.385274,0.689356,0.813032
2000-01-03,0.68467,0.62721,0.899527
2000-01-04,0.233603,0.722432,0.507518
2000-01-06,0.416998,0.662172,0.836519
2000-01-08,0.530207,0.458762,0.861724


In [6]:
# Swap rows and columns: dates become the column labels, A/B/C the row labels.

df2.transpose()

Unnamed: 0,2000-01-01,2000-01-02,2000-01-03,2000-01-04,2000-01-06,2000-01-08
A,0.031597,0.385274,0.68467,0.233603,0.416998,0.530207
B,0.650216,0.689356,0.62721,0.722432,0.662172,0.458762
C,0.7474,0.813032,0.899527,0.507518,0.836519,0.861724


In [22]:
# Arithmetic along an axis, aligned on the row labels.

import pandas as pd
import numpy as np

index = pd.date_range(start='1/1/2000', periods=8)
df = (
    pd.DataFrame(data=np.random.rand(8, 3), index=index, columns=list('ABC'))
    .assign(
        # D: column A divided element-wise by column B.
        D=lambda frame: frame['A'].div(frame['B']),
        # E: per-row total; D already exists at this point, so it is included.
        E=lambda frame: frame.sum(axis=1),
    )
    # Subtract column A from every column, matching values by row label.
    .pipe(lambda frame: frame.sub(frame['A'], axis=0))
    # Divide every column by column C (the already A-subtracted C), by row label.
    .pipe(lambda frame: frame.div(frame['C'], axis=0))
)
# Persist the resulting frame as a CSV file.
df.to_csv('test.csv')

# Show only the first five rows rather than the whole frame.
df.head()

Unnamed: 0,A,B,C,D,E
2000-01-01,-0.0,1.24203,1.0,-126.666749,-128.194107
2000-01-02,-0.0,10.063759,1.0,-19.93155,-70.167642
2000-01-03,-0.0,1.130367,1.0,-6.331162,-8.942958
2000-01-04,0.0,1.019447,1.0,0.013361,2.227944
2000-01-05,-0.0,-1.552844,1.0,-0.679737,-21.137682


In [12]:
import pandas as pd
import numpy as np

index = pd.date_range(start='1/1/2000', periods=8)
df = pd.DataFrame(data=np.random.rand(8, 3), index=index, columns=list('ABC'))

# D holds column A divided element-wise by column B.
df['D'] = df['A'].div(df['B'])
# E holds the per-row total of every column present so far (A, B, C and D).
df['E'] = df.sum(axis=1)
# Subtract column A from every column, matching values by row label, so the
# resulting A column is all zeros.
df = df.sub(df['A'], axis=0)

df

Unnamed: 0,A,B,C,D,E
2000-01-01,0.0,0.126158,0.533637,0.357954,2.230202
2000-01-02,0.0,0.454337,0.124843,0.075457,1.794586
2000-01-03,0.0,0.232927,0.379489,0.137594,2.470827
2000-01-04,0.0,0.290562,0.101158,0.0073,2.49633
2000-01-05,0.0,0.4229,0.170188,0.058869,0.812693
2000-01-06,0.0,-0.24278,-0.226169,0.587593,2.601345
2000-01-07,0.0,0.588069,-0.057852,0.053286,1.035503
2000-01-08,0.0,-0.338833,0.097524,8.912125,9.809904


In [10]:
import pandas as pd

# Small 3x3 integer frame used to contrast the two summation axes.
data = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
df = pd.DataFrame(data=data)

# axis=0 collapses the rows: the result holds one total per column (A, B, C).
sum_along_rows = df.sum(axis=0)
# axis=1 collapses the columns: the result holds one total per row (0, 1, 2).
sum_along_columns = df.sum(axis=1)

print("Sum along rows (axis=0):\n", sum_along_rows)
print("\nSum along columns (axis=1):\n", sum_along_columns)

# Display the frame itself; it has only three rows, so head() shows all of it.
df.head()

Sum along rows (axis=0):
 A     6
B    15
C    24
dtype: int64

Sum along columns (axis=1):
 0    12
1    15
2    18
dtype: int64


Unnamed: 0,A,B,C
0,1,4,7
1,2,5,8
2,3,6,9
