## 5.1 Query

In [10]:
from pandas import DataFrame

# Sample records; every record carries the same four keys: cd, nm, open, close.
data = [
    {"cd": "A060310", "nm": "3S", "open": 2920, "close": 2920},
    {"cd": "A095570", "nm": "AJ네트웍스", "open": 1920, "close": 6250},
    {"cd": "A006840", "nm": "AK홀딩스", "open": 2020, "close": 29700},
    {"cd": "A054620", "nm": "APS홀딩스", "open": 3120, "close": 19400},
]

# A list of dicts becomes one row per dict, with the keys as column labels.
df = DataFrame(data)
df

Unnamed: 0,cd,nm,open,close
0,A060310,3S,2920,2920
1,A095570,AJ네트웍스,1920,6250
2,A006840,AK홀딩스,2020,29700
3,A054620,APS홀딩스,3120,19400


In [11]:
# Promote the "cd" column to the row index; set_index returns a new frame,
# so the result is bound back to df.
df = df.set_index(keys="cd")
print(df)

             nm  open  close
cd                          
A060310      3S  2920   2920
A095570  AJ네트웍스  1920   6250
A006840   AK홀딩스  2020  29700
A054620  APS홀딩스  3120  19400


In [12]:
# Boolean mask: True for every row whose "open" value is at least 2000.
cond = df["open"].ge(2000)
# Selecting with the mask keeps only the rows marked True.
print(df.loc[cond])

             nm  open  close
cd                          
A060310      3S  2920   2920
A006840   AK홀딩스  2020  29700
A054620  APS홀딩스  3120  19400


In [13]:
# Rows whose "nm" column equals the string literal '3S'.
print(df.query(expr="nm == '3S'"))

         nm  open  close
cd                      
A060310  3S  2920   2920


In [14]:
# Same equality query, this time against a name containing Hangul characters.
print(df.query(expr="nm == 'AK홀딩스'"))

            nm  open  close
cd                         
A006840  AK홀딩스  2020  29700


In [16]:
# Compare two columns against each other: rows where "open" is below "close".
print(df.query(expr="open < close"))

             nm  open  close
cd                          
A095570  AJ네트웍스  1920   6250
A006840   AK홀딩스  2020  29700
A054620  APS홀딩스  3120  19400


In [17]:
# Membership test: rows whose "nm" is one of the listed names.
print(df.query(expr="nm in ['3S', 'AK홀딩스']"))

            nm  open  close
cd                         
A060310     3S  2920   2920
A006840  AK홀딩스  2020  29700


In [19]:
# "cd" is the index name at this point, and the query expression refers to it
# by name just like a column.
df.query(expr="cd == 'A060310'")

Unnamed: 0_level_0,nm,open,close
cd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A060310,3S,2920,2920


In [21]:
# Value to look up, held in an ordinary Python variable.
name = "AJ네트웍스"

# The "@" prefix makes the query expression read the Python variable `name`.
df.query(expr='nm == @name')

Unnamed: 0_level_0,nm,open,close
cd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A095570,AJ네트웍스,1920,6250


## 5.2 Filter

In [22]:
from pandas import DataFrame

# Raw values as a list of three rows with four entries each.
# Row and column labels are not part of this literal.
data = [
    [1412, 23, 243, 345],
    [2.423, 234.4, 2165, 45],
    [1.1, 1.45, 2.6, 1.66]
]

In [23]:
# Row labels and column labels for the raw value table.
index = ["DPS", "PER", "PBR"]
col = ["2024/01", "2024/02", "2024/03", "2024/04(E)"]

# Attach the labels to the nested-list data.
df = DataFrame(data, index=index, columns=col)
print(df)

      2024/01  2024/02  2024/03  2024/04(E)
DPS  1412.000    23.00    243.0      345.00
PER     2.423   234.40   2165.0       45.00
PBR     1.100     1.45      2.6        1.66


In [24]:
# Keep only the listed column label (columns are the axis filter uses by default).
df.filter(items=["2024/01"], axis="columns")

Unnamed: 0,2024/01
DPS,1412.0
PER,2.423
PBR,1.1


In [27]:
# Keep only the listed row label by filtering along the index axis.
df.filter(items=["PER"], axis="index")

Unnamed: 0,2024/01,2024/02,2024/03,2024/04(E)
PER,2.423,234.4,2165.0,45.0


In [31]:
# Keep the columns whose label contains the text "03".
df.filter(regex="03", axis="columns")

Unnamed: 0,2024/03
DPS,243.0
PER,2165.0
PBR,2.6


In [32]:
# Keep the rows whose label ends with "R" ("$" anchors the match at the end).
df.filter(regex="R$", axis="index")

Unnamed: 0,2024/01,2024/02,2024/03,2024/04(E)
PER,2.423,234.4,2165.0,45.0
PBR,1.1,1.45,2.6,1.66


In [33]:
# Keep the columns whose label contains four consecutive digits.
# The pattern is a raw string so the backslash in "\d" reaches the regex engine
# unchanged; in a normal string literal "\d" is an invalid escape sequence and
# Python emits a warning for it.
df.filter(regex = r"\d{4}")

  df.filter(regex = "\d{4}")


Unnamed: 0,2024/01,2024/02,2024/03,2024/04(E)
DPS,1412.0,23.0,243.0,345.0
PER,2.423,234.4,2165.0,45.0
PBR,1.1,1.45,2.6,1.66


In [34]:
# Keep the columns whose label ends with four digits, a slash and two digits;
# "2024/04(E)" is dropped because of its "(E)" suffix.
# The pattern is a raw string so the backslashes in "\d" reach the regex engine
# unchanged; in a normal string literal "\d" is an invalid escape sequence and
# Python emits a warning for it.
df.filter(regex = r"\d{4}/\d{2}$")

  df.filter(regex = "\d{4}/\d{2}$")


Unnamed: 0,2024/01,2024/02,2024/03
DPS,1412.0,23.0,243.0
PER,2.423,234.4,2165.0
PBR,1.1,1.45,2.6


## 5.3 정렬 및 순위

In [35]:
# Column labels: 종목코드 (stock code), 종목명 (stock name), 현재가 (current price).
col = ["종목코드", "종목명", "현재가"]

# One inner list per row. The codes are strings, which preserves their leading zeros.
data = [
    ["037730", "3R", 1510],
    ["036360", "3SOFT", 1790],
    ["005670", "ACTS", 1185],
]

df = DataFrame(data, columns=col)

In [36]:
# Use the 종목코드 (stock code) column as the row index.
# Rebinding the returned frame instead of passing inplace=True leaves df in the
# same final state while avoiding in-place mutation of an object shared across cells.
df = df.set_index("종목코드")

In [41]:
# Sort the rows by 현재가 (current price), smallest first; df itself is not changed.
df2 = df.sort_values("현재가", ascending=True)
print(df2)

          종목명   현재가
종목코드               
005670   ACTS  1185
037730     3R  1510
036360  3SOFT  1790


In [38]:
# Sort the rows by 현재가 (current price), largest first.
df2 = df.sort_values("현재가", ascending=False)

In [39]:
# Display the frame currently bound to df2.
df2

Unnamed: 0_level_0,종목명,현재가
종목코드,Unnamed: 1_level_1,Unnamed: 2_level_1
36360,3SOFT,1790
37730,3R,1510
5670,ACTS,1185


## 5.4 인덱스 연산

In [2]:
import pandas as pd

# Two small integer indexes that overlap on the values 2 and 3.
idx1, idx2 = pd.Index([1, 2, 3]), pd.Index([2, 3, 4])

# Show which class pd.Index actually produced.
print(type(idx1))

<class 'pandas.core.indexes.base.Index'>


In [3]:
# Labels that appear in idx1, in idx2, or in both.
idx1.union(other=idx2)

Index([1, 2, 3, 4], dtype='int64')

In [5]:
# Labels that appear in both idx1 and idx2.
idx1.intersection(other=idx2)

Index([2, 3], dtype='int64')

In [6]:
# Labels that appear in idx1 but not in idx2.
idx1.difference(other=idx2)

Index([1], dtype='int64')

## 5.5 Group by

In [9]:
# 데이터 프레임 생성에 필요한 정보 정리
# Column-oriented input: each key is a column label and each list holds that
# column's five values.
data = {
    "테마": ["건설", "유통", "화학", "금융", "가스공사"],  # theme
    "종목명": ["계룡건설", "이마트", "넥센", "부국증권", "한국가스공사"],  # stock name
    "PER": [2, 4, 3, 5, 6],  # integer PER values; none are missing
    "PBR": [0.16, 0.16, 0.17, 0.18, 0.19],  # PBR values
}

# Build the frame from the dict of columns.
df = pd.DataFrame(data=data)



In [10]:
# Rows whose 테마 (theme) equals "건설".
df1 = df.loc[df["테마"].eq("건설")]

In [11]:
# Display the frame currently bound to df1.
df1

Unnamed: 0,테마,종목명,PER,PBR
0,건설,계룡건설,2,0.16


In [12]:
# Average of the PBR column over the rows held in df1.
mean1 = df1.loc[:, "PBR"].mean()

In [13]:
# Display the value currently bound to mean1.
mean1

0.16

In [14]:
# Group the rows by 테마 (theme) and average the PER column within each group.
df.groupby(by="테마")["PER"].mean()

테마
가스공사    6.0
건설      2.0
금융      5.0
유통      4.0
화학      3.0
Name: PER, dtype: float64