# 판다스 (pandas)

In [1]:
!pip install pandas

Collecting pandas
  Downloading pandas-2.3.1-cp313-cp313-win_amd64.whl.metadata (19 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading pandas-2.3.1-cp313-cp313-win_amd64.whl (11.0 MB)
   ---------------------------------------- 0.0/11.0 MB ? eta -:--:--
   ------- -------------------------------- 2.1/11.0 MB 10.5 MB/s eta 0:00:01
   ---------------- ----------------------- 4.5/11.0 MB 11.2 MB/s eta 0:00:01
   ------------------------ --------------- 6.8/11.0 MB 11.4 MB/s eta 0:00:01
   --------------------------------- ------ 9.2/11.0 MB 11.4 MB/s eta 0:00:01
   ---------------------------------------  10.7/11.0 MB 11.5 MB/s eta 0:00:01
   ---------------------------------------- 11.0/11.0 MB 10.0 MB/s eta 0:00:00
Downloading pytz-2025.2-py2.py3-none-any.whl (509 kB)
Installing collected packages: pytz, pandas

   ---------------------------------------- 0/2 [pytz]
   -------------------- ------------------- 1/2 [pandas

In [106]:
import numpy as np
import pandas as pd

In [107]:
# 판다스 Series

data1 = np.arange(1,6)
print(data1)
data2 = pd.Series(data1)
print(data2)
data2.index, data2.values

[1 2 3 4 5]
0    1
1    2
2    3
3    4
4    5
dtype: int64


(RangeIndex(start=0, stop=5, step=1), array([1, 2, 3, 4, 5]))

In [108]:
data3 = pd.Series([11,22,33,44,55])
data3

0    11
1    22
2    33
3    44
4    55
dtype: int64

In [109]:
dd = {"apple" : 4400, "banana" :3500, "kiwi" : 2000, "orange" : 2100, "mango": 8200 }
data4 = pd.Series(dd)
data4

apple     4400
banana    3500
kiwi      2000
orange    2100
mango     8200
dtype: int64

In [110]:
print(type(data4))
data4.name = "과일 가격표"
data4.index.name = "과일 이름"
data4, data4.index, data4.values
data4

<class 'pandas.core.series.Series'>


과일 이름
apple     4400
banana    3500
kiwi      2000
orange    2100
mango     8200
Name: 과일 가격표, dtype: int64

In [111]:
data2.index = ["aa","bb","cc","ee","dd"] # 개수가 같아야 함
data2 , data2.index, data2.values

(aa    1
 bb    2
 cc    3
 ee    4
 dd    5
 dtype: int64,
 Index(['aa', 'bb', 'cc', 'ee', 'dd'], dtype='object'),
 array([1, 2, 3, 4, 5]))

In [112]:
# 판다스 시리즈 데이터 타입
data5 = pd.Series(np.arange(1,7), dtype = "float", index = ["aa","bb","cc","dd","ee","ff"])
data5, data5.shape, data5.size, data5.ndim

(aa    1.0
 bb    2.0
 cc    3.0
 dd    4.0
 ee    5.0
 ff    6.0
 dtype: float64,
 (6,),
 6,
 1)

In [113]:
# 시리즈 원소에 접근방법
print(data5)
print(data5['dd'])
print(data5.loc['dd'])
print("data5.iloc[3]:",data5.iloc[3] )

aa    1.0
bb    2.0
cc    3.0
dd    4.0
ee    5.0
ff    6.0
dtype: float64
4.0
4.0
data5.iloc[3]: 4.0


## 데이터프레임 Dataframe

In [114]:
import pandas as pd
df = pd.DataFrame([[1,2,3],[4,5,6],[7,8,9]],index=["a","b","c"], columns=["A","B","c"])
df

Unnamed: 0,A,B,c
a,1,2,3
b,4,5,6
c,7,8,9


In [115]:
num = np.arange(1,10).reshape(3,3)
df = pd.DataFrame(num, index=["a","b","c"], columns=["A","B","c"])
df

Unnamed: 0,A,B,c
a,1,2,3
b,4,5,6
c,7,8,9


In [116]:
dd = {"과일이름":["apple","banana", "kiwi", "melon", "orange", "mango"], 
      "가격": [4400,5000,2500,7200,7700,8100], 
      "개수":[3,8,11,22,3,5]}
df2 = pd.DataFrame(dd)
df2.index = range(1,7)
df2.index.name = "No."
df2

Unnamed: 0_level_0,과일이름,가격,개수
No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,apple,4400,3
2,banana,5000,8
3,kiwi,2500,11
4,melon,7200,22
5,orange,7700,3
6,mango,8100,5


In [117]:
df2

Unnamed: 0_level_0,과일이름,가격,개수
No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,apple,4400,3
2,banana,5000,8
3,kiwi,2500,11
4,melon,7200,22
5,orange,7700,3
6,mango,8100,5


In [118]:
df2.columns.name = "imfo"
df2.index.name = ''
df2.columns = ["과일이름", "원가","수량"]
df2

Unnamed: 0,과일이름,원가,수량
,,,
1.0,apple,4400.0,3.0
2.0,banana,5000.0,8.0
3.0,kiwi,2500.0,11.0
4.0,melon,7200.0,22.0
5.0,orange,7700.0,3.0
6.0,mango,8100.0,5.0


In [119]:
df3= pd.DataFrame(df2,columns = ["과일이름", "원가", "품종","할인","수량", "합계"])
df3, df3.dtypes

(     과일이름    원가  품종  할인  수량  합계
                                
 1   apple  4400 NaN NaN   3 NaN
 2  banana  5000 NaN NaN   8 NaN
 3    kiwi  2500 NaN NaN  11 NaN
 4   melon  7200 NaN NaN  22 NaN
 5  orange  7700 NaN NaN   3 NaN
 6   mango  8100 NaN NaN   5 NaN,
 과일이름     object
 원가        int64
 품종      float64
 할인      float64
 수량        int64
 합계      float64
 dtype: object)

In [120]:
df2.describe()

Unnamed: 0,원가,수량
count,6.0,6.0
mean,5816.666667,8.666667
std,2206.732124,7.229569
min,2500.0,3.0
25%,4550.0,3.5
50%,6100.0,6.5
75%,7575.0,10.25
max,8100.0,22.0


In [121]:
df3.loc[:,'품종'] = df3.loc[:,'품종'].astype('object')
df3.loc[:,'품종'] = ['신선한', '궁금한','맛있는','최적의','웰빙','유기농']
df3.loc[:,'할인'] = [0.2,0.3,0.1,0.3,0.2,0.1]
df3

  df3.loc[:,'품종'] = df3.loc[:,'품종'].astype('object')


Unnamed: 0,과일이름,원가,품종,할인,수량,합계
,,,,,,
1.0,apple,4400.0,신선한,0.2,3.0,
2.0,banana,5000.0,궁금한,0.3,8.0,
3.0,kiwi,2500.0,맛있는,0.1,11.0,
4.0,melon,7200.0,최적의,0.3,22.0,
5.0,orange,7700.0,웰빙,0.2,3.0,
6.0,mango,8100.0,유기농,0.1,5.0,


In [122]:
df3['합계'] = (df3["원가"] * df3["수량"] * (1 - df3["할인"])).astype('int')
df3

Unnamed: 0,과일이름,원가,품종,할인,수량,합계
,,,,,,
1.0,apple,4400.0,신선한,0.2,3.0,10560.0
2.0,banana,5000.0,궁금한,0.3,8.0,28000.0
3.0,kiwi,2500.0,맛있는,0.1,11.0,24750.0
4.0,melon,7200.0,최적의,0.3,22.0,110880.0
5.0,orange,7700.0,웰빙,0.2,3.0,18480.0
6.0,mango,8100.0,유기농,0.1,5.0,36450.0


In [123]:
df3['ETC'] = "-"
df3['Test'] = "test"
print(df3.dtypes)
df3

과일이름     object
원가        int64
품종       object
할인      float64
수량        int64
합계        int64
ETC      object
Test     object
dtype: object


Unnamed: 0,과일이름,원가,품종,할인,수량,합계,ETC,Test
,,,,,,,,
1.0,apple,4400.0,신선한,0.2,3.0,10560.0,-,test
2.0,banana,5000.0,궁금한,0.3,8.0,28000.0,-,test
3.0,kiwi,2500.0,맛있는,0.1,11.0,24750.0,-,test
4.0,melon,7200.0,최적의,0.3,22.0,110880.0,-,test
5.0,orange,7700.0,웰빙,0.2,3.0,18480.0,-,test
6.0,mango,8100.0,유기농,0.1,5.0,36450.0,-,test


In [124]:
del df3['Test']
df3

Unnamed: 0,과일이름,원가,품종,할인,수량,합계,ETC
,,,,,,,
1.0,apple,4400.0,신선한,0.2,3.0,10560.0,-
2.0,banana,5000.0,궁금한,0.3,8.0,28000.0,-
3.0,kiwi,2500.0,맛있는,0.1,11.0,24750.0,-
4.0,melon,7200.0,최적의,0.3,22.0,110880.0,-
5.0,orange,7700.0,웰빙,0.2,3.0,18480.0,-
6.0,mango,8100.0,유기농,0.1,5.0,36450.0,-


In [125]:
ee =pd.Series([1.3,2.2], index=[2,4])
df3['ETC'] = ee
df3 = df3.fillna('')
df3

Unnamed: 0,과일이름,원가,품종,할인,수량,합계,ETC
,,,,,,,
1.0,apple,4400.0,신선한,0.2,3.0,10560.0,
2.0,banana,5000.0,궁금한,0.3,8.0,28000.0,1.3
3.0,kiwi,2500.0,맛있는,0.1,11.0,24750.0,
4.0,melon,7200.0,최적의,0.3,22.0,110880.0,2.2
5.0,orange,7700.0,웰빙,0.2,3.0,18480.0,
6.0,mango,8100.0,유기농,0.1,5.0,36450.0,


In [126]:
df3['PASS'] = ''
df3

Unnamed: 0,과일이름,원가,품종,할인,수량,합계,ETC,PASS
,,,,,,,,
1.0,apple,4400.0,신선한,0.2,3.0,10560.0,,
2.0,banana,5000.0,궁금한,0.3,8.0,28000.0,1.3,
3.0,kiwi,2500.0,맛있는,0.1,11.0,24750.0,,
4.0,melon,7200.0,최적의,0.3,22.0,110880.0,2.2,
5.0,orange,7700.0,웰빙,0.2,3.0,18480.0,,
6.0,mango,8100.0,유기농,0.1,5.0,36450.0,,


In [127]:
pp = np.where(df3['합계']<=30000,'구매각','-')
df3['PASS'] = pp
df3

Unnamed: 0,과일이름,원가,품종,할인,수량,합계,ETC,PASS
,,,,,,,,
1.0,apple,4400.0,신선한,0.2,3.0,10560.0,,구매각
2.0,banana,5000.0,궁금한,0.3,8.0,28000.0,1.3,구매각
3.0,kiwi,2500.0,맛있는,0.1,11.0,24750.0,,구매각
4.0,melon,7200.0,최적의,0.3,22.0,110880.0,2.2,-
5.0,orange,7700.0,웰빙,0.2,3.0,18480.0,,구매각
6.0,mango,8100.0,유기농,0.1,5.0,36450.0,,-


In [128]:
# df3.iloc[1,1:4]
# df3.loc[2,:]
df3.loc[df3.loc[:,'과일이름']=='banana','품종':'수량']

Unnamed: 0,품종,할인,수량
,,,
2.0,궁금한,0.3,8.0


In [130]:
df3.loc[df3.loc[:,'PASS']=='구매각',['과일이름','품종','원가','PASS']]

Unnamed: 0,과일이름,품종,원가,PASS
,,,,
1.0,apple,신선한,4400.0,구매각
2.0,banana,궁금한,5000.0,구매각
3.0,kiwi,맛있는,2500.0,구매각
5.0,orange,웰빙,7700.0,구매각


In [135]:
import pandas as pd

data = {
    "population": [51780, 12580, 144100, 331000],  # 단위: 만명 (예시)
    "GDP": [1_800, 5_000, 14_300, 21_400]         # 단위: 십억 달러 (예시)
}

country = ['korea', 'japan', 'china', 'usa']

df = pd.DataFrame(data, index=countries)
df.index.name = "country"

print(df)

         population    GDP
country                   
korea         51780   1800
japan         12580   5000
china        144100  14300
usa          331000  21400


In [140]:
import pandas as pd

data = {
    "population": [51780, 12580, 144100, 331000],
    "GDP": [1800, 5000, 14300, 21400]
}

countries = ["korea", "japan", "china", "usa"]

df = pd.DataFrame(data, index=countries)
df.index.name = "country"

In [146]:
print(df.loc[['korea'], :])

         population   GDP   GDP_per
country                            
korea         51780  1800  0.034762


In [147]:
# 1인당 GDP 컬럼 추가
df['GDP_per'] = df['GDP'] / df['population']

print(df)

         population    GDP   GDP_per
country                             
korea         51780   1800  0.034762
japan         12580   5000  0.397456
china        144100  14300  0.099237
usa          331000  21400  0.064653
