In [54]:
import pandas as pd
import numpy as np

In [55]:
# Build a small demo frame from a dict of equal-length columns.
# "penalty" is listed in `columns` but is not a key of `data`, so it is
# created as a placeholder column with no values yet.
data = {
    "names": ["Kilho", "Kilho", "Kilho", "Charles", "Chearles"],
    "year": [2014, 2015, 2016, 2015, 2016],
    "points": [1.5, 1.7, 3.6, 2.4, 2.9],
}
df = pd.DataFrame(
    data,
    index=["one", "two", "three", "four", "five"],
    columns=["year", "names", "points", "penalty"],
)

In [56]:
# Show the freshly built frame; the "penalty" column is still empty.
df

Unnamed: 0,year,names,points,penalty
one,2014,Kilho,1.5,
two,2015,Kilho,1.7,
three,2016,Kilho,3.6,
four,2015,Charles,2.4,
five,2016,Chearles,2.9,


In [57]:
# Bracket access to a single column returns a Series that keeps the row index.
df["year"]

one      2014
two      2015
three    2016
four     2015
five     2016
Name: year, dtype: int64

In [58]:
# Attribute access extracts the same column as df["year"].
df.year

one      2014
two      2015
three    2016
four     2015
five     2016
Name: year, dtype: int64

In [59]:
# Double brackets: the inner brackets are an explicit list of column names,
# and the result is a DataFrame made of the "year" and "points" columns.
df[["year", "points"]]

Unnamed: 0,year,points
one,2014,1.5
two,2015,1.7
three,2016,3.6
four,2015,2.4
five,2016,2.9


In [60]:
# Assign 0.5 to the "penalty" column; the scalar is applied to every row.
df["penalty"] = 0.5

In [61]:
# Show the frame: "penalty" is now 0.5 in every row.
df

Unnamed: 0,year,names,points,penalty
one,2014,Kilho,1.5,0.5
two,2015,Kilho,1.7,0.5
three,2016,Kilho,3.6,0.5
four,2015,Charles,2.4,0.5
five,2016,Chearles,2.9,0.5


In [62]:
# Overwrite "penalty" with a list: one value per row, in row order.
df["penalty"] = [0.1, 0.2, 0.3, 0.4, 0.5]

In [63]:
# Show the frame: "penalty" now holds the per-row values 0.1 .. 0.5.
df

Unnamed: 0,year,names,points,penalty
one,2014,Kilho,1.5,0.1
two,2015,Kilho,1.7,0.2
three,2016,Kilho,3.6,0.3
four,2015,Charles,2.4,0.4
five,2016,Chearles,2.9,0.5


In [64]:
# Add a new column from a NumPy array.
# The array is sized from the frame itself rather than a hard-coded 5, so the
# assignment keeps matching the row count if the data above is changed.
# Note: despite the column name, the values are 0, 1, 2, ... (arange), not zeros.
df["zeros"] = np.arange(len(df))

In [65]:
# Show the frame with the new "zeros" column (values 0 through 4).
df

Unnamed: 0,year,names,points,penalty,zeros
one,2014,Kilho,1.5,0.1,0
two,2015,Kilho,1.7,0.2,1
three,2016,Kilho,3.6,0.3,2
four,2015,Charles,2.4,0.4,3
five,2016,Chearles,2.9,0.5,4


In [66]:
# Prepare a column to add from a pandas Series: the Series carries its own
# row labels, and here it only has entries for "two", "four" and "five".
val = pd.Series({"two": -1.2, "four": -1.5, "five": -1.7})

In [67]:
# Assigning a Series matches values to rows by index label; rows with no
# matching label ("one", "three") are left empty in the new "debt" column.
df["debt"] = val

In [68]:
# Show the frame: "debt" is filled only for rows "two", "four" and "five".
df

Unnamed: 0,year,names,points,penalty,zeros,debt
one,2014,Kilho,1.5,0.1,0,
two,2015,Kilho,1.7,0.2,1,-1.2
three,2016,Kilho,3.6,0.3,2,
four,2015,Charles,2.4,0.4,3,-1.5
five,2016,Chearles,2.9,0.5,4,-1.7


In [69]:
# Derived column: element-wise points minus penalty for each row.
df["net_points"] = df["points"].sub(df["penalty"])

In [70]:
# Boolean flag column: True where net_points is strictly greater than 2.0.
df["high_points"] = df["net_points"].gt(2.0)

In [71]:
# Show the frame with the derived "net_points" and "high_points" columns.
df

Unnamed: 0,year,names,points,penalty,zeros,debt,net_points,high_points
one,2014,Kilho,1.5,0.1,0,,1.4,False
two,2015,Kilho,1.7,0.2,1,-1.2,1.5,False
three,2016,Kilho,3.6,0.3,2,,3.3,True
four,2015,Charles,2.4,0.4,3,-1.5,2.0,False
five,2016,Chearles,2.9,0.5,4,-1.7,2.4,True


In [72]:
# Delete columns: `del` removes each named column from the frame in place.
del df["high_points"]
del df["net_points"]
del df["zeros"]
# Display what is left: year, names, points, penalty, debt.
df

Unnamed: 0,year,names,points,penalty,debt
one,2014,Kilho,1.5,0.1,
two,2015,Kilho,1.7,0.2,-1.2
three,2016,Kilho,3.6,0.3,
four,2015,Charles,2.4,0.4,-1.5
five,2016,Chearles,2.9,0.5,-1.7


In [73]:
# Give the row index and the column index a name; both names appear in the
# header of the rendered table below.
df.index.name = "Order"
df.columns.name="Info"
df

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014,Kilho,1.5,0.1,
two,2015,Kilho,1.7,0.2,-1.2
three,2016,Kilho,3.6,0.3,
four,2015,Charles,2.4,0.4,-1.5
five,2016,Chearles,2.9,0.5,-1.7


In [74]:
# Unlike NumPy, pandas offers many different ways to index rows.
df[0:3] # positional slice: rows 0 through 2 (end excluded), unlike a label slice

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014,Kilho,1.5,0.1,
two,2015,Kilho,1.7,0.2,-1.2
three,2016,Kilho,3.6,0.3,


In [75]:
df["two":"four"] # slice by index label; the end label "four" is included

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
two,2015,Kilho,1.7,0.2,-1.2
three,2016,Kilho,3.6,0.3,
four,2015,Charles,2.4,0.4,-1.5


In [76]:
# Prefer the .loc[] / .iloc[] indexers for selecting rows.
# Fetch every value of the row whose index label is "two" (returned as a Series).
df.loc["two"]

Info
year        2015
names      Kilho
points       1.7
penalty      0.2
debt        -1.2
Name: two, dtype: object

In [77]:
# Label slice through .loc: rows "two" through "four", end label included.
df.loc["two":"four"]

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
two,2015,Kilho,1.7,0.2,-1.2
three,2016,Kilho,3.6,0.3,
four,2015,Charles,2.4,0.4,-1.5


In [78]:
df.loc["two":"four", "points"] # only the "points" values for rows "two" through "four"

Order
two      1.7
three    3.6
four     2.4
Name: points, dtype: float64

In [81]:
# Add a ROW (not a column) with .loc: assigning to the not-yet-existing label
# "six" appends a new row, with one value per column in column order.
# NOTE: after this assignment the "year" column is displayed as floats
# (2014.0, 2015.0, ...) instead of the integers shown before.
df.loc["six", :] = [2013, "Hayoung", 4.0 ,0.1, 2.1]

In [82]:
# Show the frame with the new row "six" appended at the bottom.
df

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014.0,Kilho,1.5,0.1,
two,2015.0,Kilho,1.7,0.2,-1.2
three,2016.0,Kilho,3.6,0.3,
four,2015.0,Charles,2.4,0.4,-1.5
five,2016.0,Chearles,2.9,0.5,-1.7
six,2013.0,Hayoung,4.0,0.1,2.1


In [85]:
# Everything in the row at integer position 3 (zero-based), i.e. the row labelled "four".
df.iloc[3]

Info
year          2015
names      Charles
points         2.4
penalty        0.4
debt          -1.5
Name: four, dtype: object

In [87]:
# Positional ranges can be given for rows and columns together:
# rows 3-4 and columns 0-1 (the end of each range is excluded).
df.iloc[3:5, 0:2]

Info,year,names
Order,Unnamed: 1_level_1,Unnamed: 2_level_1
four,2015.0,Charles
five,2016.0,Chearles


In [88]:
# Comparing a column with a scalar yields a boolean Series, one True/False per row.
df["year"] > 2014

Order
one      False
two       True
three     True
four      True
five      True
six      False
Name: year, dtype: bool

In [89]:
# Use the boolean Series as a row mask in .loc; ":" keeps every column.
df.loc[df["year"]> 2014, :]

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
two,2015.0,Kilho,1.7,0.2,-1.2
three,2016.0,Kilho,3.6,0.3,
four,2015.0,Charles,2.4,0.4,-1.5
five,2016.0,Chearles,2.9,0.5,-1.7


In [90]:
# Row mask plus a list of column labels: "names" and "points" for the rows named "Kilho".
df.loc[df["names"] == "Kilho", ["names", "points"]]

Info,names,points
Order,Unnamed: 1_level_1,Unnamed: 2_level_1
one,Kilho,1.5
two,Kilho,1.7
three,Kilho,3.6


In [92]:
# Combine two conditions with & (element-wise AND): rows with 2 < points < 3.
# Each comparison is parenthesized because & binds more tightly than > and <.
df.loc[(df["points"] > 2) & (df["points"] <3), :]

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
four,2015.0,Charles,2.4,0.4,-1.5
five,2016.0,Chearles,2.9,0.5,-1.7
