In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build a small example frame with explicit column order and row labels.
# "penalty" is requested in `columns` but has no entry in `data`, so that
# column starts out with missing values only.
data = {
    "names": ["soyul", "soyul", "soyul", "Charles", "Charles"],
    "year": [2014, 2015, 2016, 2015, 2016],
    "points": [1.5, 1.7, 3.6, 2.4, 2.9],
}
df = pd.DataFrame(
    data,
    columns=["year", "names", "points", "penalty"],
    index=["one", "two", "three", "four", "five"],
)

In [3]:
# Display the frame; the "penalty" column has no values yet.
df

Unnamed: 0,year,names,points,penalty
one,2014,soyul,1.5,
two,2015,soyul,1.7,
three,2016,soyul,3.6,
four,2015,Charles,2.4,
five,2016,Charles,2.9,


In [4]:
# Selecting a single column returns a Series.
df["year"]

one      2014
two      2015
three    2016
four     2015
five     2016
Name: year, dtype: int64

In [5]:
# Attribute-style access returns the same "year" Series as df["year"].
df.year

one      2014
two      2015
three    2016
four     2015
five     2016
Name: year, dtype: int64

In [6]:
# Passing a list of column names selects several columns at once.
df[["year","points"]]

Unnamed: 0,year,points
one,2014,1.5
two,2015,1.7
three,2016,3.6
four,2015,2.4
five,2016,2.9


In [7]:
# Assigning a scalar fills every row of the "penalty" column with that value.
df["penalty"] = 0.5

In [8]:
# Display the frame: every row now has penalty 0.5.
df

Unnamed: 0,year,names,points,penalty
one,2014,soyul,1.5,0.5
two,2015,soyul,1.7,0.5
three,2016,soyul,3.6,0.5
four,2015,Charles,2.4,0.5
five,2016,Charles,2.9,0.5


In [9]:
# Assigning a list sets the column row by row (one value per row, five in total).
df["penalty"] = [0.1,0.2,0.3,0.4,0.5]

In [10]:
# Display the frame with the per-row penalty values.
df

Unnamed: 0,year,names,points,penalty
one,2014,soyul,1.5,0.1
two,2015,soyul,1.7,0.2
three,2016,soyul,3.6,0.3
four,2015,Charles,2.4,0.4
five,2016,Charles,2.9,0.5


In [11]:
# Assigning to a column name that does not exist yet creates a new column.
# NOTE(review): despite the name "zeros", np.arange(5) yields 0, 1, 2, 3, 4 —
# confirm whether np.zeros(5) was intended.
df["zeros"] = np.arange(5)

In [12]:
# Display the frame with the new "zeros" column.
df

Unnamed: 0,year,names,points,penalty,zeros
one,2014,soyul,1.5,0.1,0
two,2015,soyul,1.7,0.2,1
three,2016,soyul,3.6,0.3,2
four,2015,Charles,2.4,0.4,3
five,2016,Charles,2.9,0.5,4


In [13]:
# Create a Series and add it as a new column. Values are matched to rows by
# index label, so rows absent from the Series ("one", "three") get no value.
val = pd.Series([-1.2,-1.5,-1.7],index=["two","four","five"])
df["debt"] = val

In [14]:
# Display the frame with the new "debt" column.
df

Unnamed: 0,year,names,points,penalty,zeros,debt
one,2014,soyul,1.5,0.1,0,
two,2015,soyul,1.7,0.2,1,-1.2
three,2016,soyul,3.6,0.3,2,
four,2015,Charles,2.4,0.4,3,-1.5
five,2016,Charles,2.9,0.5,4,-1.7


In [15]:
# Derive a new column from element-wise arithmetic on two existing columns,
# then display the result.
df["net_points"] = df["points"] - df["penalty"]
df

Unnamed: 0,year,names,points,penalty,zeros,debt,net_points
one,2014,soyul,1.5,0.1,0,,1.4
two,2015,soyul,1.7,0.2,1,-1.2,1.5
three,2016,soyul,3.6,0.3,2,,3.3
four,2015,Charles,2.4,0.4,3,-1.5,2.0
five,2016,Charles,2.9,0.5,4,-1.7,2.4


In [16]:
# A comparison yields one boolean per row, stored here as a new column.
# The test is strict, so a net_points value of exactly 2.0 gives False.
df["high_points"] = df["net_points"] > 2.0

In [17]:
# Display the frame with the boolean "high_points" column.
df

Unnamed: 0,year,names,points,penalty,zeros,debt,net_points,high_points
one,2014,soyul,1.5,0.1,0,,1.4,False
two,2015,soyul,1.7,0.2,1,-1.2,1.5,False
three,2016,soyul,3.6,0.3,2,,3.3,True
four,2015,Charles,2.4,0.4,3,-1.5,2.0,False
five,2016,Charles,2.9,0.5,4,-1.7,2.4,True


In [18]:
# Delete a column from the DataFrame.
del df["high_points"]

In [19]:
# Delete the derived "net_points" column.
del df["net_points"]

In [20]:
# Delete the "zeros" column.
del df["zeros"]

In [21]:
# Display the frame after the deletions.
df

Unnamed: 0,year,names,points,penalty,debt
one,2014,soyul,1.5,0.1,
two,2015,soyul,1.7,0.2,-1.2
three,2016,soyul,3.6,0.3,
four,2015,Charles,2.4,0.4,-1.5
five,2016,Charles,2.9,0.5,-1.7


In [22]:
# Inspect the column labels.
df.columns

Index(['year', 'names', 'points', 'penalty', 'debt'], dtype='object')

In [23]:
# Give the row index and the column index their own names.
df.index.name="Order"
df.columns.name="Info"

In [24]:
# Display the frame; the axis names "Order" and "Info" now appear in the header.
df

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014,soyul,1.5,0.1,
two,2015,soyul,1.7,0.2,-1.2
three,2016,soyul,3.6,0.3,
four,2015,Charles,2.4,0.4,-1.5
five,2016,Charles,2.9,0.5,-1.7


In [25]:
# Selecting and manipulating rows.
# Positional slice directly on the frame: returns the first three rows.
# Not recommended, because it looks just like column selection (df["col"]).
df[0:3]

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014,soyul,1.5,0.1,
two,2015,soyul,1.7,0.2,-1.2
three,2016,soyul,3.6,0.3,


In [26]:
# Slice using the actual index labels; the end label "four" is included.
# Not recommended either, because it looks like column selection.
df["two":"four"]
# Recommended alternatives: the .loc and .iloc indexers.

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
two,2015,soyul,1.7,0.2,-1.2
three,2016,soyul,3.6,0.3,
four,2015,Charles,2.4,0.4,-1.5


In [27]:
# Select a single row by label; the result is a Series named after that label.
df.loc["two"]

Info
year        2015
names      soyul
points       1.7
penalty      0.2
debt        -1.2
Name: two, dtype: object

In [28]:
# Label slice with .loc; both endpoints are included.
df.loc["two":"four"]

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
two,2015,soyul,1.7,0.2,-1.2
three,2016,soyul,3.6,0.3,
four,2015,Charles,2.4,0.4,-1.5


In [29]:
# Select rows together with one specific column

In [30]:
# Rows "two" through "four" of the "points" column; the result is a Series.
df.loc["two":"four","points"]

Order
two      1.7
three    3.6
four     2.4
Name: points, dtype: float64

In [31]:
# All rows, two specific columns.
df.loc[:,["year","names"]]

Info,year,names
Order,Unnamed: 1_level_1,Unnamed: 2_level_1
one,2014,soyul
two,2015,soyul
three,2016,soyul
four,2015,Charles
five,2016,Charles


In [32]:
# A range of rows and a range of columns, both sliced by label
# (endpoints included on both axes).
df.loc["three":"five","year":"penalty"]

Info,year,names,points,penalty
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
three,2016,soyul,3.6,0.3
four,2015,Charles,2.4,0.4
five,2016,Charles,2.9,0.5


In [33]:
# Add a new row by assigning to a label that does not exist yet.
# The values can be given as a list or a NumPy array.
# NOTE: in the output recorded after this cell, "year" is displayed as
# 2014.0, 2015.0, ... — i.e. it is shown as float from this point on.
df.loc["six",:] = [2013,"Hayoung",4.0,0.1,2.1]

In [34]:
# Display the frame with the new row "six".
df

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014.0,soyul,1.5,0.1,
two,2015.0,soyul,1.7,0.2,-1.2
three,2016.0,soyul,3.6,0.3,
four,2015.0,Charles,2.4,0.4,-1.5
five,2016.0,Charles,2.9,0.5,-1.7
six,2013.0,Hayoung,4.0,0.1,2.1


In [35]:
# From here on: the .iloc indexer (selection by integer position)
# Get the row at position 3

In [36]:
# Row at integer position 3 (label "four"), returned as a Series.
df.iloc[3]

Info
year          2015
names      Charles
points         2.4
penalty        0.4
debt          -1.5
Name: four, dtype: object

In [37]:
# Select using positional ranges for both rows and columns

In [38]:
# Rows at positions 3-4 and columns at positions 0-1; the end of each slice is excluded.
df.iloc[3:5,0:2]

Info,year,names
Order,Unnamed: 1_level_1,Unnamed: 2_level_1
four,2015.0,Charles
five,2016.0,Charles


In [39]:
# Row at integer position 3 again (this repeats an earlier cell's expression).
df.iloc[3]

Info
year          2015
names      Charles
points         2.4
penalty        0.4
debt          -1.5
Name: four, dtype: object

In [40]:
# Rows and columns can also be picked by listing only the positions wanted

In [41]:
# Rows at positions 0, 1 and 3; columns at positions 1 and 2.
df.iloc[[0,1,3],[1,2]]

Info,names,points
Order,Unnamed: 1_level_1,Unnamed: 2_level_1
one,soyul,1.5
two,soyul,1.7
four,Charles,2.4


In [42]:
# All rows, columns at positions 1 through 3 (the slice end, 4, is excluded).
df.iloc[:,1:4]

Info,names,points,penalty
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,soyul,1.5,0.1
two,soyul,1.7,0.2
three,soyul,3.6,0.3
four,Charles,2.4,0.4
five,Charles,2.9,0.5
six,Hayoung,4.0,0.1


In [43]:
# A single cell: row position 1, column position 1.
df.iloc[1,1]

'soyul'

In [44]:
# Boolean indexing (masks).
# To get the rows whose year is greater than 2014, first build the mask:
df["year"] > 2014

Order
one      False
two       True
three     True
four      True
five      True
six      False
Name: year, dtype: bool

In [45]:
# Pass the boolean mask to .loc to keep only the matching rows (all columns).
df.loc[df["year"] > 2014,:]

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
two,2015.0,soyul,1.7,0.2,-1.2
three,2016.0,soyul,3.6,0.3,
four,2015.0,Charles,2.4,0.4,-1.5
five,2016.0,Charles,2.9,0.5,-1.7


In [46]:
# A row mask combined with a list of columns to return.
df.loc[df["names"] == "soyul",["names","points"]]

Info,names,points
Order,Unnamed: 1_level_1,Unnamed: 2_level_1
one,soyul,1.5
two,soyul,1.7
three,soyul,3.6


In [47]:
# Combine several boolean masks with the & (and) operator

In [48]:
# Each condition is parenthesized because & binds more tightly than the
# comparison operators.
df.loc[(df["points"] > 2) & (df["points"] < 3),:]

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
four,2015.0,Charles,2.4,0.4,-1.5
five,2016.0,Charles,2.9,0.5,-1.7


In [49]:
# Assign through a mask: set "penalty" to 0 on the rows where points > 3.
df.loc[df["points"] > 3,"penalty"] = 0

In [50]:
# Display the frame; penalty is now 0 on the rows with points above 3.
df

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014.0,soyul,1.5,0.1,
two,2015.0,soyul,1.7,0.2,-1.2
three,2016.0,soyul,3.6,0.0,
four,2015.0,Charles,2.4,0.4,-1.5
five,2016.0,Charles,2.9,0.5,-1.7
six,2013.0,Hayoung,4.0,0.0,2.1
