In [1]:
import numpy as np
import pandas as pd

In [2]:
nba = pd.read_csv("C:/python/datas/nba.csv", index_col="Name")
nba.nlargest(1, columns=["Salary"])

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Stephen Curry,Golden State Warriors,PG,3/14/88,40231758


In [3]:
# DataFrame 단일 값 추출
# at: 행, 열의 label으로 인덱싱
# iat: 행, 열의 인덱스(정수)로 인덱싱

In [4]:
%%timeit
# %%timeit는 jupyter 매직 메서드
# 코드를 실행하고 걸리는 평균 시간 측정

nba.at["Stephen Curry", "Birthday"]     # .at(label로 인덱싱)

3.55 µs ± 306 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [5]:
%%timeit
nba.loc["Stephen Curry", "Birthday"]

13.3 µs ± 2.24 µs per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [6]:
# %%timeit를 통해 100,000회 반복한 평균 실행시간 도출
# at이 loc에 비해 빠름

In [7]:
%%timeit
nba.iat[263, 1]    # .iat(정수 값으로 인덱싱)

14.4 µs ± 3.1 µs per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [8]:
%%timeit
nba.iloc[263, 1]

17.5 µs ± 741 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [9]:
# %%timeit를 통해 100,000회 반복한 평균 실행시간 도출
# iat이 iloc에 비해 빠름

In [10]:
# Series loc, iloc, at, iat
# loc, at: label index 값을 활용해서 인덱싱
# iloc, iat: Series의 index(정수) 값으로 인덱싱

In [11]:
nba.Salary.loc["Damian Lillard"]

29802321

In [12]:
nba.Salary.at["Damian Lillard"]

29802321

In [13]:
nba.Salary.iloc[234]

2033160

In [14]:
nba.Salary.iat[234]

2033160

In [15]:
# DataFrame columns 값 변경하기

# columns 속성에 리스트 값으로 재할당하면 columns 변경 가능
nba.columns

Index(['Team', 'Position', 'Birthday', 'Salary'], dtype='object')

In [16]:
nba.columns = ['Team', 'Position', 'Date of Birth', 'Pay']

In [17]:
nba.head(1)

Unnamed: 0_level_0,Team,Position,Date of Birth,Pay
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,SG,9/26/96,1445697


In [18]:
# rename()메서드: DataFrame columns 값 변경하기

# columns 키워드 인수에 딕셔너리 값 전달
# key값은 기존 컬럼명, value값은 변경 후 컬럼명 입력

# rename은 기본적으로 inplace 키워드 인수 값이 False이므로,
# 재할당하거나 inplace = True로 메서드를 호출해야 변경 가능

nba.rename(columns = {"Date of Birth" : "Birthday", "Pay" : "Salary"})
nba.head(2)     # inplace = True를 안 한 상태

Unnamed: 0_level_0,Team,Position,Date of Birth,Pay
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,SG,9/26/96,1445697
Christian Wood,Detroit Pistons,PF,9/27/95,1645357


In [19]:
nba.rename(columns = {"Date of Birth" : "Birthday", "Pay" : "Salary"}, 
           inplace = True)
nba.head(2)

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,SG,9/26/96,1445697
Christian Wood,Detroit Pistons,PF,9/27/95,1645357


In [20]:
# Column 순서 바꾸기

# 배열 꼴로 컬럼명 순서를 새로 입력하면 컬럼 단위로 순서가 바뀐다.
nba2 = nba[["Team", "Position", "Salary", "Birthday"]]
nba2.head(2)

Unnamed: 0_level_0,Team,Position,Salary,Birthday
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,SG,1445697,9/26/96
Christian Wood,Detroit Pistons,PF,1645357,9/27/95


In [21]:
# rename() 메서드: DataFrame index 값 변경하기

# Birthday → Date of Birth, Salary → Pay로 다시 컬럼명 변경
nba.rename(columns = {"Birthday" : "Date of Birth", "Salary" : "Pay"}, 
           inplace = True)
nba.head(2)

Unnamed: 0_level_0,Team,Position,Date of Birth,Pay
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,SG,9/26/96,1445697
Christian Wood,Detroit Pistons,PF,9/27/95,1645357


In [22]:
# rename() 메서드를 활용해서 index값 변경 가능
# index 키워드 인수에 딕셔너리 형태로 전달
# key값은 기존 row index, value값은 변경 후 row index

nba.loc["Giannis Antetokounmpo"]

Team             Milwaukee Bucks
Position                      PF
Date of Birth            12/6/94
Pay                     25842697
Name: Giannis Antetokounmpo, dtype: object

In [23]:
nba = nba.rename(index = {"Giannis Antetokounmpo" : "Greek Freak"})

In [24]:
nba.loc["Greek Freak"]

Team             Milwaukee Bucks
Position                      PF
Date of Birth            12/6/94
Pay                     25842697
Name: Greek Freak, dtype: object

In [25]:
# 연습문제

nfl = pd.read_csv('C:/python/datas/nfl.csv', 
                  index_col='Name',
                  parse_dates=["Birthday"])

nfl

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tremon Smith,Philadelphia Eagles,RB,1996-07-20,570000
Shawn Williams,Cincinnati Bengals,SS,1991-05-13,3500000
Adam Butler,New England Patriots,DT,1994-04-12,645000
Derek Wolfe,Denver Broncos,DE,1990-02-24,8000000
Jake Ryan,Jacksonville Jaguars,OLB,1992-02-27,1000000
...,...,...,...,...
Bashaud Breeland,Kansas City Chiefs,CB,1992-01-30,805000
Craig James,Philadelphia Eagles,CB,1996-04-29,570000
Jonotthan Harrison,New York Jets,C,1991-08-25,1500000
Chuma Edoga,New York Jets,OT,1997-05-25,495000


In [35]:
# nfl 데이터셋에서 팀당 몇 명의 선수가 소속되어 있는지 확인

nfl.Team.value_counts()

New York Jets           58
Kansas City Chiefs      56
Washington Redskins     56
New Orleans Saints      55
San Francisco 49Ers     55
Denver Broncos          54
Minnesota Vikings       54
Los Angeles Chargers    54
Seattle Seahawks        53
Dallas Cowboys          53
Buffalo Bills           53
Atlanta Falcons         53
Detroit Lions           53
Chicago Bears           53
Los Angeles Rams        52
New York Giants         52
Philadelphia Eagles     52
Houston Texans          52
Arizona Cardinals       51
Cincinnati Bengals      51
Green Bay Packers       51
Oakland Raiders         51
Jacksonville Jaguars    50
Cleveland Browns        49
Miami Dolphins          49
Indianapolis Colts      49
Carolina Panthers       49
New England Patriots    49
Baltimore Ravens        48
Pittsburgh Steelers     47
Tampa Bay Buccaneers    47
Tennessee Titans        46
Name: Team, dtype: int64

In [39]:
# 가장 높은 연봉을 받는 5명을 조회

nfl.nlargest(5, columns="Salary")

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Kirk Cousins,Minnesota Vikings,QB,1988-08-19,27500000
Marcus Mariota,Tennessee Titans,QB,1993-10-30,20922000
Jameis Winston,Tampa Bay Buccaneers,QB,1994-01-06,20922000
Derek Carr,Oakland Raiders,QB,1991-03-28,19900000
Jimmy Garoppolo,San Francisco 49Ers,QB,1991-11-02,17200000


In [49]:
# 팀마다 연봉을 많이 받는 사람들을 내림차순으로 조회
# 팀은 오름차순으로 정렬

nfl.sort_values(by=['Team','Salary'],
                ascending=[True, False])

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Chandler Jones,Arizona Cardinals,OLB,1990-02-27,16500000
Patrick Peterson,Arizona Cardinals,CB,1990-07-11,11000000
Larry Fitzgerald,Arizona Cardinals,WR,1983-08-31,11000000
David Johnson,Arizona Cardinals,RB,1991-12-16,5700000
Justin Pugh,Arizona Cardinals,G,1990-08-15,5000000
...,...,...,...,...
Ross Pierschbacher,Washington Redskins,C,1995-05-05,495000
Kelvin Harmon,Washington Redskins,WR,1996-12-15,495000
Wes Martin,Washington Redskins,G,1996-05-09,495000
Jimmy Moreland,Washington Redskins,CB,1995-08-26,495000


In [78]:
# New York Jets 팀에서 나이가 가장 많은 선수와, 그의 생일을 조회
# index를 Team으로 변경하세요. 단 기존 index는 column으로 다시 복원

nfl.loc[nfl['Birthday'] == nfl.Birthday.min()]

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tom Brady,New England Patriots,QB,1977-08-03,14000000
