In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from pandas.testing import assert_frame_equal
import matplotlib.pyplot as plt
import seaborn as sns
# Directory containing the notebook's input files, located under the current working directory.
p1 = Path.cwd().joinpath('back_data')

In [2]:
# Read the college dataset and use the institution name (INSTNM) column as the row index.
college = pd.read_csv(
    p1 / 'college.csv',
    index_col='INSTNM',
)
# Pull out the CITY column as a Series; it is used by the selection examples in later cells.
city = college['CITY']

In [4]:
# Ways to select data from a Series (and a DataFrame): the plain indexing
# operator, the .loc accessor (by label) and the .iloc accessor (by position).
# The index's get_loc() method returns the integer position of a label.
(
    city['Alabama A & M University'],                  # indexing operator with a label
    city.loc['Alabama A & M University'],              # label-based accessor
    city.iloc[0],                                      # position-based accessor
    city.index.get_loc('Alabama A & M University'),    # label -> integer position
)

('Normal', 'Normal', 'Normal', 0)

In [5]:
# Slicing: a label (keyword) slice includes both endpoints, whereas a
# positional slice is half-open (the stop position is excluded).
# With a label slice, if the start label appears after the stop label, an empty
# Series is returned instead of an exception being raised.
(
    city['Alabama A & M University':'Alabama State University'],      # label slice via []
    city.loc['Alabama A & M University':'Alabama State University'],  # label slice via .loc
    city.iloc[0:5],                                                   # positional slice
)

(INSTNM
 Alabama A & M University                   Normal
 University of Alabama at Birmingham    Birmingham
 Amridge University                     Montgomery
 University of Alabama in Huntsville    Huntsville
 Alabama State University               Montgomery
 Name: CITY, dtype: object,
 INSTNM
 Alabama A & M University                   Normal
 University of Alabama at Birmingham    Birmingham
 Amridge University                     Montgomery
 University of Alabama in Huntsville    Huntsville
 Alabama State University               Montgomery
 Name: CITY, dtype: object,
 INSTNM
 Alabama A & M University                   Normal
 University of Alabama at Birmingham    Birmingham
 Amridge University                     Montgomery
 University of Alabama in Huntsville    Huntsville
 Alabama State University               Montgomery
 Name: CITY, dtype: object)

In [6]:
# A boolean array can be used to pick out specific values.
# NOTE(review): despite its name, this mask matches on city name only, so it is
# not restricted to Alabama -- the displayed result includes
# 'West Virginia University Institute of Technology'.
alabama_mask = city.isin(['Birmingham', 'Montgomery'])
(
    city[alabama_mask],      # boolean mask via the indexing operator
    city.loc[alabama_mask],  # boolean mask via .loc
)

(INSTNM
 University of Alabama at Birmingham                 Birmingham
 Amridge University                                  Montgomery
 Alabama State University                            Montgomery
 Auburn University at Montgomery                     Montgomery
 Birmingham Southern College                         Birmingham
 South University-Montgomery                         Montgomery
 Faulkner University                                 Montgomery
 Herzing University-Birmingham                       Birmingham
 Huntingdon College                                  Montgomery
 Jefferson State Community College                   Birmingham
 Lawson State Community College-Birmingham Campus    Birmingham
 Samford University                                  Birmingham
 Southeastern Bible College                          Birmingham
 H Councill Trenholm State Community College         Montgomery
 West Virginia University Institute of Technology    Montgomery
 Virginia College-Birmingham    

In [11]:
# Selecting a single row of a DataFrame returns a Series; selecting several
# rows returns a DataFrame.
# Pass a list or a slice to .loc / .iloc to select several rows.
(
    college.loc['University of Alaska Anchorage'],  # one row by label
    college.iloc[60],                               # one row by position
)

(CITY                  Anchorage
 STABBR                       AK
 HBCU                        0.0
 MENONLY                     0.0
 WOMENONLY                   0.0
 RELAFFIL                      0
 SATVRMID                    NaN
 SATMTMID                    NaN
 DISTANCEONLY                0.0
 UGDS                    12865.0
 UGDS_WHITE               0.5747
 UGDS_BLACK               0.0358
 UGDS_HISP                0.0761
 UGDS_ASIAN               0.0778
 UGDS_AIAN                0.0653
 UGDS_NHPI                0.0086
 UGDS_2MOR                 0.098
 UGDS_NRA                 0.0181
 UGDS_UNKN                0.0457
 PPTUG_EF                 0.4539
 CURROPER                      1
 PCTPELL                  0.2385
 PCTFLOAN                 0.2647
 UG25ABV                  0.4386
 MD_EARN_WNE_P10           42500
 GRAD_DEBT_MDN_SUPP      19449.5
 Name: University of Alaska Anchorage, dtype: object,
 CITY                  Anchorage
 STABBR                       AK
 HBCU                 

In [15]:
# Selecting rows and columns of a DataFrame at once: the indexer order is
# (rows, columns), and a bare ':' (colon) selects every row / every column.
(
    college.loc[:, ['WOMENONLY', 'SATVRMID']],  # all rows, two columns by label
    college.iloc[:, [4, 6]],                    # all rows, two columns by position
)

(                                                    WOMENONLY  SATVRMID
 INSTNM                                                                 
 Alabama A & M University                                  0.0     424.0
 University of Alabama at Birmingham                       0.0     570.0
 Amridge University                                        0.0       NaN
 University of Alabama in Huntsville                       0.0     595.0
 Alabama State University                                  0.0     425.0
 ...                                                       ...       ...
 SAE Institute of Technology  San Francisco                NaN       NaN
 Rasmussen College - Overland Park                         NaN       NaN
 National Personal Training Institute of Cleveland         NaN       NaN
 Bay Area Medical Academy - San Jose Satellite L...        NaN       NaN
 Excel Learning Center-San Antonio South                   NaN       NaN
 
 [7535 rows x 2 columns],
                       

In [16]:
# The get_loc() method of an index / columns object converts a label into its
# integer position.
# A label slice is closed at both ends but a positional slice is half-open, so
# 1 is added to the position of the last wanted column to keep it in the result.
col_start, col_end = (
    college.columns.get_loc('UGDS_WHITE'),
    college.columns.get_loc('UGDS_UNKN') + 1,
)
college.iloc[:5, col_start:col_end]

Unnamed: 0_level_0,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Alabama A & M University,0.0333,0.9353,0.0055,0.0019,0.0024,0.0019,0.0,0.0059,0.0138
University of Alabama at Birmingham,0.5922,0.26,0.0283,0.0518,0.0022,0.0007,0.0368,0.0179,0.01
Amridge University,0.299,0.4192,0.0069,0.0034,0.0,0.0,0.0,0.0,0.2715
University of Alabama in Huntsville,0.6988,0.1255,0.0382,0.0376,0.0143,0.0002,0.0172,0.0332,0.035
Alabama State University,0.0158,0.9208,0.0121,0.0019,0.001,0.0006,0.0098,0.0243,0.0137


In [18]:
# Once the index is sorted (here with sort_index()), .loc supports
# lexicographic slicing on partial labels.
# Whether an index is sorted can be checked with, for example, the index's
# is_monotonic_increasing attribute (a property, accessed without parentheses).
# The slice behaves like a half-open interval: strictly speaking it is closed,
# but it closes at exactly the label 'Su', so labels such as 'Su***' that sort
# after 'Su' are not included.
col2 = college.sort_index()
col2.loc['Sp':'Su', :]

Unnamed: 0_level_0,CITY,STABBR,HBCU,MENONLY,WOMENONLY,RELAFFIL,SATVRMID,SATMTMID,DISTANCEONLY,UGDS,...,UGDS_2MOR,UGDS_NRA,UGDS_UNKN,PPTUG_EF,CURROPER,PCTPELL,PCTFLOAN,UG25ABV,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Spa Tech Institute-Ipswich,Ipswich,MA,0.0,0.0,0.0,0,,,0.0,37.0,...,0.0000,0.0000,0.0541,0.4054,1,0.2656,0.3906,0.7907,21500,6333
Spa Tech Institute-Plymouth,Plymouth,MA,0.0,0.0,0.0,0,,,0.0,153.0,...,0.0000,0.0000,0.2484,0.3399,1,0.3716,0.4266,0.6250,21500,6333
Spa Tech Institute-Westboro,Westboro,MA,0.0,0.0,0.0,0,,,0.0,90.0,...,0.0000,0.0000,0.0222,0.5778,1,0.3409,0.4545,0.6882,21500,6333
Spa Tech Institute-Westbrook,Westbrook,ME,0.0,0.0,0.0,0,,,0.0,240.0,...,0.0000,0.0000,0.0042,0.2542,1,0.4350,0.5093,0.5224,21500,6333
Spalding University,Louisville,KY,0.0,0.0,0.0,1,490.0,440.0,0.0,1227.0,...,0.0302,0.0016,0.0326,0.2502,1,0.4442,0.6725,0.3764,41700,25000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Studio Academy of Beauty,Chandler,AZ,0.0,0.0,0.0,0,,,0.0,332.0,...,0.0392,0.0000,0.0090,0.0000,1,0.5855,0.6218,0.5675,,6333
Studio Jewelers,New York,NY,0.0,0.0,0.0,0,,,0.0,55.0,...,0.0000,0.0364,0.0000,0.6000,1,0.0451,0.0902,0.8525,PrivacySuppressed,PrivacySuppressed
Stylemaster College of Hair Design,Longview,WA,0.0,0.0,0.0,0,,,0.0,77.0,...,0.0130,0.0000,0.0000,0.0000,1,0.8036,0.7024,0.4510,17000,13320
Styles and Profiles Beauty College,Selmer,TN,0.0,0.0,0.0,0,,,0.0,31.0,...,0.0000,0.0000,0.0000,0.0000,1,0.8182,0.7955,0.2400,PrivacySuppressed,PrivacySuppressed
