In [1]:

from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd

In [2]:
### 라벨 인코딩할 컬럼이 1개인 경우
계절 = ['봄', '여름', '가을', '겨울']
계절

['봄', '여름', '가을', '겨울']

In [4]:
# 하나의 컬럼은 Series객체임.
data = pd.Series(계절) #Series --> 항목명 + 인덱스 + 값들
data

0     봄
1    여름
2    가을
3    겨울
dtype: object

In [5]:
encoder = LabelEncoder()

In [6]:
## 1. 각 값에 대해서 0부터 번호를 매김(사전을 만든다. 봄:0, 여름:1, ...)
## 2. 내가 가지고 있는 값들을 매긴 값에 따라 변환함.(봄, 여름, 여름 --> 0, 1, 1)

In [7]:
encoder.fit(data) #오름차순으로 숫자로 인코딩

In [8]:
encoder.classes_

array(['가을', '겨울', '봄', '여름'], dtype=object)

In [9]:
# ['가을', '겨울', '봄', '여름'] 순서대로 번호를 매김(인덱스 번호와 일치)

In [10]:
변환할컬럼 = ['가을', '겨울', '봄', '여름',  '봄', '여름', '겨울', '봄', '여름',  '봄', '여름']
target = pd.Series(변환할컬럼)
target

0     가을
1     겨울
2      봄
3     여름
4      봄
5     여름
6     겨울
7      봄
8     여름
9      봄
10    여름
dtype: object

In [11]:
target2 = encoder.transform(target)
target2

array([0, 1, 2, 3, 2, 3, 1, 2, 3, 2, 3])

In [12]:
target3 = encoder.inverse_transform(target2)
target3

array(['가을', '겨울', '봄', '여름', '봄', '여름', '겨울', '봄', '여름', '봄', '여름'],
      dtype=object)

In [13]:
words = ['apple', 'summer', 'summer', 'spring', 'spring']
hobby = ['book', 'run','talk','coffee','song']

In [14]:
df = pd.DataFrame({
        'words' : words,
        'hobby' : hobby
})
df

Unnamed: 0,words,hobby
0,apple,book
1,summer,run
2,summer,talk
3,spring,coffee
4,spring,song


In [15]:
encoder2 = LabelEncoder()

In [16]:
df.columns

Index(['words', 'hobby'], dtype='object')

In [17]:
for col in df.columns:
    df[col] = encoder2.fit_transform(df[col])

In [18]:
df

Unnamed: 0,words,hobby
0,0,0
1,2,2
2,2,4
3,1,1
4,1,3


In [35]:
df0= df.copy()
df0

Unnamed: 0,words,hobby
0,0,0
1,2,2
2,2,4
3,1,1
4,1,3


In [19]:
# 라벨 인코딩의 대상 
# 1. 연속적인 데이터(숫자의미, 연속적인 값을 가질 수 있음.) 
#    온도, 습도, 나이 등등 대상X
# 2. 문자형의 객체형 데이터(카테고리를 가질 것 데이터)
#    색, 성별, 사는 도시, 자동화 회사,..) 대상O


In [41]:
df2 = pd.DataFrame({"words" : ['apple', 'summer', 'summer', 'spring', 'spring'], 
                    "hobby" : ['book', 'run','talk','coffee','song']})
df2

Unnamed: 0,words,hobby
0,apple,book
1,summer,run
2,summer,talk
3,spring,coffee
4,spring,song


In [42]:
dummy = pd.get_dummies(df2['hobby'], dtype=int)
dummy

Unnamed: 0,book,coffee,run,song,talk
0,1,0,0,0,0
1,0,0,1,0,0
2,0,0,0,0,1
3,0,1,0,0,0
4,0,0,0,1,0


In [43]:
df2

Unnamed: 0,words,hobby
0,apple,book
1,summer,run
2,summer,talk
3,spring,coffee
4,spring,song


In [44]:
df2_dummy = pd.concat([df2, dummy], axis=1)
df2_dummy

Unnamed: 0,words,hobby,book,coffee,run,song,talk
0,apple,book,1,0,0,0,0
1,summer,run,0,0,1,0,0
2,summer,talk,0,0,0,0,1
3,spring,coffee,0,1,0,0,0
4,spring,song,0,0,0,1,0


In [45]:
dummy2 = pd.get_dummies(df2['words'], dtype=int)
dummy2

Unnamed: 0,apple,spring,summer
0,1,0,0
1,0,0,1
2,0,0,1
3,0,1,0
4,0,1,0


In [46]:
df3_dummy = pd.concat([df2_dummy, dummy2], axis=1)
df3_dummy

Unnamed: 0,words,hobby,book,coffee,run,song,talk,apple,spring,summer
0,apple,book,1,0,0,0,0,1,0,0
1,summer,run,0,0,1,0,0,0,0,1
2,summer,talk,0,0,0,0,1,0,0,1
3,spring,coffee,0,1,0,0,0,0,1,0
4,spring,song,0,0,0,1,0,0,1,0


In [49]:
df3_dummy.drop(['words', 'hobby'], axis=1, inplace=True)

In [50]:
df3_dummy

Unnamed: 0,book,coffee,run,song,talk,apple,spring,summer
0,1,0,0,0,0,1,0,0
1,0,0,1,0,0,0,0,1
2,0,0,0,0,1,0,0,1
3,0,1,0,0,0,0,1,0
4,0,0,0,1,0,0,1,0


In [51]:
df3 = pd.DataFrame({"words" : ['apple', 'summer', 'summer', 'spring', 'spring'], 
                    "hobby" : ['book', 'run','talk','coffee','song']})
df3

Unnamed: 0,words,hobby
0,apple,book
1,summer,run
2,summer,talk
3,spring,coffee
4,spring,song


In [54]:
df2_result = pd.get_dummies(df2, dtype=int)
df2_result

Unnamed: 0,words_apple,words_spring,words_summer,hobby_book,hobby_coffee,hobby_run,hobby_song,hobby_talk
0,1,0,0,1,0,0,0,0
1,0,0,1,0,0,1,0,0
2,0,0,1,0,0,0,0,1
3,0,1,0,0,1,0,0,0
4,0,1,0,0,0,0,1,0


In [55]:
bank = pd.read_csv('../csv-data/banklist.csv')
bank

Unnamed: 0,Bank Name,City,ST,CERT,Acquiring Institution,Closing Date,Updated Date
0,Washington Federal Bank for Savings,Chicago,IL,30570,Royal Savings Bank,15-Dec-17,20-Dec-17
1,The Farmers and Merchants State Bank of Argonia,Argonia,KS,17719,Conway Bank,13-Oct-17,20-Oct-17
2,Fayette County Bank,Saint Elmo,IL,1802,"United Fidelity Bank, fsb",26-May-17,26-Jul-17
3,"Guaranty Bank, (d/b/a BestBank in Georgia & Mi...",Milwaukee,WI,30003,First-Citizens Bank & Trust Company,5-May-17,26-Jul-17
4,First NBC Bank,New Orleans,LA,58302,Whitney Bank,28-Apr-17,5-Dec-17
...,...,...,...,...,...,...,...
550,"Superior Bank, FSB",Hinsdale,IL,32646,"Superior Federal, FSB",27-Jul-01,19-Aug-14
551,Malta National Bank,Malta,OH,6629,North Valley Bank,3-May-01,18-Nov-02
552,First Alliance Bank & Trust Co.,Manchester,NH,34264,Southern New Hampshire Bank & Trust,2-Feb-01,18-Feb-03
553,National State Bank of Metropolis,Metropolis,IL,3815,Banterra Bank of Marion,14-Dec-00,17-Mar-05


In [56]:
bank.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 555 entries, 0 to 554
Data columns (total 7 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   Bank Name              555 non-null    object
 1   City                   555 non-null    object
 2   ST                     555 non-null    object
 3   CERT                   555 non-null    int64 
 4   Acquiring Institution  555 non-null    object
 5   Closing Date           555 non-null    object
 6   Updated Date           555 non-null    object
dtypes: int64(1), object(6)
memory usage: 30.5+ KB


In [57]:
encoder_bank = LabelEncoder()

In [58]:
#1. 사전만들고 --> 2. 변환하고!

In [59]:
encoder_bank.fit(bank['City']) #사전을 만들어라.

In [60]:
encoder_bank.classes_ #오름차순으로 번호가 부여됨.

array(['Acworth', 'Ailey', 'Alamo', 'Albuquerque', 'Aledo', 'Alpharetta',
       'Altus', 'Andover', 'Anthony', 'Antioch', 'Apollo Beach', 'Arcola',
       'Argonia', 'Arlington', 'Asheville', 'Atlanta', 'Aurora', 'Austin',
       'Aventura', 'Bainbridge Island', 'Bakersfield', 'Bala Cynwyd',
       'Baltimore', 'Barnesville', 'Bartow', 'Batesville', 'Beaverton',
       'Bel Air', 'Belleview', 'Bellingham', 'Bentonville', 'Berkeley',
       'Berwyn', 'Birmingham', 'Blackwell', 'Blanchardville',
       'Bloomington', 'Bluffton', 'Boca Raton', 'Boise', 'Bonifay',
       'Boothwyn', 'Bradenton', 'Braselton', 'Bremerton', 'Bridgeport',
       'Brooksville', 'Brunswick', 'Buford', 'Burlington', 'Butler',
       'Calabasas', 'Camargo', 'Cambridge', 'Cape Coral', 'Carrabelle',
       'Carrollton', 'Carson City', 'Cartersville', 'Carthage',
       'Cassville', 'Castle Rock', 'Cave Junction', 'Champaign',
       'Champlin', 'Charleston', 'Cheneyville', 'Cherry Hill',
       'Chesterfield', 'Chi

In [61]:
len(encoder_bank.classes_)

426

In [64]:
encoder_bank.classes_[-10:-1]

array(['Winchester', 'Winder', 'Windsor', 'Winter Park', 'Wood Dale',
       'Woodbury', 'Woodland Hills', 'Woodstock', 'Worth'], dtype=object)

In [67]:
city2 = encoder_bank.transform(bank['City'])
city2

array([ 69,  12, 328, 239, 250,  88,  69, 290, 243, 421, 185, 232, 239,
       209, 289, 104,  69, 336,  15,  69,  92, 222, 279,  69, 320,  69,
        85, 139, 134, 240,  27,  71,  32, 128, 165, 371,  39, 115, 407,
       389, 148, 114,  45, 294, 286, 404, 136, 347, 259, 182, 344, 395,
        14, 109, 202, 225, 271, 203, 146, 191,  69,   7, 394, 377,  43,
       308, 214,  32, 346, 383, 106,  93, 366,  36, 403, 174,  69, 199,
       423,  48, 247, 144,   1, 215, 226, 277, 412, 348,  65, 186, 379,
       260, 279, 288, 223, 140,  53, 135, 102, 414, 321,  69, 108, 207,
       119, 350, 163, 188, 132, 138, 173,  41, 373,  28, 294,  92, 193,
       176, 323, 329, 270, 105, 153, 177, 103,  74,   4,  90,  14, 151,
       121, 425, 298, 248, 257, 238, 423,  97, 141, 370, 278, 168, 269,
        80, 352, 126,  82, 318, 152, 342,  10, 307, 306, 373,  15, 418,
        61,  69,  73, 384, 172,  65, 357,  49, 138, 218,  79, 242, 395,
        98,  46, 419,  59, 326,  33,  33, 113,  58, 196, 408, 42

In [66]:
encoder_bank.classes_[69]

'Chicago'

In [69]:
bank['City'] = city2
bank

Unnamed: 0,Bank Name,City,ST,CERT,Acquiring Institution,Closing Date,Updated Date,city
0,Washington Federal Bank for Savings,69,IL,30570,Royal Savings Bank,15-Dec-17,20-Dec-17,69
1,The Farmers and Merchants State Bank of Argonia,12,KS,17719,Conway Bank,13-Oct-17,20-Oct-17,12
2,Fayette County Bank,328,IL,1802,"United Fidelity Bank, fsb",26-May-17,26-Jul-17,328
3,"Guaranty Bank, (d/b/a BestBank in Georgia & Mi...",239,WI,30003,First-Citizens Bank & Trust Company,5-May-17,26-Jul-17,239
4,First NBC Bank,250,LA,58302,Whitney Bank,28-Apr-17,5-Dec-17,250
...,...,...,...,...,...,...,...,...
550,"Superior Bank, FSB",162,IL,32646,"Superior Federal, FSB",27-Jul-01,19-Aug-14,162
551,Malta National Bank,220,OH,6629,North Valley Bank,3-May-01,18-Nov-02,220
552,First Alliance Bank & Trust Co.,221,NH,34264,Southern New Hampshire Bank & Trust,2-Feb-01,18-Feb-03,221
553,National State Bank of Metropolis,235,IL,3815,Banterra Bank of Marion,14-Dec-00,17-Mar-05,235


In [70]:
bank.drop('city', axis=1, inplace=True)
bank

Unnamed: 0,Bank Name,City,ST,CERT,Acquiring Institution,Closing Date,Updated Date
0,Washington Federal Bank for Savings,69,IL,30570,Royal Savings Bank,15-Dec-17,20-Dec-17
1,The Farmers and Merchants State Bank of Argonia,12,KS,17719,Conway Bank,13-Oct-17,20-Oct-17
2,Fayette County Bank,328,IL,1802,"United Fidelity Bank, fsb",26-May-17,26-Jul-17
3,"Guaranty Bank, (d/b/a BestBank in Georgia & Mi...",239,WI,30003,First-Citizens Bank & Trust Company,5-May-17,26-Jul-17
4,First NBC Bank,250,LA,58302,Whitney Bank,28-Apr-17,5-Dec-17
...,...,...,...,...,...,...,...
550,"Superior Bank, FSB",162,IL,32646,"Superior Federal, FSB",27-Jul-01,19-Aug-14
551,Malta National Bank,220,OH,6629,North Valley Bank,3-May-01,18-Nov-02
552,First Alliance Bank & Trust Co.,221,NH,34264,Southern New Hampshire Bank & Trust,2-Feb-01,18-Feb-03
553,National State Bank of Metropolis,235,IL,3815,Banterra Bank of Marion,14-Dec-00,17-Mar-05


In [72]:
st_one = pd.get_dummies(bank['ST'], dtype=int)
st_one

Unnamed: 0,AL,AR,AZ,CA,CO,CT,FL,GA,HI,IA,...,SC,SD,TN,TX,UT,VA,WA,WI,WV,WY
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
550,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
551,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
552,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
553,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [73]:
bank.drop('ST', axis=1, inplace=True) 

In [74]:
bank

Unnamed: 0,Bank Name,City,CERT,Acquiring Institution,Closing Date,Updated Date
0,Washington Federal Bank for Savings,69,30570,Royal Savings Bank,15-Dec-17,20-Dec-17
1,The Farmers and Merchants State Bank of Argonia,12,17719,Conway Bank,13-Oct-17,20-Oct-17
2,Fayette County Bank,328,1802,"United Fidelity Bank, fsb",26-May-17,26-Jul-17
3,"Guaranty Bank, (d/b/a BestBank in Georgia & Mi...",239,30003,First-Citizens Bank & Trust Company,5-May-17,26-Jul-17
4,First NBC Bank,250,58302,Whitney Bank,28-Apr-17,5-Dec-17
...,...,...,...,...,...,...
550,"Superior Bank, FSB",162,32646,"Superior Federal, FSB",27-Jul-01,19-Aug-14
551,Malta National Bank,220,6629,North Valley Bank,3-May-01,18-Nov-02
552,First Alliance Bank & Trust Co.,221,34264,Southern New Hampshire Bank & Trust,2-Feb-01,18-Feb-03
553,National State Bank of Metropolis,235,3815,Banterra Bank of Marion,14-Dec-00,17-Mar-05


In [75]:
bank_dummy = pd.concat([bank, st_one], axis=1)
bank_dummy

Unnamed: 0,Bank Name,City,CERT,Acquiring Institution,Closing Date,Updated Date,AL,AR,AZ,CA,...,SC,SD,TN,TX,UT,VA,WA,WI,WV,WY
0,Washington Federal Bank for Savings,69,30570,Royal Savings Bank,15-Dec-17,20-Dec-17,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,The Farmers and Merchants State Bank of Argonia,12,17719,Conway Bank,13-Oct-17,20-Oct-17,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Fayette County Bank,328,1802,"United Fidelity Bank, fsb",26-May-17,26-Jul-17,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Guaranty Bank, (d/b/a BestBank in Georgia & Mi...",239,30003,First-Citizens Bank & Trust Company,5-May-17,26-Jul-17,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,First NBC Bank,250,58302,Whitney Bank,28-Apr-17,5-Dec-17,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
550,"Superior Bank, FSB",162,32646,"Superior Federal, FSB",27-Jul-01,19-Aug-14,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
551,Malta National Bank,220,6629,North Valley Bank,3-May-01,18-Nov-02,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
552,First Alliance Bank & Trust Co.,221,34264,Southern New Hampshire Bank & Trust,2-Feb-01,18-Feb-03,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
553,National State Bank of Metropolis,235,3815,Banterra Bank of Marion,14-Dec-00,17-Mar-05,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [76]:
bank.describe()

Unnamed: 0,City,CERT
count,555.0,555.0
mean,204.100901,31702.318919
std,124.703506,16401.784351
min,0.0,91.0
25%,90.5,20315.0
50%,204.0,32185.0
75%,310.5,35364.0
max,425.0,58701.0


In [77]:
bank['cert_cat'] = pd.cut(bank['CERT'],
                          bins = [0, 25000, 35000, 60000],
                          include_lowest=True,
                          labels=['low','middle','high'])
bank['cert_cat'][:5]

0    middle
1       low
2       low
3    middle
4      high
Name: cert_cat, dtype: category
Categories (3, object): ['low' < 'middle' < 'high']

In [79]:
cert_cat = pd.get_dummies(bank['cert_cat'], dtype=int)
cert_cat

Unnamed: 0,low,middle,high
0,0,1,0
1,1,0,0
2,1,0,0
3,0,1,0
4,0,0,1
...,...,...,...
550,0,1,0
551,1,0,0
552,0,1,0
553,1,0,0


In [80]:
bank_dummy2 = pd.concat([bank, cert_cat], axis=1)
bank_dummy2

Unnamed: 0,Bank Name,City,CERT,Acquiring Institution,Closing Date,Updated Date,cert_cat,low,middle,high
0,Washington Federal Bank for Savings,69,30570,Royal Savings Bank,15-Dec-17,20-Dec-17,middle,0,1,0
1,The Farmers and Merchants State Bank of Argonia,12,17719,Conway Bank,13-Oct-17,20-Oct-17,low,1,0,0
2,Fayette County Bank,328,1802,"United Fidelity Bank, fsb",26-May-17,26-Jul-17,low,1,0,0
3,"Guaranty Bank, (d/b/a BestBank in Georgia & Mi...",239,30003,First-Citizens Bank & Trust Company,5-May-17,26-Jul-17,middle,0,1,0
4,First NBC Bank,250,58302,Whitney Bank,28-Apr-17,5-Dec-17,high,0,0,1
...,...,...,...,...,...,...,...,...,...,...
550,"Superior Bank, FSB",162,32646,"Superior Federal, FSB",27-Jul-01,19-Aug-14,middle,0,1,0
551,Malta National Bank,220,6629,North Valley Bank,3-May-01,18-Nov-02,low,1,0,0
552,First Alliance Bank & Trust Co.,221,34264,Southern New Hampshire Bank & Trust,2-Feb-01,18-Feb-03,middle,0,1,0
553,National State Bank of Metropolis,235,3815,Banterra Bank of Marion,14-Dec-00,17-Mar-05,low,1,0,0


In [81]:
bank_dummy2.drop(['CERT', 'cert_cat'], axis=1, inplace=True)

In [82]:
bank_dummy2

Unnamed: 0,Bank Name,City,Acquiring Institution,Closing Date,Updated Date,low,middle,high
0,Washington Federal Bank for Savings,69,Royal Savings Bank,15-Dec-17,20-Dec-17,0,1,0
1,The Farmers and Merchants State Bank of Argonia,12,Conway Bank,13-Oct-17,20-Oct-17,1,0,0
2,Fayette County Bank,328,"United Fidelity Bank, fsb",26-May-17,26-Jul-17,1,0,0
3,"Guaranty Bank, (d/b/a BestBank in Georgia & Mi...",239,First-Citizens Bank & Trust Company,5-May-17,26-Jul-17,0,1,0
4,First NBC Bank,250,Whitney Bank,28-Apr-17,5-Dec-17,0,0,1
...,...,...,...,...,...,...,...,...
550,"Superior Bank, FSB",162,"Superior Federal, FSB",27-Jul-01,19-Aug-14,0,1,0
551,Malta National Bank,220,North Valley Bank,3-May-01,18-Nov-02,1,0,0
552,First Alliance Bank & Trust Co.,221,Southern New Hampshire Bank & Trust,2-Feb-01,18-Feb-03,0,1,0
553,National State Bank of Metropolis,235,Banterra Bank of Marion,14-Dec-00,17-Mar-05,1,0,0
