In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
from matplotlib import font_manager, rc
%matplotlib inline

# 글꼴 경로 지정
font_path = "c:/Windows/Fonts/malgun.ttf"  # 윈도우에 설치된 맑은 고딕 폰트 경로

# 폰트 이름 얻어오기
font_name = font_manager.FontProperties(fname=font_path).get_name()

# matplotlib의 rc(run command) 기능을 이용하여 글꼴 설정
mpl.rc('font', family=font_name)

# 유니코드에서  음수 부호 설정
mpl.rc('axes', unicode_minus=False)

path='../../../../../datasets/paris_listings.csv'
raw = pd.read_csv(path)
df = raw.copy()

df = df[['id', 'host_id','host_is_superhost',
        'host_total_listings_count','neighbourhood_cleansed','room_type','accommodates','bathrooms','bedrooms',
        'beds','amenities','price','minimum_nights','maximum_nights','number_of_reviews', 'number_of_reviews_l30d', 'review_scores_rating', 'review_scores_accuracy', 
        'review_scores_cleanliness', 'review_scores_checkin', 'review_scores_communication', 
        'review_scores_location', 'review_scores_value', 'reviews_per_month','listing_url','property_type','number_of_reviews_ltm','has_availability','last_review']]

df = df.rename(columns= {
    'id': '숙소_id',
    'host_id': '호스트_id',
    'host_is_superhost': '슈퍼호스트',
    'host_total_listings_count': '숙소_수',
    'neighbourhood_cleansed': '숙소_지역',
    'room_type': '숙소_유형',
    'accommodates': '수용_인원수',
    'bathrooms': '욕실수',
    'bedrooms': '침실수',
    'beds': '침대수',
    'amenities': '편의시설',
    'price': '숙소_가격',
    'minimum_nights': '최소_숙박일',
    'maximum_nights': '최대_숙박일',
    'number_of_reviews': '리뷰수',
    'number_of_reviews_l30d': '30일_리뷰수',
    'review_scores_rating': '리뷰점수',
    'review_scores_accuracy': '숙소_정확성_리뷰점수',
    'review_scores_cleanliness': '숙소_청결도_리뷰점수',
    'review_scores_checkin': '숙소_체크인_리뷰점수',
    'review_scores_communication': '숙소_소통_리뷰점수',
    'review_scores_location': '숙소_위치_리뷰점수',
    'review_scores_value': '숙소_가격_리뷰점수',
    'reviews_per_month': '평균_리뷰수',
    'listing_url':'url',
    'property_type':'숙소_특징',
    'number_of_reviews_ltm':'12개월_리뷰수',
    'has_availability':'예약가능여부',
    'last_review':'마지막_리뷰'
})

print(len(df[df['리뷰수']==0]))
# 슈퍼호스트, 리뷰수 결측치 제거
print(f"처음 df : {len(df)}")
df = df[~df['슈퍼호스트'].isnull()]
print(f"슈퍼호스트 제거 : {len(df)}")
df = df[df['리뷰수']>2]
print(f"리뷰수 제거 : {len(df)}")

# 숙소가격 null값 제거
df= df[~df['숙소_가격'].isnull()]

#가격 앞 통화기호 제거
df['숙소_가격'] = df['숙소_가격'].replace('[\$,]', '', regex=True).astype(float)

# 유형 제거 
df = df[(df['숙소_유형']=='Entire home/apt') | (df['숙소_유형']=='Private room')]
print(f"숙소_유형 제거 : {len(df)}")

# 12개월 리뷰수 0 개 제거
df = df[df['12개월_리뷰수']!=0]
print(f"12개월_리뷰수 0개 제거 : {len(df)}")

# 욕실수, 침실수, 침대수 null값 제거
df = df.dropna(subset=['욕실수', '침실수', '침대수'])
print(f"욕실수, 침실수, 침대수 제거 후 : {len(df)}")

# 리뷰  null값 제거
df = df.dropna(subset=['숙소_정확성_리뷰점수','숙소_청결도_리뷰점수','숙소_체크인_리뷰점수','숙소_소통_리뷰점수','숙소_위치_리뷰점수','숙소_가격_리뷰점수'])
print(f"리뷰 결측치 제거 후 : {len(df)}")

# 예약 가능여부 f 버리기
df = df.dropna(subset='예약가능여부')
print(f"예약가능여부 : {len(df)}")

df_guest_prefer= df[(df['리뷰점수'] >= 4.9) & (df['리뷰수'] >= 5) & (df['슈퍼호스트'] == 't')]
df_non_guest_prefer = df[(df['슈퍼호스트'] == 'f') & (df['리뷰수'] >= 5)].sort_values('리뷰점수', ascending=True).head(4716)

25576
처음 df : 84397
슈퍼호스트 제거 : 84306
리뷰수 제거 : 47841
숙소_유형 제거 : 34280
12개월_리뷰수 0개 제거 : 31052
욕실수, 침실수, 침대수 제거 후 : 30997
리뷰 결측치 제거 후 : 30997
예약가능여부 : 30990


# 게스트 선호,비선호 비교

In [18]:
df['편의시설'].unique()

array(['["Lockbox", "Wifi", "Kitchen", "Refrigerator", "Hangers", "Coffee maker", "Host greets you", "Hot water", "Iron", "Hair dryer", "Paid parking garage on premises \\u2013 1 space", "Shampoo", "Smoke alarm", "Essentials", "Self check-in", "Bed linens", "TV", "Dishes and silverware", "Oven", "Heating", "Paid street parking off premises", "Microwave", "Cooking basics"]',
       '["Cooking basics", "Long term stays allowed", "Wifi", "Dedicated workspace", "TV with standard cable", "Hangers", "Bed linens", "Heating", "Iron", "Fire extinguisher", "Body soap", "Shampoo", "Hair dryer", "Host greets you", "Kitchen", "Hot water", "Coffee maker: espresso machine, pour-over coffee", "Coffee", "Extra pillows and blankets", "Stove", "Hot water kettle", "Cleaning products", "Essentials", "Wine glasses", "Smoke alarm", "Baking sheet", "Books and reading material", "First aid kit", "Drying rack for clothing", "Portable fans", "Shower gel", "Refrigerator", "Dishes and silverware", "Toaster", "Oven

In [20]:
# 모든 편의시설을 파싱하여 유니크한 편의시설 세트 생성
amenities_set = set()
df['amenities'].apply(lambda x: amenities_set.update(eval(x)))

# 각 편의시설에 대해 이진 특성 열을 데이터프레임에 추가
for amenity in amenities_set:
    df[amenity] = df['amenities'].apply(lambda x: amenity in eval(x))

# 'amenities' 열 삭제 (옵션)
df.drop('amenities', axis=1, inplace=True)

print(df)

  df[amenity] = df['편의시설'].apply(lambda x: amenity in eval(x))
  df[amenity] = df['편의시설'].apply(lambda x: amenity in eval(x))
  df[amenity] = df['편의시설'].apply(lambda x: amenity in eval(x))
  df[amenity] = df['편의시설'].apply(lambda x: amenity in eval(x))
  df[amenity] = df['편의시설'].apply(lambda x: amenity in eval(x))
  df[amenity] = df['편의시설'].apply(lambda x: amenity in eval(x))
  df[amenity] = df['편의시설'].apply(lambda x: amenity in eval(x))
  df[amenity] = df['편의시설'].apply(lambda x: amenity in eval(x))
  df[amenity] = df['편의시설'].apply(lambda x: amenity in eval(x))
  df[amenity] = df['편의시설'].apply(lambda x: amenity in eval(x))
  df[amenity] = df['편의시설'].apply(lambda x: amenity in eval(x))
  df[amenity] = df['편의시설'].apply(lambda x: amenity in eval(x))
  df[amenity] = df['편의시설'].apply(lambda x: amenity in eval(x))
  df[amenity] = df['편의시설'].apply(lambda x: amenity in eval(x))
  df[amenity] = df['편의시설'].apply(lambda x: amenity in eval(x))
  df[amenity] = df['편의시설'].apply(lambda x: amenity in e

                     숙소_id     호스트_id 슈퍼호스트  숙소_수              숙소_지역  \
1                   165409     788732     t   1.0             Temple   
2                     5396       7903     f   3.0     Hôtel-de-Ville   
3                   166370     793032     f   3.0           Entrepôt   
6                     7397       2626     t   9.0     Hôtel-de-Ville   
8                    41106     177387     t   2.0     Hôtel-de-Ville   
...                    ...        ...   ...   ...                ...   
82649  1103696635979533455  564997552     f   1.0  Buttes-Montmartre   
82975  1104437899838519130   13118197     f   2.0  Buttes-Montmartre   
83073  1105169305670543918   13262987     f   2.0  Buttes-Montmartre   
83158  1105941359456285384  344916015     f  27.0       Observatoire   
83393  1107148961327481954   36120497     f   1.0            Reuilly   

                 숙소_유형  수용_인원수  욕실수  침실수  침대수  ...  Proline  refrigerator  \
1      Entire home/apt       2  1.0  0.0  1.0  ...        

In [21]:
df.head()

Unnamed: 0,숙소_id,호스트_id,슈퍼호스트,숙소_수,숙소_지역,숙소_유형,수용_인원수,욕실수,침실수,침대수,...,Proline refrigerator,H/K sound system with Bluetooth and aux,Naturel bio body soap,Fast wifi – 681 Mbps,"65"" HDTV with Amazon Prime Video, Apple TV, Netflix, standard cable",ROSIERES stainless steel single oven,BION shampoo,"42"" HDTV with Amazon Prime Video, Chromecast, Fire TV, Netflix, standard cable","55"" HDTV with Amazon Prime Video, Chromecast, Netflix, Disney+, Apple TV","42"" HDTV with Netflix, standard cable"
1,165409,788732,t,1.0,Temple,Entire home/apt,2,1.0,0.0,1.0,...,False,False,False,False,False,False,False,False,False,False
2,5396,7903,f,3.0,Hôtel-de-Ville,Entire home/apt,2,1.0,0.0,1.0,...,False,False,False,False,False,False,False,False,False,False
3,166370,793032,f,3.0,Entrepôt,Entire home/apt,4,1.0,1.0,1.0,...,False,False,False,False,False,False,False,False,False,False
6,7397,2626,t,9.0,Hôtel-de-Ville,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,False,False,False,False,False,False
8,41106,177387,t,2.0,Hôtel-de-Ville,Entire home/apt,4,1.0,1.0,2.0,...,False,False,False,False,False,False,False,False,False,False


In [22]:
df_guest_prefer= df[(df['리뷰점수'] >= 4.9) & (df['리뷰수'] >= 5) & (df['슈퍼호스트'] == 't')]
df_non_guest_prefer = df[(df['슈퍼호스트'] == 'f') & (df['리뷰수'] >= 5)].sort_values('리뷰점수', ascending=True).head(4716)

In [23]:
#df_guest_prefer.to_csv('../../../../../datasets/paris_prefer.csv',index=False)
#df_non_guest_prefer.to_csv('../../../../../datasets/paris_non_prefer.csv',index=False)

In [50]:
df_guest_prefer = pd.read_csv('../../../../../datasets/paris_prefer.csv')
df_non_guest_prefer = pd.read_csv('../../../../../datasets/paris_non_prefer.csv')

In [46]:
# 모든 열 이름을 소문자로 변환하여 'tv'를 포함하는 모든 편의시설 찾기
tv_columns = [col for col in df.columns if 'tv' in col.lower()]

# 하나의 'TV' 열 생성
df['TV'] = df[tv_columns].any(axis=1)

# 기존의 TV 관련 열 삭제
df.drop(columns=tv_columns, inplace=True)

print(df)

                     숙소_id     호스트_id 슈퍼호스트  숙소_수              숙소_지역  \
1                   165409     788732     t   1.0             Temple   
2                     5396       7903     f   3.0     Hôtel-de-Ville   
3                   166370     793032     f   3.0           Entrepôt   
6                     7397       2626     t   9.0     Hôtel-de-Ville   
8                    41106     177387     t   2.0     Hôtel-de-Ville   
...                    ...        ...   ...   ...                ...   
82649  1103696635979533455  564997552     f   1.0  Buttes-Montmartre   
82975  1104437899838519130   13118197     f   2.0  Buttes-Montmartre   
83073  1105169305670543918   13262987     f   2.0  Buttes-Montmartre   
83158  1105941359456285384  344916015     f  27.0       Observatoire   
83393  1107148961327481954   36120497     f   1.0            Reuilly   

                 숙소_유형  수용_인원수  욕실수  침실수  침대수  ...  \
1      Entire home/apt       2  1.0  0.0  1.0  ...   
2      Entire home/apt     

In [47]:
# 모든 열 이름을 소문자로 변환하여 'tv'를 포함하는 모든 편의시설 찾기
wifi_columns = [col for col in df.columns if 'wifi' in col.lower()]

# 하나의 'TV' 열 생성
df['Wifi'] = df[wifi_columns].any(axis=1)

# 기존의 TV 관련 열 삭제
df.drop(columns=wifi_columns, inplace=True)

print(df)

                     숙소_id     호스트_id 슈퍼호스트  숙소_수              숙소_지역  \
1                   165409     788732     t   1.0             Temple   
2                     5396       7903     f   3.0     Hôtel-de-Ville   
3                   166370     793032     f   3.0           Entrepôt   
6                     7397       2626     t   9.0     Hôtel-de-Ville   
8                    41106     177387     t   2.0     Hôtel-de-Ville   
...                    ...        ...   ...   ...                ...   
82649  1103696635979533455  564997552     f   1.0  Buttes-Montmartre   
82975  1104437899838519130   13118197     f   2.0  Buttes-Montmartre   
83073  1105169305670543918   13262987     f   2.0  Buttes-Montmartre   
83158  1105941359456285384  344916015     f  27.0       Observatoire   
83393  1107148961327481954   36120497     f   1.0            Reuilly   

                 숙소_유형  수용_인원수  욕실수  침실수  침대수  ...  \
1      Entire home/apt       2  1.0  0.0  1.0  ...   
2      Entire home/apt     

In [48]:
df.columns

Index(['숙소_id', '호스트_id', '슈퍼호스트', '숙소_수', '숙소_지역', '숙소_유형', '수용_인원수', '욕실수',
       '침실수', '침대수',
       ...
       'Rosière stainless steel induction stove', 'MKL GREEN NATURE body soap',
       'Scholtès stainless steel induction stove', 'Organic body soap',
       'Savon français  body soap', 'Proline  refrigerator',
       'H/K sound system with Bluetooth and aux', 'Naturel bio body soap',
       'ROSIERES stainless steel single oven', 'BION shampoo'],
      dtype='object', length=4175)

In [49]:
len(df[df['Wifi']==True])

KeyError: 'Wifi'

In [62]:
body_columns = [col for col in df.columns if 'body soap' in col.lower()]

# 하나의 'TV' 열 생성
df['body_soup'] = df[body_columns].any(axis=1)

# 기존의 TV 관련 열 삭제
df.drop(columns=body_columns, inplace=True)

print(df)

  df['body_soup'] = df[body_columns].any(axis=1)


                     숙소_id     호스트_id 슈퍼호스트  숙소_수              숙소_지역  \
1                   165409     788732     t   1.0             Temple   
2                     5396       7903     f   3.0     Hôtel-de-Ville   
3                   166370     793032     f   3.0           Entrepôt   
6                     7397       2626     t   9.0     Hôtel-de-Ville   
8                    41106     177387     t   2.0     Hôtel-de-Ville   
...                    ...        ...   ...   ...                ...   
82649  1103696635979533455  564997552     f   1.0  Buttes-Montmartre   
82975  1104437899838519130   13118197     f   2.0  Buttes-Montmartre   
83073  1105169305670543918   13262987     f   2.0  Buttes-Montmartre   
83158  1105941359456285384  344916015     f  27.0       Observatoire   
83393  1107148961327481954   36120497     f   1.0            Reuilly   

                 숙소_유형  수용_인원수  욕실수  침실수  침대수  ...  IKEA induction stove  \
1      Entire home/apt       2  1.0  0.0  1.0  ...         

In [67]:
shampoo_columns = [col for col in df.columns if 'shampoo' in col.lower()]
# 하나의 'TV' 열 생성
df['shampoo'] = df[shampoo_columns].any(axis=1)

# 기존의 TV 관련 열 삭제
df.drop(columns=shampoo_columns, inplace=True)

print(df)

                     숙소_id     호스트_id 슈퍼호스트  숙소_수              숙소_지역  \
1                   165409     788732     t   1.0             Temple   
2                     5396       7903     f   3.0     Hôtel-de-Ville   
3                   166370     793032     f   3.0           Entrepôt   
6                     7397       2626     t   9.0     Hôtel-de-Ville   
8                    41106     177387     t   2.0     Hôtel-de-Ville   
...                    ...        ...   ...   ...                ...   
82649  1103696635979533455  564997552     f   1.0  Buttes-Montmartre   
82975  1104437899838519130   13118197     f   2.0  Buttes-Montmartre   
83073  1105169305670543918   13262987     f   2.0  Buttes-Montmartre   
83158  1105941359456285384  344916015     f  27.0       Observatoire   
83393  1107148961327481954   36120497     f   1.0            Reuilly   

                 숙소_유형  수용_인원수  욕실수  침실수  침대수  ...  IKEA induction stove  \
1      Entire home/apt       2  1.0  0.0  1.0  ...         

  df['shampoo'] = df[shampoo_columns].any(axis=1)


In [68]:
df

Unnamed: 0,숙소_id,호스트_id,슈퍼호스트,숙소_수,숙소_지역,숙소_유형,수용_인원수,욕실수,침실수,침대수,...,IKEA induction stove,Sumsumg sound system with Bluetooth and aux,All kind conditioner,Rosière stainless steel induction stove,Scholtès stainless steel induction stove,Proline refrigerator,H/K sound system with Bluetooth and aux,ROSIERES stainless steel single oven,body_soup,shampoo
1,165409,788732,t,1.0,Temple,Entire home/apt,2,1.0,0.0,1.0,...,False,False,False,False,False,False,False,False,False,True
2,5396,7903,f,3.0,Hôtel-de-Ville,Entire home/apt,2,1.0,0.0,1.0,...,False,False,False,False,False,False,False,False,True,True
3,166370,793032,f,3.0,Entrepôt,Entire home/apt,4,1.0,1.0,1.0,...,False,False,False,False,False,False,False,False,False,True
6,7397,2626,t,9.0,Hôtel-de-Ville,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,False,False,False,False,False,True
8,41106,177387,t,2.0,Hôtel-de-Ville,Entire home/apt,4,1.0,1.0,2.0,...,False,False,False,False,False,False,False,False,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82649,1103696635979533455,564997552,f,1.0,Buttes-Montmartre,Private room,2,1.0,1.0,1.0,...,False,False,False,False,False,False,False,False,True,False
82975,1104437899838519130,13118197,f,2.0,Buttes-Montmartre,Entire home/apt,2,1.0,1.0,0.0,...,False,False,False,False,False,False,False,False,True,False
83073,1105169305670543918,13262987,f,2.0,Buttes-Montmartre,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,False,False,False,False,False,False
83158,1105941359456285384,344916015,f,27.0,Observatoire,Entire home/apt,6,1.0,2.0,3.0,...,False,False,False,False,False,False,False,False,False,True


In [72]:
oven_columns = [col for col in df.columns if 'oven' in col.lower()]

print(len(oven_columns))

df['oven'] = df[oven_columns].any(axis=1)

# 기존의 TV 관련 열 삭제
df.drop(columns=oven_columns, inplace=True)

df

1


Unnamed: 0,숙소_id,호스트_id,슈퍼호스트,숙소_수,숙소_지역,숙소_유형,수용_인원수,욕실수,침실수,침대수,...,Google speaker sound system,IKEA induction stove,Sumsumg sound system with Bluetooth and aux,All kind conditioner,Rosière stainless steel induction stove,Scholtès stainless steel induction stove,Proline refrigerator,H/K sound system with Bluetooth and aux,body_soup,shampoo
1,165409,788732,t,1.0,Temple,Entire home/apt,2,1.0,0.0,1.0,...,False,False,False,False,False,False,False,False,False,True
2,5396,7903,f,3.0,Hôtel-de-Ville,Entire home/apt,2,1.0,0.0,1.0,...,False,False,False,False,False,False,False,False,True,True
3,166370,793032,f,3.0,Entrepôt,Entire home/apt,4,1.0,1.0,1.0,...,False,False,False,False,False,False,False,False,False,True
6,7397,2626,t,9.0,Hôtel-de-Ville,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,False,False,False,False,False,True
8,41106,177387,t,2.0,Hôtel-de-Ville,Entire home/apt,4,1.0,1.0,2.0,...,False,False,False,False,False,False,False,False,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82649,1103696635979533455,564997552,f,1.0,Buttes-Montmartre,Private room,2,1.0,1.0,1.0,...,False,False,False,False,False,False,False,False,True,False
82975,1104437899838519130,13118197,f,2.0,Buttes-Montmartre,Entire home/apt,2,1.0,1.0,0.0,...,False,False,False,False,False,False,False,False,True,False
83073,1105169305670543918,13262987,f,2.0,Buttes-Montmartre,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,False,False,False,False,False,False
83158,1105941359456285384,344916015,f,27.0,Observatoire,Entire home/apt,6,1.0,2.0,3.0,...,False,False,False,False,False,False,False,False,False,True


In [73]:
stove_columns = [col for col in df.columns if 'stove' in col.lower()]

print(len(stove_columns))

df['stove'] = df[stove_columns].any(axis=1)

# 기존의 TV 관련 열 삭제
df.drop(columns=stove_columns, inplace=True)

df

326


  df['stove'] = df[stove_columns].any(axis=1)


Unnamed: 0,숙소_id,호스트_id,슈퍼호스트,숙소_수,숙소_지역,숙소_유형,수용_인원수,욕실수,침실수,침대수,...,Urban Ears Bluetooth sound system,Paid parking garage on premises – 1 space,Google speaker sound system,Sumsumg sound system with Bluetooth and aux,All kind conditioner,Proline refrigerator,H/K sound system with Bluetooth and aux,body_soup,shampoo,stove
1,165409,788732,t,1.0,Temple,Entire home/apt,2,1.0,0.0,1.0,...,False,True,False,False,False,False,False,False,True,False
2,5396,7903,f,3.0,Hôtel-de-Ville,Entire home/apt,2,1.0,0.0,1.0,...,False,False,False,False,False,False,False,True,True,True
3,166370,793032,f,3.0,Entrepôt,Entire home/apt,4,1.0,1.0,1.0,...,False,False,False,False,False,False,False,False,True,False
6,7397,2626,t,9.0,Hôtel-de-Ville,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,False,False,False,False,True,True
8,41106,177387,t,2.0,Hôtel-de-Ville,Entire home/apt,4,1.0,1.0,2.0,...,False,False,False,False,False,False,False,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82649,1103696635979533455,564997552,f,1.0,Buttes-Montmartre,Private room,2,1.0,1.0,1.0,...,False,False,False,False,False,False,False,True,False,False
82975,1104437899838519130,13118197,f,2.0,Buttes-Montmartre,Entire home/apt,2,1.0,1.0,0.0,...,False,False,False,False,False,False,False,True,False,True
83073,1105169305670543918,13262987,f,2.0,Buttes-Montmartre,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,False,False,False,False,False,False
83158,1105941359456285384,344916015,f,27.0,Observatoire,Entire home/apt,6,1.0,2.0,3.0,...,False,False,False,False,False,False,False,False,True,False


In [74]:

refrigerator_columns = [col for col in df.columns if 'refrigerator' in col.lower()]

print(len(refrigerator_columns))

df['refrigerator'] = df[refrigerator_columns].any(axis=1)

# 기존의 TV 관련 열 삭제
df.drop(columns=refrigerator_columns, inplace=True)

df

342


  df['refrigerator'] = df[refrigerator_columns].any(axis=1)


Unnamed: 0,숙소_id,호스트_id,슈퍼호스트,숙소_수,숙소_지역,숙소_유형,수용_인원수,욕실수,침실수,침대수,...,Urban Ears Bluetooth sound system,Paid parking garage on premises – 1 space,Google speaker sound system,Sumsumg sound system with Bluetooth and aux,All kind conditioner,H/K sound system with Bluetooth and aux,body_soup,shampoo,stove,refrigerator
1,165409,788732,t,1.0,Temple,Entire home/apt,2,1.0,0.0,1.0,...,False,True,False,False,False,False,False,True,False,True
2,5396,7903,f,3.0,Hôtel-de-Ville,Entire home/apt,2,1.0,0.0,1.0,...,False,False,False,False,False,False,True,True,True,True
3,166370,793032,f,3.0,Entrepôt,Entire home/apt,4,1.0,1.0,1.0,...,False,False,False,False,False,False,False,True,False,True
6,7397,2626,t,9.0,Hôtel-de-Ville,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,False,False,False,True,True,True
8,41106,177387,t,2.0,Hôtel-de-Ville,Entire home/apt,4,1.0,1.0,2.0,...,False,False,False,False,False,False,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82649,1103696635979533455,564997552,f,1.0,Buttes-Montmartre,Private room,2,1.0,1.0,1.0,...,False,False,False,False,False,False,True,False,False,False
82975,1104437899838519130,13118197,f,2.0,Buttes-Montmartre,Entire home/apt,2,1.0,1.0,0.0,...,False,False,False,False,False,False,True,False,True,True
83073,1105169305670543918,13262987,f,2.0,Buttes-Montmartre,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,False,False,False,False,False,True
83158,1105941359456285384,344916015,f,27.0,Observatoire,Entire home/apt,6,1.0,2.0,3.0,...,False,False,False,False,False,False,False,True,False,True


In [75]:


sound_columns = [col for col in df.columns if 'sound' in col.lower()]

print(len(sound_columns))

df['sound'] = df[sound_columns].any(axis=1)

# 기존의 TV 관련 열 삭제
df.drop(columns=sound_columns, inplace=True)

df

670


  df['sound'] = df[sound_columns].any(axis=1)


Unnamed: 0,숙소_id,호스트_id,슈퍼호스트,숙소_수,숙소_지역,숙소_유형,수용_인원수,욕실수,침실수,침대수,...,"Exercise equipment: free weights, stationary bike, yoga mat",Yves Rocher conditioner,Housekeeping available every day - included with your stay,Paid parking garage on premises – 1 space,All kind conditioner,body_soup,shampoo,stove,refrigerator,sound
1,165409,788732,t,1.0,Temple,Entire home/apt,2,1.0,0.0,1.0,...,False,False,False,True,False,False,True,False,True,False
2,5396,7903,f,3.0,Hôtel-de-Ville,Entire home/apt,2,1.0,0.0,1.0,...,False,False,False,False,False,True,True,True,True,False
3,166370,793032,f,3.0,Entrepôt,Entire home/apt,4,1.0,1.0,1.0,...,False,False,False,False,False,False,True,False,True,False
6,7397,2626,t,9.0,Hôtel-de-Ville,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,False,False,True,True,True,False
8,41106,177387,t,2.0,Hôtel-de-Ville,Entire home/apt,4,1.0,1.0,2.0,...,False,False,False,False,False,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82649,1103696635979533455,564997552,f,1.0,Buttes-Montmartre,Private room,2,1.0,1.0,1.0,...,False,False,False,False,False,True,False,False,False,False
82975,1104437899838519130,13118197,f,2.0,Buttes-Montmartre,Entire home/apt,2,1.0,1.0,0.0,...,False,False,False,False,False,True,False,True,True,False
83073,1105169305670543918,13262987,f,2.0,Buttes-Montmartre,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,False,False,False,False,True,False
83158,1105941359456285384,344916015,f,27.0,Observatoire,Entire home/apt,6,1.0,2.0,3.0,...,False,False,False,False,False,False,True,False,True,False


In [76]:


conditioner_columns = [col for col in df.columns if 'conditioner' in col.lower()]

print(len(conditioner_columns))

df['conditioner'] = df[conditioner_columns].any(axis=1)

# 기존의 TV 관련 열 삭제
df.drop(columns=conditioner_columns, inplace=True)

df

284


  df['conditioner'] = df[conditioner_columns].any(axis=1)


Unnamed: 0,숙소_id,호스트_id,슈퍼호스트,숙소_수,숙소_지역,숙소_유형,수용_인원수,욕실수,침실수,침대수,...,Private patio or balcony,"Exercise equipment: free weights, stationary bike, yoga mat",Housekeeping available every day - included with your stay,Paid parking garage on premises – 1 space,body_soup,shampoo,stove,refrigerator,sound,conditioner
1,165409,788732,t,1.0,Temple,Entire home/apt,2,1.0,0.0,1.0,...,False,False,False,True,False,True,False,True,False,False
2,5396,7903,f,3.0,Hôtel-de-Ville,Entire home/apt,2,1.0,0.0,1.0,...,False,False,False,False,True,True,True,True,False,False
3,166370,793032,f,3.0,Entrepôt,Entire home/apt,4,1.0,1.0,1.0,...,False,False,False,False,False,True,False,True,False,False
6,7397,2626,t,9.0,Hôtel-de-Ville,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,False,True,True,True,False,False
8,41106,177387,t,2.0,Hôtel-de-Ville,Entire home/apt,4,1.0,1.0,2.0,...,False,False,False,False,True,True,True,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82649,1103696635979533455,564997552,f,1.0,Buttes-Montmartre,Private room,2,1.0,1.0,1.0,...,False,False,False,False,True,False,False,False,False,False
82975,1104437899838519130,13118197,f,2.0,Buttes-Montmartre,Entire home/apt,2,1.0,1.0,0.0,...,False,False,False,False,True,False,True,True,False,False
83073,1105169305670543918,13262987,f,2.0,Buttes-Montmartre,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,False,False,False,True,False,False
83158,1105941359456285384,344916015,f,27.0,Observatoire,Entire home/apt,6,1.0,2.0,3.0,...,False,False,False,False,False,True,False,True,False,False


In [78]:


housekeeping_columns = [col for col in df.columns if 'housekeeping' in col.lower()]

print(len(housekeeping_columns))

df['housekeeping'] = df[housekeeping_columns].any(axis=1)

# 기존의 TV 관련 열 삭제
df.drop(columns=housekeeping_columns, inplace=True)

df

56


  df['housekeeping'] = df[housekeeping_columns].any(axis=1)


Unnamed: 0,숙소_id,호스트_id,슈퍼호스트,숙소_수,숙소_지역,숙소_유형,수용_인원수,욕실수,침실수,침대수,...,Private patio or balcony,"Exercise equipment: free weights, stationary bike, yoga mat",Paid parking garage on premises – 1 space,body_soup,shampoo,stove,refrigerator,sound,conditioner,housekeeping
1,165409,788732,t,1.0,Temple,Entire home/apt,2,1.0,0.0,1.0,...,False,False,True,False,True,False,True,False,False,False
2,5396,7903,f,3.0,Hôtel-de-Ville,Entire home/apt,2,1.0,0.0,1.0,...,False,False,False,True,True,True,True,False,False,False
3,166370,793032,f,3.0,Entrepôt,Entire home/apt,4,1.0,1.0,1.0,...,False,False,False,False,True,False,True,False,False,False
6,7397,2626,t,9.0,Hôtel-de-Ville,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,True,True,True,False,False,False
8,41106,177387,t,2.0,Hôtel-de-Ville,Entire home/apt,4,1.0,1.0,2.0,...,False,False,False,True,True,True,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82649,1103696635979533455,564997552,f,1.0,Buttes-Montmartre,Private room,2,1.0,1.0,1.0,...,False,False,False,True,False,False,False,False,False,False
82975,1104437899838519130,13118197,f,2.0,Buttes-Montmartre,Entire home/apt,2,1.0,1.0,0.0,...,False,False,False,True,False,True,True,False,False,False
83073,1105169305670543918,13262987,f,2.0,Buttes-Montmartre,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,False,False,True,False,False,False
83158,1105941359456285384,344916015,f,27.0,Observatoire,Entire home/apt,6,1.0,2.0,3.0,...,False,False,False,False,True,False,True,False,False,False


In [82]:
parking_columns = [col for col in df.columns if 'parking' in col.lower()]

df['parking'] = df[parking_columns].any(axis=1)
print(len(parking_columns))


df.drop(columns=parking_columns, inplace=True)
df


  df['parking'] = df[parking_columns].any(axis=1)


In [83]:
df

Unnamed: 0,숙소_id,호스트_id,슈퍼호스트,숙소_수,숙소_지역,숙소_유형,수용_인원수,욕실수,침실수,침대수,...,Private patio or balcony,"Exercise equipment: free weights, stationary bike, yoga mat",body_soup,shampoo,stove,refrigerator,sound,conditioner,housekeeping,parking
1,165409,788732,t,1.0,Temple,Entire home/apt,2,1.0,0.0,1.0,...,False,False,False,True,False,True,False,False,False,True
2,5396,7903,f,3.0,Hôtel-de-Ville,Entire home/apt,2,1.0,0.0,1.0,...,False,False,True,True,True,True,False,False,False,False
3,166370,793032,f,3.0,Entrepôt,Entire home/apt,4,1.0,1.0,1.0,...,False,False,False,True,False,True,False,False,False,True
6,7397,2626,t,9.0,Hôtel-de-Ville,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,True,True,True,False,False,False,True
8,41106,177387,t,2.0,Hôtel-de-Ville,Entire home/apt,4,1.0,1.0,2.0,...,False,False,True,True,True,True,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82649,1103696635979533455,564997552,f,1.0,Buttes-Montmartre,Private room,2,1.0,1.0,1.0,...,False,False,True,False,False,False,False,False,False,False
82975,1104437899838519130,13118197,f,2.0,Buttes-Montmartre,Entire home/apt,2,1.0,1.0,0.0,...,False,False,True,False,True,True,False,False,False,False
83073,1105169305670543918,13262987,f,2.0,Buttes-Montmartre,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,False,True,False,False,False,False
83158,1105941359456285384,344916015,f,27.0,Observatoire,Entire home/apt,6,1.0,2.0,3.0,...,False,False,False,True,False,True,False,False,False,True


In [89]:
weights_columns = [
    col for col in df.columns if 'weights' in col.lower() or 'exercise' in col.lower()
]


df['weights'] = df[weights_columns].any(axis=1)
print(len(weights_columns))


df.drop(columns=weights_columns, inplace=True)
df


10


Unnamed: 0,숙소_id,호스트_id,슈퍼호스트,숙소_수,숙소_지역,숙소_유형,수용_인원수,욕실수,침실수,침대수,...,Paid crib - available upon request,Private patio or balcony,body_soup,shampoo,stove,refrigerator,sound,conditioner,housekeeping,parking
1,165409,788732,t,1.0,Temple,Entire home/apt,2,1.0,0.0,1.0,...,False,False,False,True,False,True,False,False,False,True
2,5396,7903,f,3.0,Hôtel-de-Ville,Entire home/apt,2,1.0,0.0,1.0,...,False,False,True,True,True,True,False,False,False,False
3,166370,793032,f,3.0,Entrepôt,Entire home/apt,4,1.0,1.0,1.0,...,False,False,False,True,False,True,False,False,False,True
6,7397,2626,t,9.0,Hôtel-de-Ville,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,True,True,True,False,False,False,True
8,41106,177387,t,2.0,Hôtel-de-Ville,Entire home/apt,4,1.0,1.0,2.0,...,False,False,True,True,True,True,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82649,1103696635979533455,564997552,f,1.0,Buttes-Montmartre,Private room,2,1.0,1.0,1.0,...,False,False,True,False,False,False,False,False,False,False
82975,1104437899838519130,13118197,f,2.0,Buttes-Montmartre,Entire home/apt,2,1.0,1.0,0.0,...,False,False,True,False,True,True,False,False,False,False
83073,1105169305670543918,13262987,f,2.0,Buttes-Montmartre,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,False,True,False,False,False,False
83158,1105941359456285384,344916015,f,27.0,Observatoire,Entire home/apt,6,1.0,2.0,3.0,...,False,False,False,True,False,True,False,False,False,True


In [90]:
df.columns[:-20]

Index(['숙소_id', '호스트_id', '슈퍼호스트', '숙소_수', '숙소_지역', '숙소_유형', '수용_인원수', '욕실수',
       '침실수', '침대수',
       ...
       'Lake access',
       'Coffee maker: espresso machine, french press, Nespresso', 'Smart lock',
       'BBQ grill: electric',
       'Children’s books and toys for ages 0-2 years old, 2-5 years old, 5-10 years old, and 10+ years old',
       'Private hot tub - available all year, open specific hours',
       'Pre-stocking - included with your stay', 'Coffee',
       'Public or shared beach access', 'Board games'],
      dtype='object', length=438)

In [91]:
game_columns = [col for col in df.columns if 'game' in col.lower() ]


df['game'] = df[game_columns].any(axis=1)
print(len(game_columns))


df.drop(columns=game_columns, inplace=True)
df.columns[:-20]

40


  df['game'] = df[game_columns].any(axis=1)


Unnamed: 0,숙소_id,호스트_id,슈퍼호스트,숙소_수,숙소_지역,숙소_유형,수용_인원수,욕실수,침실수,침대수,...,Private patio or balcony,body_soup,shampoo,stove,refrigerator,sound,conditioner,housekeeping,parking,game
1,165409,788732,t,1.0,Temple,Entire home/apt,2,1.0,0.0,1.0,...,False,False,True,False,True,False,False,False,True,False
2,5396,7903,f,3.0,Hôtel-de-Ville,Entire home/apt,2,1.0,0.0,1.0,...,False,True,True,True,True,False,False,False,False,False
3,166370,793032,f,3.0,Entrepôt,Entire home/apt,4,1.0,1.0,1.0,...,False,False,True,False,True,False,False,False,True,True
6,7397,2626,t,9.0,Hôtel-de-Ville,Entire home/apt,4,1.0,2.0,2.0,...,False,False,True,True,True,False,False,False,True,False
8,41106,177387,t,2.0,Hôtel-de-Ville,Entire home/apt,4,1.0,1.0,2.0,...,False,True,True,True,True,True,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82649,1103696635979533455,564997552,f,1.0,Buttes-Montmartre,Private room,2,1.0,1.0,1.0,...,False,True,False,False,False,False,False,False,False,True
82975,1104437899838519130,13118197,f,2.0,Buttes-Montmartre,Entire home/apt,2,1.0,1.0,0.0,...,False,True,False,True,True,False,False,False,False,False
83073,1105169305670543918,13262987,f,2.0,Buttes-Montmartre,Entire home/apt,4,1.0,2.0,2.0,...,False,False,False,False,True,False,False,False,False,False
83158,1105941359456285384,344916015,f,27.0,Observatoire,Entire home/apt,6,1.0,2.0,3.0,...,False,False,True,False,True,False,False,False,True,False


In [94]:
df.columns[:-20]

Index(['숙소_id', '호스트_id', '슈퍼호스트', '숙소_수', '숙소_지역', '숙소_유형', '수용_인원수', '욕실수',
       '침실수', '침대수',
       ...
       'Sun loungers', 'Lake access',
       'Coffee maker: espresso machine, french press, Nespresso', 'Smart lock',
       'BBQ grill: electric',
       'Children’s books and toys for ages 0-2 years old, 2-5 years old, 5-10 years old, and 10+ years old',
       'Private hot tub - available all year, open specific hours',
       'Pre-stocking - included with your stay', 'Coffee',
       'Public or shared beach access'],
      dtype='object', length=399)

In [95]:
coffee_columns = [col for col in df.columns if 'coffee' in col.lower() ]


df['coffee'] = df[coffee_columns].any(axis=1)
print(len(coffee_columns))


df.drop(columns=coffee_columns, inplace=True)
df.columns[:-20]

45


  df['coffee'] = df[coffee_columns].any(axis=1)


Index(['숙소_id', '호스트_id', '슈퍼호스트', '숙소_수', '숙소_지역', '숙소_유형', '수용_인원수', '욕실수',
       '침실수', '침대수',
       ...
       'Air conditioning',
       'Shared outdoor pool - available all year, open specific hours, heated, olympic-sized',
       'Shared outdoor pool - available seasonally, open specific hours, heated',
       'Sun loungers', 'Lake access', 'Smart lock', 'BBQ grill: electric',
       'Children’s books and toys for ages 0-2 years old, 2-5 years old, 5-10 years old, and 10+ years old',
       'Private hot tub - available all year, open specific hours',
       'Pre-stocking - included with your stay'],
      dtype='object', length=355)

In [99]:
pool_columns = [col for col in df.columns if 'pool' in col.lower() ]

df['pool'] = df[pool_columns].any(axis=1)
print(len(pool_columns))


df.drop(columns=pool_columns, inplace=True)
df.columns[:80]

1


Index(['숙소_id', '호스트_id', '슈퍼호스트', '숙소_수', '숙소_지역', '숙소_유형', '수용_인원수', '욕실수',
       '침실수', '침대수', '숙소_가격', '최소_숙박일', '최대_숙박일', '리뷰수', '30일_리뷰수', '리뷰점수',
       '숙소_정확성_리뷰점수', '숙소_청결도_리뷰점수', '숙소_체크인_리뷰점수', '숙소_소통_리뷰점수', '숙소_위치_리뷰점수',
       '숙소_가격_리뷰점수', '평균_리뷰수', 'url', '숙소_특징', '12개월_리뷰수', '예약가능여부', '마지막_리뷰',
       'Host greets you', 'Garden view', 'Baby bath - always at the listing',
       'Baking sheet', 'Wine cellar', 'EV charger - level 2, tesla only',
       'Dryer – In unit', 'Shared outdoor kitchen', 'River view', 'Bikes',
       'Courtyard view', 'Shared gym nearby', 'Gated community',
       'First aid kit', 'Baby monitor', 'Kitchenette',
       'Spa services – included with your stay', 'Gym nearby', 'Ironing board',
       'Building staff', 'Washer', 'Table corner guards', 'Canal view',
       'Paid washer – In unit', 'Sauna', 'Private BBQ grill: charcoal',
       'Backyard - Fully fenced', 'Projector and screen', 'Cooking basics',
       'Children’s books and toys for ag