In [None]:
import pandas as pd

def convert_list_column_to_binary(df, column_name):
    """
    將包含列表的欄位轉換為多個二元（yes/no）欄位
    
    Parameters:
    df: DataFrame
    column_name: 要處理的欄位名稱
    
    Returns:
    DataFrame: 處理後的 DataFrame
    """
    # 創建臨時列表欄位
    temp_col_name = f'{column_name}_list'
    df[temp_col_name] = df[column_name].str.strip('[]').str.replace("'", "").str.split(', ')
    
    # 獲取所有可能的項目
    all_items = set()
    for items in df[temp_col_name].dropna():
        if isinstance(items, list):
            all_items.update(items)
    
    # 為每個項目創建新的欄位
    for item in all_items:
        # 創建合法的欄位名稱（移除特殊字符）
        clean_item = item.replace(' ', '_').replace('&', 'and')
        df[f'{column_name}_{clean_item}'] = df[temp_col_name].apply(
            lambda x: 'yes' if isinstance(x, list) and item in x else 'no'
        )
    
    # 刪除臨時欄位
    df = df.drop(temp_col_name, axis=1)
    
    return df, sorted(all_items)

# 讀取 CSV 文件
df = pd.read_csv('hostels_original.csv')

# 處理所有指定的欄位
columns_to_process = ['Free', 'General', 'Services', 'Food & Drink', 'Entertainment']

# 存儲每個類別的項目，用於後續查看
items_by_category = {}

# 處理每個欄位
for column in columns_to_process:
    df, items = convert_list_column_to_binary(df, column)
    items_by_category[column] = items

# 儲存結果
df.to_csv('hostels_processed.csv', index=False)

# 打印每個類別的所有項目（可選）
for category, items in items_by_category.items():
    print(f"\n{category} 類別的所有項目：")
    for item in items:
        print(f"- {item}")

# 檢查結果（可選）
# 對於每個類別，顯示原始欄位和新增的欄位的前幾行
for category in columns_to_process:
    print(f"\n{category} 相關欄位的前幾行：")
    related_columns = [category] + [col for col in df.columns if col.startswith(f'{category}_')]
    print(df[related_columns].head())


Free 類別的所有項目：
- Free Airport Transfer
- Free Breakfast
- Free City Maps
- Free City Tour
- Free Internet Access
- Free Parking
- Free WiFi
- Linen Included
- Towels Included

General 類別的所有項目：
- "Childrens Play Area"
- Adaptors
- Air Conditioning
- BBQ
- Bicycle Parking
- Book Exchange
- Breakfast Not Included
- Cable TV
- Card Phones
- Ceiling Fan
- Common Room
- Cooker
- Dishwasher
- Dryer
- Elevator
- Fitness Centre
- Flexible NRR
- Follows Covid-19 sanitation guidance
- Fridge/Freezer
- Hair Dryers
- Hair Dryers For Hire
- Hot Showers
- Hot Tub
- Iron / Ironing Board
- Jobs Board
- Key Card Access
- Linen Not Included
- Meeting Rooms
- Microwave
- Outdoor Terrace
- Parking
- Reading Light
- Safe Deposit Box
- Security Lockers
- Self-Catering Facilities
- Towels Not Included
- Utensils
- Washing Machine
- Wheelchair Friendly

Services 類別的所有項目：
- 24 Hour Reception
- 24 Hour Security
- Beauty Salon
- Bicycle Hire
- Currency Exchange
- Express check-in / out
- Fax Service
- Housekeepin

In [42]:
import pandas as pd

# 讀取CSV文件
df = pd.read_csv('hostels_processed.csv')

# 1. 列出所有column名稱和總數
all_columns = df.columns.tolist()
total_columns = len(all_columns)
print(f"1. 總欄位統計")
print(f"　 總欄位數：{total_columns}")
print("   所有欄位名稱")
for col in all_columns:
    print("  ", col)

print("\n2. 各類型欄位統計")


# 定義要搜尋的前綴
prefixes = ['free_', 'general_', 'service_', 'foodDrink_', 'entertainment_']

# 對每個前綴進行統計
for prefix in prefixes:
    # 找出符合前綴的欄位
    matching_columns = [col for col in all_columns if col.startswith(prefix)]
    
    print(f"\n   {prefix}相關欄位: {len(matching_columns)}個")
    for col in matching_columns:
        print("  ", col)


1. 總欄位統計
　 總欄位數：96
   所有欄位名稱
   hostel_name
   hostel_link
   city
   price
   distance
   summary_score
   rating_band
   value_for_money
   security
   location
   staff
   atmosphere
   cleanliness
   facilities
   Free
   General
   Services
   Food & Drink
   Entertainment
   latitude
   longitude
   free_breakfast
   free_parking
   free_cityMaps
   free_wifi
   free_cityTour
   free_linen
   free_internetAccess
   free_towel
   free_airportTransfer
   general_outdoorTerrace
   general_washingMachine
   general_cardPhones
   general_iron
   general_hairDryers
   general_utensils
   general_meetingRoom
   general_commonRoom
   general_readingLight
   general_cableTV
   general_airConditioning
   general_jobBoard
   general_securityLocker
   general_adaptor
   general_elevator
   general_covid19Guide
   general_microwave
   general_ceilingFan
   general_wheelchairFriendly
   general_flexibleNRR
   general_bicycleParking
   general_cooker
   general_safeDepositBox
   general_selfCat