In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Path of the restaurant CSV that the rest of the notebook loads and analyses.
# NOTE(review): '/content/' looks like a Colab working directory — confirm and
# adjust this path when running in another environment.
csv_file = '/content/hk_restos_without_none_district.csv'

In [None]:
# Read the restaurant table from the configured CSV path into a DataFrame.
df = pd.read_csv(filepath_or_buffer=csv_file)

In [None]:
# Show the loaded table as this cell's output for a quick visual check.
df

In [None]:
# Lookup table: administrative district name -> list of area names (as they
# appear in the 'District' column) that belong to it. There are 18 keys.
# NOTE(review): a few entries ('Wai Chai', 'Shum Shui Po', 'Lai Chi Lok') look
# like variant spellings, presumably kept so that misspelled values in the raw
# data still map to a district — confirm against the CSV.
district_mapping = {
    'Central and Western District': ['Central', 'Mid-Levels', 'Sheung Wan', 'Sai Ying Pun', 'Kennedy Town', 'Shek Tong Tsui', 'Western District', 'Admiralty', 'The Peak', 'Wing Lok Street', 'Sai Wan'],
    'Eastern District': ['Chai Wan', 'Shau Kei Wan', 'Sai Wan Ho', 'Quarry Bay', 'North Point', 'Fortress Hill', 'Tin Hau', 'Tai Koo', 'Heng Fa Chuen', 'Siu Sai Wan'],
    'Southern District': ['Repulse Bay', 'Aberdeen', 'Ap Lei Chau', 'Pok Fu Lam', 'Wong Chuk Hang', 'Tin Wan', 'Stanley', 'Old Main Street'],
    'Wan Chai District': ['Wan Chai', 'Causeway Bay', 'Happy Valley', 'Tai Hang', 'Lockhart Road', 'Wai Chai'],
    'Sham Shui Po District': ['Sham Shui Po', 'Cheung Sha Wan', 'Lai Chi Kok', 'Shek Kip Mei', 'Mei Foo', 'Shum Shui Po', 'Lai Chi Lok'],
    'Kowloon City District': ['Kowloon City', 'To Kwa Wan', 'Hung Hom', 'Ho Man Tin', 'Kai Tak', 'Kowloon Tong'],
    'Kwun Tong District': ['Kwun Tong', 'Lam Tin', 'Yau Tong', 'Ngau Tau Kok', 'Kowloon Bay', 'San Po Kong', 'Lei Yue Mun', 'Hung To Road', 'Hang On Street Kowloon', 'Choi Hung', 'Po Tat'],
    'Wong Tai Sin District': ['Wong Tai Sin', 'Diamond Hill', 'Lok Fu', 'Ngau Chi Wan', 'Tsz Wan Shan'],
    'Yau Tsim Mong District': ['Mong Kok', 'Tsim Sha Tsui', 'Jordan', 'Yau Ma Tei', 'Prince Edward', 'Tai Kok Tsui'],
    'Islands District': ['Tung Chung', 'Discovery Bay', 'Chek Lap Kok'],
    'Kwai Tsing District': ['Tsing Yi', 'Kwai Chung', 'Kwai Fong', 'Kwai Hing'],
    'North District': ['Sheung Shui', 'Fanling'],
    'Sai Kung District': ['Sai Kung', 'Tseung Kwan O', 'Hang Hau', 'Po Lam'],
    'Sha Tin District': ['Sha Tin', 'Ma On Shan', 'Tai Wai', 'Shek Mun', 'Fo Tan'],
    'Tai Po District': ['Tai Po', 'Tai Wo', 'Pak Shek Kok'],
    'Tsuen Wan District': ['Tsuen Wan', 'Sham Tseng'],
    'Tuen Mun District': ['Tuen Mun'],
    'Yuen Long District': ['Yuen Long', 'Tin Shui Wai', 'Kam Tin']
}
# Areas not listed above are not removed from df: map_to_admin_district labels
# them 'Unknown', and only the per-district summary table filters that label out.

In [None]:
def map_to_admin_district(district):
    """Return the administrative district whose area list contains ``district``.

    The entries of ``district_mapping`` are checked in insertion order and the
    first key whose list contains ``district`` wins.

    Args:
        district: An area name, compared by equality against the listed names.

    Returns:
        The matching key of ``district_mapping``, or the string ``'Unknown'``
        when no list contains ``district``.
    """
    matches = (
        admin_name
        for admin_name, area_names in district_mapping.items()
        if district in area_names
    )
    # next() with a default stops at the first hit and falls back to 'Unknown'.
    return next(matches, 'Unknown')

# Add an 'Admin_District' column by translating every raw 'District' value
# through map_to_admin_district (unmatched values become 'Unknown').
df['Admin_District'] = df['District'].map(map_to_admin_district)

In [None]:
# Report the distinct raw 'District' values that were labelled 'Unknown',
# i.e. the ones missing from district_mapping.
is_unmapped = df['Admin_District'] == 'Unknown'
print("undefined areas：")
print(df.loc[is_unmapped, 'District'].unique())
# Original author's note on the output: Tai Wa -> 1 restaurant

In [None]:
# Restaurants per administrative district, largest count first
# (sort=True / ascending=False are the value_counts defaults, spelled out).
district_counts = df['Admin_District'].value_counts(sort=True, ascending=False)

In [None]:
# Figure: bar chart of the restaurant count for each administrative district.
bar_fig, bar_ax = plt.subplots(figsize=(12, 6))
district_counts.plot.bar(ax=bar_ax, color='skyblue')
bar_ax.set_title('Number of Restaurants per District in Hong Kong')
bar_ax.set_xlabel('Administrative District')
bar_ax.set_ylabel('Number of Restaurants')
# Tilt and right-align the district names so the long labels do not overlap.
plt.setp(bar_ax.get_xticklabels(), rotation=45, ha='right')
bar_fig.tight_layout()
plt.show()

In [None]:
# Figure: pie chart of each district's share of all restaurants.
pie_fig, pie_ax = plt.subplots(figsize=(15, 10))
pie_ax.pie(
    district_counts,
    labels=district_counts.index,
    autopct='%1.1f%%',  # print each slice's percentage with one decimal
    startangle=90,
)
pie_ax.set_title('Restaurant Distribution Across Hong Kong Districts')
# Equal aspect ratio keeps the pie circular.
pie_ax.axis('equal')
plt.show()

In [None]:
# Recompute the per-district restaurant counts and print them as plain text.
district_counts = df['Admin_District'].value_counts()
# One print call; sep="\n" puts the heading and the Series on separate lines.
print("\n The number of restraurant in each area：", district_counts, sep="\n")

In [None]:
# Mean rating per administrative district, best-rated first. The whole step is
# skipped when the data has no 'AverageRating' column.
if 'AverageRating' in df.columns:
    district_ratings = (
        df.groupby('Admin_District')['AverageRating']
        .mean()
        .sort_values(ascending=False)
    )
    print("\nThe average rating in each districts：", district_ratings, sep="\n")

In [None]:
# Per-district summary table: number of rows (counted via 'StoreId'), mean of
# 'AverageRating', and sum of 'Reviewers', using named aggregation so each
# output column gets its final name directly.
district_summary = df.groupby('Admin_District').agg(
    Restaurant_Count=('StoreId', 'count'),
    Avg_Rating=('AverageRating', 'mean'),
    Total_Reviewers=('Reviewers', 'sum'),
)

# Leave the 'Unknown' bucket (areas without a mapping) out of the summary;
# errors='ignore' makes this a no-op when no such bucket exists.
district_summary = district_summary.drop(index='Unknown', errors='ignore')

# Heading below is Chinese for "Data summary for each administrative district".
print("\n每個行政區的數據總結：", district_summary, sep="\n")

In [None]:
# Dual-axis figure, part 1: restaurant counts as bars on the left y-axis.
fig, ax1 = plt.subplots(figsize=(14, 7))
ax1.bar(
    district_summary.index,
    district_summary['Restaurant_Count'],
    color='skyblue',
    label='Restaurant Count',
)
ax1.set_xlabel('Administrative District')
# Colour the left axis label and tick labels blue.
ax1.set_ylabel('Number of Restaurants', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')
# Tilt and right-align the district names on the x-axis.
plt.xticks(rotation=45, ha='right')

In [None]:
# Dual-axis figure, part 2: average rating as a line on a second y-axis that
# shares ax1's x-axis.
# NOTE(review): every run of this cell calls twinx() again and so adds another
# twin axis to the same figure — re-run the figure-creation cell first.
ax2 = ax1.twinx()
ax2.plot(
    district_summary.index,
    district_summary['Avg_Rating'],
    color='orange',
    marker='o',
    label='Avg Rating',
)
# Colour the right axis label and tick labels orange to match the line.
ax2.set_ylabel('Average Rating', color='orange')
ax2.tick_params(axis='y', labelcolor='orange')

In [None]:
# Dual-axis figure, part 3: add the title and render the finished figure.
#
# The figure was created in an earlier cell. With the notebook inline backend a
# figure is shown and closed when its cell finishes, so pyplot-level calls here
# (plt.title / plt.show) would act on a brand-new empty figure rather than on
# `fig`. Address the existing objects explicitly instead.
ax1.set_title('Restaurant Count and Average Rating per District in Hong Kong')
fig.tight_layout()
# Ending the cell on the Figure object makes the notebook display this exact
# figure (bars, rating line, and title together).
fig

In [None]:
# Save the table, including the added 'Admin_District' column, to a new CSV.
# The relative path resolves against the current working directory.
new_file = '18_district.csv'
# index=False leaves the DataFrame's row index out of the file.
df.to_csv(path_or_buf=new_file, index=False)
