In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so the dataset paths used below resolve (Colab only).
drive.mount('/content/drive')

In [None]:
import pandas as pd
# Load the preprocessed OD_PEAK table; later cells read its code_origin, code_dest,
# gu_origin, dong_origin and transportation_total columns.
temp = pd.read_csv('/content/drive/MyDrive/B A 데이터 모음/00클러스터링/Preprocessed_datasets/OD_PEAK.csv')

In [None]:
# Preview the raw table as loaded from the CSV.
temp

In [None]:
def remove_same_origin_dest_rows(df):
    """Return the rows of ``df`` whose origin and destination codes differ.

    Args:
        df: DataFrame with ``code_origin`` and ``code_dest`` columns.

    Returns:
        A new, independent DataFrame containing only the rows where
        ``code_origin != code_dest``. Original index labels are preserved.
    """
    # .copy() detaches the result from `df`, so later in-place edits on the result
    # do not raise SettingWithCopyWarning and can never touch the source frame.
    return df[df['code_origin'] != df['code_dest']].copy()

In [None]:
# Working table: the raw rows minus trips that start and end at the same code.
data = remove_same_origin_dest_rows(temp)

In [None]:
# List the available columns before deciding which ones to drop.
data.columns

In [None]:
# Remove the 'Unnamed: 0' column (presumably an index saved into the CSV) and 'bus_and_subway'.
# Rebinding instead of inplace=True avoids mutating a filtered slice of `temp`, and
# errors='ignore' keeps the cell re-runnable once the columns are already gone.
data = data.drop(columns=['Unnamed: 0', 'bus_and_subway'], errors='ignore')

In [None]:
# Number of distinct destination codes.
data.code_dest.nunique()

In [None]:
# (rows, columns) of the working table after filtering and dropping columns.
data.shape

In [None]:
# Per-code sums: rows grouped by origin code (outgoing) and by destination code (incoming).
# numeric_only=True restricts the sum to numeric columns. Since pandas 2.0 the default
# also "sums" text columns by concatenating them (e.g. gu_origin / dong_origin if still
# present in `data`), which would pollute the totals and clash with the later name merge.
outgoing_traffic = data.groupby('code_origin').sum(numeric_only=True)
incoming_traffic = data.groupby('code_dest').sum(numeric_only=True)

In [None]:
# Total per code: outgoing plus incoming, aligned on the index (the location code).
# fill_value=0 treats a value that is missing on one side as 0 instead of yielding NaN.
total_traffic = outgoing_traffic.add(incoming_traffic, fill_value=0)
total_traffic

In [None]:
# The code columns here are sums of identifiers produced by the groupby, not real
# measurements, so remove them. Rebinding with errors='ignore' (instead of inplace=True)
# makes the cell safe to re-run without a KeyError.
total_traffic = total_traffic.drop(columns=['code_origin', 'code_dest'], errors='ignore')
total_traffic

In [None]:
# Rank the codes by total traffic, highest first.
total_traffic_sorted = total_traffic.sort_values(by='transportation_total', ascending=False)

In [None]:
# Show the five codes with the highest total traffic.
total_traffic_sorted.head()

In [None]:
# Turn the location-code index into a regular 'code_origin' column (used later as a merge key).
# rename_axis guarantees the column name even when the index lost its name while the
# origin-indexed and destination-indexed sums were aligned; the guard keeps the cell
# re-runnable (a second reset_index would otherwise add a stray 'index' column).
if 'code_origin' not in total_traffic_sorted.columns:
    total_traffic_sorted = total_traffic_sorted.rename_axis('code_origin').reset_index()

In [None]:
!pip install kneed

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from kneed import KneeLocator

# Plot style for this figure and everything drawn afterwards.
sns.set(style="whitegrid", palette="muted")

# Traffic volumes in descending order; the knee of this curve is used as the cut-off
# between the high-traffic head and the rest.
sorted_data = total_traffic_sorted['transportation_total'].sort_values(ascending=False).values
knee_locator = KneeLocator(range(len(sorted_data)), sorted_data, curve='convex', direction='decreasing')

# KneeLocator.knee is None when no knee is detected. Fail here with a clear message
# instead of an obscure TypeError later in `knee_point + 1`.
if knee_locator.knee is None:
    raise ValueError("KneeLocator found no knee point in the traffic volume curve")
knee_point = int(knee_locator.knee)  # plain int: used below as a positional index
knee_value = sorted_data[knee_point]

# Visualization
fig, ax = plt.subplots(figsize=(12, 7))
ax.plot(range(len(sorted_data)), sorted_data, label="Total Traffic Volume", linewidth=2)
ax.scatter(knee_point, knee_value, color='red', s=150, zorder=5, label='Knee Point')
ax.set_title("Knee Point Visualization for High Traffic Volumes", fontsize=18, fontweight='bold')
ax.set_xlabel("Index", fontsize=14)
ax.set_ylabel("Traffic Volume", fontsize=14)
ax.tick_params(axis='both', labelsize=12)

# Place the label at a fixed offset in screen points so its position does not depend
# on the scale of the data axes.
ax.annotate('Knee Point', xy=(knee_point, knee_value), xytext=(40, 40), textcoords='offset points',
            arrowprops=dict(facecolor='black', shrink=0.05), fontsize=12)

ax.legend()
plt.show()

# Print the knee point index
print("Knee Point Index:", knee_point)


In [None]:
# Keep the rows from the top of the ranking through the knee position (+1 makes the slice inclusive).
data_up_to_knee = total_traffic_sorted.iloc[:knee_point+1]

In [None]:
# Show the rows selected by the knee cut-off.
data_up_to_knee

In [None]:
# Lookup table with one row per origin code (duplicates dropped) and its gu/dong names.
temp_unique = temp.drop_duplicates(subset='code_origin')[['code_origin', 'gu_origin', 'dong_origin']]

In [None]:
# Attach the gu/dong names to the selected rows; the left join keeps every row of data_up_to_knee.
merged_data = pd.merge(data_up_to_knee, temp_unique, on='code_origin', how='left')

In [None]:
# Inspect the selected rows together with their gu/dong names.
merged_data

In [None]:
# Load the coordinates table; its 'dong', 'x' and 'y' columns are used in the join below.
lldata = pd.read_csv("/content/drive/MyDrive/B A 데이터 모음/00클러스터링/위경도.csv")

In [None]:
# Preview the first rows of the coordinates table.
lldata.head()

Merge `merged_data` with the coordinates table `lldata` by matching the `dong_origin` column to the `dong` column, using a left join so that every selected row is kept.

In [None]:
# Attach coordinates: left-join the x/y columns of lldata by matching dong_origin to dong.
# Rows without a match keep NaN in x and y.
# NOTE(review): the join key is the dong name alone; if lldata contains repeated 'dong'
# values this join duplicates rows — confirm the names are unique in the coordinates file.
final_data = merged_data.merge(lldata[['dong', 'x', 'y']], left_on='dong_origin', right_on='dong', how='left')

In [None]:
# Remove the join key 'dong' (it repeats dong_origin for matched rows) and keep the
# result by assigning it back; errors='ignore' makes the cell re-runnable.
final_data = final_data.drop(columns='dong', errors='ignore')
final_data

In [None]:
 !pip install folium

In [None]:
import folium
# Map center: the mean of each coordinate column.
# NOTE(review): 'x' is used as latitude and 'y' as longitude here — confirm against the
# coordinates file.
lat, long = (final_data[axis].mean() for axis in ('x', 'y'))

In [None]:
# Base map centered on the mean coordinates.
m = folium.Map(location=[lat, long], zoom_start=11)

In [None]:
# Put one marker per row on the map, labeled with the dong name.
# folium rejects NaN locations with a ValueError, so rows that received no coordinates
# from the left join are skipped.
located = final_data.dropna(subset=['x', 'y'])

for sub_lat, sub_long, title in zip(located['x'], located['y'], located['dong_origin']):
    # Add the point to the map; the tooltip shows the dong name on hover.
    folium.Marker([sub_lat, sub_long], tooltip=title).add_to(m)

In [None]:
# Write the map to a standalone HTML file, then display it inline as the cell output.
m.save('knee.html')
m

## Another way to visualize

In [None]:
import folium

# Center the map on the mean of the coordinate columns.
lat = final_data['x'].mean()
long = final_data['y'].mean()
m = folium.Map([lat, long], zoom_start=11)

# Minimum and maximum traffic volume, used to scale the circle sizes.
min_transportation_total = final_data['transportation_total'].min()
max_transportation_total = final_data['transportation_total'].max()
# Loop-invariant range; it is 0 when all rows share one volume (or there is a single row).
traffic_span = max_transportation_total - min_transportation_total

# Smallest and largest circle radius.
min_radius = 5  # minimum circle size
max_radius = 30  # maximum circle size

# folium rejects NaN locations with a ValueError, so rows that received no coordinates
# from the left join are skipped.
located = final_data.dropna(subset=['x', 'y'])

# Draw one circle per row.
for sub_lat, sub_long, title, transportation_total in zip(
    located['x'], located['y'], located['dong_origin'], located['transportation_total']
):
    # Scale the radius linearly between min_radius and max_radius by traffic volume.
    # With a zero range the division would yield NaN, so fall back to the minimum radius.
    if traffic_span > 0:
        scale = (transportation_total - min_transportation_total) / traffic_span
    else:
        scale = 0.0
    radius = min_radius + (max_radius - min_radius) * scale

    # Draw the circle with CircleMarker.
    folium.CircleMarker(
        location=[sub_lat, sub_long],
        radius=radius,
        tooltip=title,
        fill=True,
        color='blue',
        fill_opacity=0.6
    ).add_to(m)

m.save('knee_circle.html')
m
