In [12]:
import pandas as pd
import plotly.express as px

# 1) Yearly ridership per station
# The preprocessed CSV is expected to provide a 'Year' column (2019 ~ 2024)
# next to the station name and the boarding/alighting passenger counts.
boarding_data = pd.read_csv(
    '/content/sample_data/Subway_Line2_BoardingAlighting_Preprocessed.csv'
)

# Collapse the records to one row per (Year, station name), summing the
# boarding and alighting passenger counts.
boarding_year_summary = (
    boarding_data
    .groupby(['Year', '역명'], as_index=False)[['승차총승객수', '하차총승객수']]
    .sum()
)

# Total passengers = boarding passengers + alighting passengers
boarding_year_summary['TotalPassengers'] = boarding_year_summary['승차총승객수'].add(
    boarding_year_summary['하차총승객수']
)

# 2) Station coordinates
coords_data = pd.read_csv('/content/sample_data/Line2_Coordinates.csv')

# 3) Map Korean station names to English and assign ShortCode
# station_name_map: Korean station name (the '역명' column) -> English name.
# Names that are not listed here end up as NaN in the 'English' column.
# NOTE(review): 신길, 여의도 and 국회의사당 do not look like Line 2 stations;
# they are harmless because only names present in coords_data are looked up,
# but confirm whether they belong here.
station_name_map = {
    "시청": "City Hall",
    "을지로입구": "Euljiro 1(il)-ga",
    "을지로3가": "Euljiro 3-ga",
    "충정로": "Chungjeongno",
    "동대문역사문화공원": "Dongdaemun History & Culture Park",
    "신당": "Sindang",
    "상왕십리": "Sangwangsimni",
    "왕십리": "Wangsimni",
    "한양대": "Hanyang University",
    "뚝섬": "Ttukseom",
    "성수": "Seongsu",
    "건대입구": "Konkuk University",
    "구의": "Guui",
    "강변": "Gangbyeon",
    "잠실": "Jamsil",
    "잠실나루": "Jamsilnaru",
    "잠실새내": "Jamsilsaenae",
    "종합운동장": "Sports Complex",
    "삼성": "Samseong",
    "선릉": "Seolleung",
    "역삼": "Yeoksam",
    "강남": "Gangnam",
    "교대": "Gyodae",
    "서초": "Seocho",
    "방배": "Bangbae",
    "사당": "Sadang",
    "낙성대": "Nakseongdae",
    "서울대입구": "Seoul Natl Univ. Entrance",
    "봉천": "Bongcheon",
    "신림": "Sillim",
    # Romanization fixed: 신대방 is "Sindaebang" (was misspelled "Sindebang").
    "신대방": "Sindaebang",
    "구로디지털단지": "Guro Digital Complex",
    "대림": "Daerim",
    "신도림": "Sindorim",
    "영등포구청": "Yeongdeungpo-gu Office",
    "문래": "Mullae",
    "신길": "Singil",
    "여의도": "Yeouido",
    "국회의사당": "National Assembly",
    "당산": "Dangsan",
    "합정": "Hapjeong",
    "홍대입구": "Hongik University",
    "신촌": "Sinchon",
    "이대": "Ewha Womans Univ.",
    "아현": "Ahyeon",
    "을지로4가": "Euljiro 4-ga",
    "신설동": "Sinseol-dong",
    "용두": "Yongdu",
    "신답": "Sindap",
    "용답": "Yongdap",
    "도림천": "Dorimcheon",
    "양천구청": "Yangcheon-gu Office",
    "신정네거리": "Sinjeongnegeori",
    "까치산": "Kkachisan",
}
coords_data['English'] = coords_data['역명'].map(station_name_map)

# shortcode_map: English name -> short label drawn next to each marker.
# The keys must match the values of station_name_map exactly; an English name
# without an entry here ends up as NaN in the 'ShortCode' column.
shortcode_map = {
    "City Hall": "CH",
    "Euljiro 1(il)-ga": "E1",
    "Euljiro 3-ga": "E3",
    "Chungjeongno": "CJ",
    "Dongdaemun History & Culture Park": "DHCP",
    "Sindang": "SD",
    "Sangwangsimni": "SW",
    "Wangsimni": "WS",
    "Hanyang University": "HYU",
    "Ttukseom": "TT",
    "Seongsu": "SS",
    "Konkuk University": "KU",
    "Guui": "GU",
    "Gangbyeon": "GB",
    "Jamsil": "JS",
    "Jamsilnaru": "JNR",
    "Jamsilsaenae": "JSN",
    "Sports Complex": "SC",
    "Samseong": "SMG",
    "Seolleung": "SL",
    "Yeoksam": "YS",
    "Gangnam": "GN",
    "Gyodae": "GD",
    "Seocho": "SCH",
    "Bangbae": "BB",
    "Sadang": "SDG",
    "Nakseongdae": "NSD",
    "Seoul Natl Univ. Entrance": "SNU",
    "Bongcheon": "BC",
    "Sillim": "SLM",
    # Key kept in sync with the corrected spelling in station_name_map.
    "Sindaebang": "SDB",
    "Guro Digital Complex": "GDC",
    "Daerim": "DRM",
    "Sindorim": "SDR",
    "Yeongdeungpo-gu Office": "YGO",
    "Mullae": "ML",
    "Singil": "SG",
    "Yeouido": "YID",
    "National Assembly": "NA",
    "Dangsan": "DS",
    "Hapjeong": "HJ",
    "Hongik University": "HKU",
    "Sinchon": "SIN",
    "Ewha Womans Univ.": "EWU",
    "Ahyeon": "AH",
    "Euljiro 4-ga": "E4",
    "Sinseol-dong": "SSD",
    "Yongdu": "YD",
    "Sindap": "SDP",
    "Yongdap": "YDP",
    "Dorimcheon": "DRC",
    "Yangcheon-gu Office": "YCO",
    "Sinjeongnegeori": "SJN",
    "Kkachisan": "KCS",
}
coords_data['ShortCode'] = coords_data['English'].map(shortcode_map)

# 4) Merge coordinate data and yearly ridership
# pd.merge defaults to an inner join, so a station whose name appears in only
# one of the two tables (missing, or spelled differently) is dropped from
# merged_data. Report such stations instead of losing them silently.
coords_only = coords_data.loc[
    ~coords_data['역명'].isin(boarding_year_summary['역명']), '역명'
].drop_duplicates()
ridership_only = boarding_year_summary.loc[
    ~boarding_year_summary['역명'].isin(coords_data['역명']), '역명'
].drop_duplicates()
if not coords_only.empty:
    print("Stations with coordinates but no ridership data:", coords_only.tolist())
if not ridership_only.empty:
    print("Stations with ridership data but no coordinates:", ridership_only.tolist())

merged_data = pd.merge(coords_data, boarding_year_summary, on='역명')

# Check for stations without ShortCode
# merged_data repeats each station once per year, so list every affected
# station only once.
missing = (
    merged_data.loc[merged_data['ShortCode'].isna(), ['역명', 'English']]
    .drop_duplicates()
)
print("Stations without ShortCode:\n", missing)

# 5) Create an animated map by year (2019–2024)
#    Marker size and color are based on 'TotalPassengers'
#    Display ShortCode above each marker

# Pin the color scale to the passenger range over all years so that a given
# color stands for the same passenger count in every animation frame, instead
# of leaving the scale to auto-scaling. With no rows there is no range to pin,
# so fall back to the default (None).
if merged_data.empty:
    passenger_range = None
else:
    passenger_range = [
        merged_data['TotalPassengers'].min(),
        merged_data['TotalPassengers'].max(),
    ]

# Zoom and center are set once in update_layout below; the px call previously
# passed a conflicting zoom=10 that was immediately overridden there.
fig = px.scatter_mapbox(
    merged_data,
    lat="위도",
    lon="경도",
    color="TotalPassengers",
    color_continuous_scale=px.colors.sequential.Greens,
    range_color=passenger_range,
    size="TotalPassengers",
    hover_name="English",
    hover_data={"TotalPassengers": True, "ShortCode": True},
    title="Seoul Subway Line 2: Yearly Marker Gradient by TotalPassengers",
    text="ShortCode",
    animation_frame="Year"
)

# Ensure markers display text
fig.update_traces(
    mode='markers+text',
    textposition='top center',
    textfont=dict(color="black", size=14)
)

# Configure map layout: base map style, fixed view (center + zoom),
# centered title and figure size in pixels.
fig.update_layout(
    mapbox_style="carto-positron",
    mapbox=dict(
        center=dict(lat=37.55, lon=126.97),
        zoom=11
    ),
    title_x=0.5,
    width=1200,
    height=900
)

# Show interactive map
fig.show()


Stations without ShortCode:
 Empty DataFrame
Columns: [역명, English]
Index: []
