# 따릉이 데이터 분석과 시각화

## 데이터 수집
- 서울 열린 데이터 광장: https://data.seoul.go.kr/
- Download 받아야 할 자료
  - 서울시 공공자전거 대여이력 정보
  - 서울시 공공자전거 대여소 정보

In [None]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import folium

In [None]:
# Load the rental-history CSV into `df`. The file is decoded as cp949 and the
# rental ("대여일시") and return ("반납일시") timestamp columns are parsed as
# datetimes using the explicit "%Y-%m-%d %H:%M:%S" format.
# NOTE(review): the `date_format` keyword presumably requires pandas >= 2.0 — confirm
# against the environment this notebook runs in.
df = pd.read_csv(
    "./data/bike_2502.csv",
    encoding="cp949",
    parse_dates=["대여일시", "반납일시"],
    date_format="%Y-%m-%d %H:%M:%S",
)

In [None]:
# Quick sanity check of the loaded rental history.
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line.
df.info()
# Show 10 randomly chosen rows to eyeball the actual values.
print(df.sample(10))

In [None]:
# Load the rental-station master sheet ("대여소현황") into `rent_location`.
# The first five rows of the sheet are skipped and no row is used as a header;
# the column names are supplied explicitly through `names` instead.
# NOTE(review): presumably the skipped rows hold the sheet's title / multi-row
# header — confirm against the workbook if its layout changes.
rent_location = pd.read_excel(
    "./data/bike_rent_location_2512.xlsx",
    sheet_name="대여소현황",
    skiprows=5,
    engine="openpyxl",
    header=None,
    index_col=None,
    names=[
        "대여소번호",
        "대여소명",
        "자치구",
        "상세주소",
        "위도",
        "경도",
        "설치시기",
        "LCD",
        "QR",
        "운영방식",
    ],
)
# DataFrame.info() prints its report itself and returns None, so it is not
# wrapped in print() (that would emit an extra "None" line).
rent_location.info()
print(rent_location.head())

In [None]:
# Enrich rental records with the district and coordinates of both the rental
# station and the return station.
def add_lat_lon_to_rent(rent, location):
    """
    Left-join station district / latitude / longitude onto rental records.

    The station table is joined twice: once on the rental station number
    ("대여 대여소번호") and once on the return station number ("반납대여소번호").
    The joined columns are prefixed with "대여 " and "반납 " respectively.
    Rentals whose station number has no match keep their row and get missing
    values in the joined columns.

    Args:
        rent: Rental records containing "대여 대여소번호" and "반납대여소번호".
        location: Station table containing "대여소번호", "자치구", "위도", "경도".
            It is copied first, so the caller's frame is not modified.

    Returns:
        The rental records with six extra columns (district, latitude and
        longitude for the rental and the return station).
    """
    stations = location.copy()
    # Station numbers are integers in the station table; format them as
    # zero-padded 5-character strings to build the join key.
    stations["대여소번호"] = stations["대여소번호"].apply("{:05d}".format)
    station_lookup = stations[["대여소번호", "자치구", "위도", "경도"]]

    # (join key in `rent`, prefix given to the joined columns)
    join_specs = (
        ("대여 대여소번호", "대여"),
        ("반납대여소번호", "반납"),
    )
    for station_key, prefix in join_specs:
        renamed_lookup = station_lookup.rename(
            columns={
                "대여소번호": station_key,
                "자치구": f"{prefix} 자치구",
                "위도": f"{prefix} 위도",
                "경도": f"{prefix} 경도",
            }
        )
        rent = rent.merge(renamed_lookup, on=station_key, how="left")
    return rent


# Attach district / latitude / longitude of the rental and return stations to
# every rental record.
combined_with_location = add_lat_lon_to_rent(df, rent_location)
# DataFrame.info() prints its report itself and returns None, so it is not
# wrapped in print() (that would emit an extra "None" line).
combined_with_location.info()

In [None]:
def add_dayofweek_and_weekend(df):
    """
    Add '요일' (Korean day-of-week label) and '주말' (weekend flag) columns.

    Both columns are derived from the datetime column '대여일시'. The frame is
    modified in place and also returned, so the call can be used either way.

    Rows whose '대여일시' is missing (NaT) get a missing '요일' and a '주말' of
    False instead of raising.

    Args:
        df: DataFrame with a datetime-typed '대여일시' column.

    Returns:
        The same DataFrame object, with '요일' and '주말' added.
    """
    # Monday=0 ... Sunday=6, matching pandas' dayofweek numbering.
    days_kr = ["월", "화", "수", "목", "금", "토", "일"]
    day_index = df["대여일시"].dt.dayofweek
    # A dict mapper leaves unmatched values (NaN from NaT timestamps) as missing,
    # whereas indexing the list with them would raise TypeError.
    df["요일"] = day_index.map(dict(enumerate(days_kr)))
    # Saturday (5) and Sunday (6) count as weekend; NaN compares as False.
    df["주말"] = day_index >= 5
    return df


# Add the 요일 (day-of-week label) and 주말 (weekend flag) columns.
combined_with_location = add_dayofweek_and_weekend(combined_with_location)
print("Combined DataFrame with location and day columns:")
print(combined_with_location[["대여일시", "요일", "주말"]].head())
# DataFrame.info() prints its report itself and returns None, so it is not
# wrapped in print() (that would emit an extra "None" line).
combined_with_location.info()

In [None]:
import matplotlib

# Use the NanumGothic font family for all matplotlib text from here on.
# NOTE(review): presumably chosen so the Korean titles and labels in the plots
# render correctly; the font has to be installed and visible to matplotlib — confirm.
matplotlib.rc("font", family="NanumGothic")

In [None]:
def plot_dayofweek_count(df):
    """
    Draw a bar chart of the number of rentals on each day of the week.

    Args:
        df: DataFrame with a '요일' column holding Korean day labels
            ("월" through "일"). Bars are shown in Monday-to-Sunday order.
    """
    weekday_order = ["월", "화", "수", "목", "금", "토", "일"]
    plt.figure(figsize=(8, 5))
    # countplot draws on the current axes (the figure just created) and returns it.
    ax = sns.countplot(data=df, x="요일", order=weekday_order)
    ax.set_title("요일별 대여 건수")
    ax.set_xlabel("요일")
    ax.set_ylabel("건수")
    plt.show()


# Visualize rental counts by day of the week, using the '요일' column that
# add_dayofweek_and_weekend added to combined_with_location.
plot_dayofweek_count(combined_with_location)

In [None]:
# Count rentals per rental hour (rows) and day of week (columns).
def make_pivot_by_hour_and_day(df):
    """
    Build an hour-by-weekday table of rental counts.

    Side effect: a '대여시간대' column (hour of '대여일시') is added to `df` in
    place. Other cells of this notebook read that column afterwards, so the
    mutation is intentional.

    Args:
        df: DataFrame with a datetime-typed '대여일시' column and a '요일' column
            of Korean day labels.

    Returns:
        DataFrame indexed by rental hour with one integer count column per day,
        ordered 월 -> 일. Days without any rental appear as all-zero columns.
        Labels other than the seven expected ones are kept after them.
    """
    weekday_order = ["월", "화", "수", "목", "금", "토", "일"]
    df["대여시간대"] = df["대여일시"].dt.hour
    counts = df.groupby(["대여시간대", "요일"]).size().unstack(fill_value=0)
    # unstack sorts the day labels lexicographically (금, 목, 수, ...), which
    # scrambles the week on a chart axis; put the columns in calendar order.
    unexpected = [label for label in counts.columns if label not in weekday_order]
    return counts.reindex(columns=weekday_order + unexpected, fill_value=0)


# Create the hour-by-day pivot table of rental counts.
# Note: this call also adds the '대여시간대' column to combined_with_location
# (make_pivot_by_hour_and_day assigns it on the frame it receives).
pivot_df = make_pivot_by_hour_and_day(combined_with_location)
print(pivot_df.head())

In [None]:
def draw_heatmap(pivot_df, title, xlabel, ylabel):
    """
    Render `pivot_df` as an annotated heatmap and show it.

    Every cell is annotated with its value using integer formatting, all row
    and column tick labels are displayed, and the colour bar is labelled "건수".

    Args:
        pivot_df: 2-D table of integer values to draw.
        title: Title placed above the heatmap.
        xlabel: Label of the x-axis (the table's columns).
        ylabel: Label of the y-axis (the table's index).
    """
    heatmap_options = {
        "annot": True,
        "fmt": "d",  # integer annotations
        "cmap": "YlGnBu",
        "cbar_kws": {"label": "건수"},
        "xticklabels": True,
        "yticklabels": True,
    }
    plt.figure(figsize=(10, 6))
    ax = sns.heatmap(pivot_df, **heatmap_options)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    plt.show()


# Heatmap of rental counts: day of week on the x-axis, rental hour on the y-axis.
draw_heatmap(pivot_df, "대여시간대별 요일별 대여 건수 Heatmap", "요일", "대여시간대")

In [None]:
def plot_usage_by_weekend(df):
    """
    Plot hourly rental counts per district, split into weekday and weekend panels.

    Rentals are counted per (rental hour, weekend flag, rental district) and
    drawn as one line per district on two side-by-side axes sharing the y-axis:
    weekdays (평일) on the left, weekends (주말) on the right.

    Args:
        df: DataFrame with a '주말' weekend-flag column and a '대여 자치구'
            district column. The rental hour is read from '대여시간대' when that
            column exists; otherwise it is derived from the datetime column
            '대여일시', so the function does not depend on another cell having
            added the hour column first. `df` itself is not modified.
    """
    if "대여시간대" in df.columns:
        hour_key = "대여시간대"
    else:
        # Group by a derived Series; its name becomes the column name after
        # reset_index, so the plotting code below is the same in both cases.
        hour_key = df["대여일시"].dt.hour.rename("대여시간대")

    df_grouped = (
        df.groupby([hour_key, "주말", "대여 자치구"]).size().reset_index(name="count")
    )

    # plt.subplots creates the figure itself; no separate plt.figure() call,
    # which would only leave an extra empty figure behind.
    _, axes = plt.subplots(1, 2, figsize=(18, 7), sharey=True)
    panels = zip(axes, (False, True), ("평일", "주말"))
    for ax, is_weekend, label in panels:
        subset = df_grouped[df_grouped["주말"] == is_weekend]
        sns.lineplot(data=subset, x="대여시간대", y="count", hue="대여 자치구", ax=ax)
        ax.set_title(f"{label} 대여시간대별 자치구별 대여 건수")
        ax.set_xlabel("대여시간대")
        ax.set_ylabel("대여 건수")
        # Anchor the legend outside the plotting area, to the right of the axes.
        ax.legend(title="자치구", bbox_to_anchor=(1.05, 1), loc="upper left")

    plt.tight_layout()
    plt.show()


# Weekday vs. weekend hourly rental trends per district.
plot_usage_by_weekend(combined_with_location)

In [None]:
def draw_choropleth_by_gu(df, geojson_path, value_col, legend_name):
    """
    Build a folium choropleth of per-district counts.

    The number of non-null `value_col` entries is counted for each rental
    district ('대여 자치구') and drawn as a shaded layer on a map centred on
    Seoul. Districts are matched to GeoJSON features through each feature's
    `properties.name`.

    Args:
        df: DataFrame with a '대여 자치구' column and the `value_col` column.
        geojson_path: GeoJSON passed to folium as the layer's `geo_data`.
        value_col: Column whose non-null entries are counted per district.
        legend_name: Caption of the colour legend.

    Returns:
        The folium.Map containing the choropleth layer.
    """
    gu_col = "대여 자치구"

    # One row per district: [district, count of non-null value_col].
    counts_by_gu = df.groupby(gu_col)[value_col].count()
    data_by_gu = counts_by_gu.reset_index()
    data_by_gu.columns = [gu_col, value_col]

    # Latitude / longitude used as the map centre (Seoul).
    seoul_map = folium.Map(location=[37.5665, 126.9780], zoom_start=11)

    choropleth_layer = folium.Choropleth(
        geo_data=geojson_path,
        data=data_by_gu,
        columns=[gu_col, value_col],
        key_on="feature.properties.name",
        fill_color="YlGnBu",
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=legend_name,
    )
    choropleth_layer.add_to(seoul_map)

    return seoul_map


# Build the district choropleth: the number of non-null "자전거번호" entries per
# rental district, with the legend captioned "대여 건수".
m = draw_choropleth_by_gu(
    combined_with_location, "data/seoul.json", "자전거번호", "대여 건수"
)
# Bare expression as the cell's last statement — presumably so the notebook
# renders the map inline.
m
# Uncomment to write the map to a standalone HTML file:
# m.save('seoul_bike_choropleth.html')