In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Path to a single trip-data CSV file (going by the path: part 1 of the
# 2019-01 Citi Bike export). Only this one file is referenced here.
file_path = './data/2019-citibike-tripdata/1_January/201901-citibike-tripdata_1.csv'

# Read that one CSV file into a DataFrame; no other files are loaded or merged.
data = pd.read_csv(file_path)

In [2]:
# Count how many rides start at, and how many rides end at, each station.
# NOTE(review): grouping by (id, name) produces several rows for one id if that
# id ever appears under more than one name, and the id-only merge below would
# then pair those rows up -- confirm each id maps to a single name in this data.
start_station_counts = data.groupby(['start station id', 'start station name']).size().reset_index(name='ride start count')
end_station_counts = data.groupby(['end station id', 'end station name']).size().reset_index(name='ride end count')

# Outer-join on station id so that stations seen only as a start, or only as an
# end, are kept in the result.
station_activity = pd.merge(start_station_counts, end_station_counts,
                            left_on='start station id', right_on='end station id',
                            how='outer')

# A missing count means "no rides on that side", so only the count columns are
# zero-filled. A blanket fillna(0) on the whole frame would also replace the
# missing station id / name of one-sided stations with 0.
count_columns = ['ride start count', 'ride end count']
station_activity[count_columns] = station_activity[count_columns].fillna(0).astype(int)

# Each row describes one station, so fill whichever side's id / name is missing
# from the other side. assign() evaluates every right-hand side against the
# frame as it was before the call, so the two directions do not interfere.
station_activity = station_activity.assign(**{
    'start station id': station_activity['start station id'].fillna(station_activity['end station id']),
    'start station name': station_activity['start station name'].fillna(station_activity['end station name']),
    'end station id': station_activity['end station id'].fillna(station_activity['start station id']),
    'end station name': station_activity['end station name'].fillna(station_activity['start station name']),
})

# Total activity = rides started at the station + rides ended at the station.
station_activity['ride activity'] = station_activity['ride start count'] + station_activity['ride end count']

# Expose the (now always populated) start-side name as the generic station name.
station_activity = station_activity.rename(columns={
    'start station name': 'station name'
})

In [3]:
# Station id columns for trip origins and trip destinations.
start_ids = data['start station id']
end_ids = data['end station id']

# Number of distinct station ids on each side.
unique_start_stations = start_ids.nunique()
unique_end_stations = end_ids.nunique()

# Number of distinct station ids overall, regardless of whether a station was
# used as a start or as an end: stack both columns, then count distinct values.
all_stations = pd.concat([start_ids, end_ids]).nunique()

# Report the three counts.
print(f"Unique start stations: {unique_start_stations}")
print(f"Unique end stations: {unique_end_stations}")
print(f"Total unique stations: {all_stations}")


Unique start stations: 767
Unique end stations: 773
Total unique stations: 773
