# Duration

In [43]:
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv('data/ridership.csv')

# Keep only trips strictly shorter than max_duration (in seconds).
max_duration = 3600

# Width of each histogram bin (in seconds).
bin_size = 30

# .loc with an explicit .copy() gives an independent frame, so adding the
# bin column below does not raise SettingWithCopyWarning.
df_filtered = df.loc[
    df['duration'] < max_duration,
    ['id', 'duration', 'bike_model'],
].copy()

# Bins are left-closed ([start, end)), so the last edge must be strictly
# greater than the longest trip. The extra +1 on the stop value guarantees
# that even when the maximum is an exact multiple of bin_size; without it
# such a trip would fall outside every bin and be dropped from the counts.
# int() keeps range() working if the column is stored as float.
longest_duration = int(df_filtered['duration'].max())
bins = range(0, longest_duration + bin_size + 1, bin_size)

df_filtered['duration_bin'] = pd.cut(df_filtered['duration'], bins=bins, right=False)

# Count trips per (bin, bike model) and pivot the models into columns.
# observed=False is spelled out so that empty bins are kept as rows and the
# pandas FutureWarning about the changing default is not emitted.
duration_counts = (
    df_filtered
    .groupby(['duration_bin', 'bike_model'], observed=False)
    .size()
    .unstack(fill_value=0)
)

duration_counts_df = duration_counts.reset_index()

# Left edge of each bin as a plain integer column.
duration_counts_df['interval_start'] = [
    int(interval.left) for interval in duration_counts_df['duration_bin']
]

# Normalize the "EFIT" column so it sums to 1 across all bins.
total_efit = duration_counts_df['EFIT'].sum()
duration_counts_df['EFIT_normalized'] = duration_counts_df['EFIT'] / total_efit

# Normalize the "ICONIC" column so it sums to 1 across all bins.
total_iconic = duration_counts_df['ICONIC'].sum()
duration_counts_df['ICONIC_normalized'] = duration_counts_df['ICONIC'] / total_iconic

duration_counts_df.to_csv('../../static/duration_counts.csv', index=False)

duration_counts_df


  duration_counts = df_filtered.groupby(['duration_bin', 'bike_model']).size().unstack(fill_value=0)


bike_model,duration_bin,EFIT,ICONIC,interval_start,EFIT_normalized,ICONIC_normalized
0,"[0, 30)",120,978,0,0.000929,0.001589
1,"[30, 60)",62,283,30,0.000480,0.000460
2,"[60, 90)",440,2692,60,0.003407,0.004373
3,"[90, 120)",747,4052,90,0.005784,0.006583
4,"[120, 150)",1007,6202,120,0.007798,0.010076
...,...,...,...,...,...,...
115,"[3450, 3480)",54,235,3450,0.000418,0.000382
116,"[3480, 3510)",57,266,3480,0.000441,0.000432
117,"[3510, 3540)",51,277,3510,0.000395,0.000450
118,"[3540, 3570)",54,259,3540,0.000418,0.000421


# Distance

In [56]:
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv('data/ridership.csv')

# Keep only trips strictly shorter than max_distance (in meters).
max_distance = 12000

# Width of each histogram bin (in meters).
bin_size = 200

# .loc with an explicit .copy() gives an independent frame, so adding the
# bin column below does not raise SettingWithCopyWarning.
df_filtered = df.loc[
    df['distance_average'] < max_distance,
    ['id', 'distance_average', 'bike_model'],
].copy()

# Bins are left-closed ([start, end)), so the last edge must be strictly
# greater than the longest trip. The extra +1 on the stop value guarantees
# that even when the (truncated) maximum is an exact multiple of bin_size;
# without it such a trip would fall outside every bin and be dropped from
# the counts.
longest_distance = int(df_filtered['distance_average'].max())
bins = range(0, longest_distance + bin_size + 1, bin_size)

df_filtered['distance_bin'] = pd.cut(df_filtered['distance_average'], bins=bins, right=False)

# Count trips per (bin, bike model) and pivot the models into columns.
# observed=False is spelled out so that empty bins are kept as rows and the
# pandas FutureWarning about the changing default is not emitted.
distance_counts = (
    df_filtered
    .groupby(['distance_bin', 'bike_model'], observed=False)
    .size()
    .unstack(fill_value=0)
)

distance_counts_df = distance_counts.reset_index()

# Left edge of each bin as a plain integer column.
distance_counts_df['interval_start'] = [
    int(interval.left) for interval in distance_counts_df['distance_bin']
]

# Normalize the "EFIT" column so it sums to 1 across all bins.
total_efit = distance_counts_df['EFIT'].sum()
distance_counts_df['EFIT_normalized'] = distance_counts_df['EFIT'] / total_efit

# Normalize the "ICONIC" column so it sums to 1 across all bins.
total_iconic = distance_counts_df['ICONIC'].sum()
distance_counts_df['ICONIC_normalized'] = distance_counts_df['ICONIC'] / total_iconic

distance_counts_df.to_csv('../../static/distance_counts.csv', index=False)

distance_counts_df


  distance_counts = df_filtered.groupby(['distance_bin', 'bike_model']).size().unstack(fill_value=0)


bike_model,distance_bin,EFIT,ICONIC,interval_start,EFIT_normalized,ICONIC_normalized
0,"[0, 200)",364,1949,0,0.002767,0.0031
1,"[200, 400)",1100,7601,200,0.008363,0.012088
2,"[400, 600)",2105,16300,400,0.016004,0.025922
3,"[600, 800)",3267,23818,600,0.024838,0.037878
4,"[800, 1000)",4601,30592,800,0.03498,0.048651
5,"[1000, 1200)",5064,33921,1000,0.0385,0.053946
6,"[1200, 1400)",5944,37109,1200,0.045191,0.059015
7,"[1400, 1600)",6377,37502,1400,0.048482,0.05964
8,"[1600, 1800)",6411,36624,1600,0.048741,0.058244
9,"[1800, 2000)",6597,37262,1800,0.050155,0.059259


# Elevation