## Data Source

[The Citi Bike Data webpage](https://www.citibikenyc.com/system-data)

Citi Bike files from the year 2022 were downloaded into the `Resources` folder (12 files).

## Dependencies

In [1]:
import pandas as pd
import os
import glob
import numpy as np

# Calculate geo distance between two geographic points with coordinates
from geopy.distance import distance

In [2]:
# Function to get geo distance between start station and end station

def geo_distance(row):
    """Return the distance, in miles, between a trip's start and end points.

    Parameters
    ----------
    row : mapping-like (e.g. one DataFrame row passed by ``apply(axis=1)``)
        Must provide the keys 'start_lat', 'start_lng', 'end_lat' and
        'end_lng'.

    Returns
    -------
    The ``.miles`` value of geopy's ``distance`` between the two
    (latitude, longitude) pairs.
    """
    start_point = row['start_lat'], row['start_lng']
    end_point = row['end_lat'], row['end_lng']
    trip_distance = distance(start_point, end_point)
    return trip_distance.miles

## Preparing data for input to Tableau

In [3]:
# Build one combined dataframe (df_jc) from all Jersey City 2022 trip files.
# Each source file is cleaned, gets a 'geodistance (miles)' column, and is
# saved on its own to ./Resources-gd before being added to the combined frame.

# Output directory for the per-file and combined CSVs
path = "./Resources-gd"
# Check whether the specified path exists or not
if not os.path.exists(path):
    # Create a new directory because it does not exist
    # (exist_ok=True keeps this safe if the directory appears in between)
    os.makedirs(path, exist_ok=True)
    print("The new directory is created!")

# Per-file frames are collected here and concatenated once after the loop;
# concatenating inside the loop re-copies every earlier row on each pass.
frames = []
joined_rows = 0

# Loop through all JC .csv files. glob returns matches in arbitrary order, so
# sort the names to make the processing/concatenation order deterministic.
for file_name in sorted(glob.glob('./Resources/JC-2022*.csv')):
    # Read .csv file
    df = pd.read_csv(file_name)
    print(50*'-')
    print(f"File Name: {file_name} \nDF shape: {df.shape}")

    # strip() with no argument removes surrounding whitespace;
    # strip('') would remove nothing.
    df['end_station_name'] = df['end_station_name'].str.strip()
    # Drop every row that has a missing value in any column
    df = df.dropna()

    # Calculate geo distance between start and end stations and place the
    # new column at index 1
    df.insert(1, 'geodistance (miles)', df.apply(geo_distance, axis=1))

    # Calculating geo distance is time consuming --> save df to csv file.
    # basename/join handle the platform's path separator, so this works on
    # both Windows and POSIX (splitting on '\\' only worked on Windows).
    out_file = os.path.join(path, os.path.basename(file_name))
    df.to_csv(out_file, index=False)

    # Keep the frame for the final join and report the running size
    frames.append(df)
    joined_rows += len(df)
    print(f"Current joined DF shape: {(joined_rows, df.shape[1])}")

# Join all per-file dataframes in one step; fall back to an empty frame when
# no input file matched, because pd.concat rejects an empty list.
df_jc = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

print(50*'=')
print(f"Final joined DF shape: {df_jc.shape}")
# Save the dataframe to csv file
file_name = "./Resources-gd/JC-2022-citibike-tripdata-gd.csv"
df_jc.to_csv(file_name, index=False)

--------------------------------------------------
File Name: ./Resources\JC-202201-citibike-tripdata.csv 
DF shape: (26762, 13)
Current joined DF shape: (26359, 14)
--------------------------------------------------
File Name: ./Resources\JC-202202-citibike-tripdata.csv 
DF shape: (31911, 13)
Current joined DF shape: (58154, 14)
--------------------------------------------------
File Name: ./Resources\JC-202203-citibike-tripdata.csv 
DF shape: (51671, 13)
Current joined DF shape: (109742, 14)
--------------------------------------------------
File Name: ./Resources\JC-202204-citibike-tripdata.csv 
DF shape: (62528, 13)
Current joined DF shape: (172141, 14)
--------------------------------------------------
File Name: ./Resources\JC-202205-citibike-tripdata.csv 
DF shape: (80482, 13)
Current joined DF shape: (252437, 14)
--------------------------------------------------
File Name: ./Resources\JC-202206-citibike-tripdata.csv 
DF shape: (103299, 13)
Current joined DF shape: (355422, 14)

In [4]:
# Preview the first five rows of the combined dataframe
df_jc.head(n=5)

Unnamed: 0,ride_id,geodistance (miles),rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual
0,CA5837152804D4B5,0.0,electric_bike,2022-01-26 18:50:39,2022-01-26 18:51:53,12 St & Sinatra Dr N,HB201,12 St & Sinatra Dr N,HB201,40.750604,-74.02402,40.750604,-74.02402,member
1,BA06A5E45B6601D2,0.0,classic_bike,2022-01-28 13:14:07,2022-01-28 13:20:23,Essex Light Rail,JC038,Essex Light Rail,JC038,40.712774,-74.036486,40.712774,-74.036486,member
2,7B6827D7B9508D93,0.0,classic_bike,2022-01-10 19:55:13,2022-01-10 20:00:37,Essex Light Rail,JC038,Essex Light Rail,JC038,40.712774,-74.036486,40.712774,-74.036486,member
3,6E5864EA6FCEC90D,0.0,electric_bike,2022-01-26 07:54:57,2022-01-26 07:55:22,12 St & Sinatra Dr N,HB201,12 St & Sinatra Dr N,HB201,40.750604,-74.02402,40.750604,-74.02402,member
4,E24954255BBDE32D,0.0,electric_bike,2022-01-13 18:44:46,2022-01-13 18:45:43,12 St & Sinatra Dr N,HB201,12 St & Sinatra Dr N,HB201,40.750604,-74.02402,40.750604,-74.02402,member
