In [1]:
from pathlib import Path

import pandas as pd

In [2]:
# Directory that holds the raw quarterly .csv files
basic_path = '../data/raw_data/'

# File names of the raw quarterly trip data, built from the quarter labels
csv_path_list = [
    f"{quarter}-citibike-tripdata-raw.csv"
    for quarter in ("2018Q3", "2018Q4", "2019Q1", "2019Q2")
]

In [3]:
# Clean every raw quarterly file listed in "csv_path_list" and write the
# result to "../data/cleaned_data/" under the same name without "-raw".

# Suffix of the raw file names that is replaced by ".csv" in the output name
raw_suffix = "-raw.csv"

# Verbose headers of the raw files -> short column names
column_names = {
    "Date": "date",
    "Trips over the past 24-hours (midnight to 11:59pm)": "daily-trip",
    "Miles traveled today (midnight to 11:59 pm)": "daily-miles",
    "Total Annual Members (All Time)": "annual-member",
    "24-Hour Passes Purchased (midnight to 11:59 pm)": "day-pass",
    "3-Day Passes Purchased (midnight to 11:59 pm)": "3day-pass"
}

# Smaller integer types to save memory usage
# NOTE(review): int16 tops out at 32,767 -- confirm the pass counts stay below that
column_dtypes = {
    "daily-trip": "int32",
    "daily-miles": "int32",
    "annual-member": "int32",
    "day-pass": "int16",
    "3day-pass": "int16"
}

# Create the output directory up front so that saving does not fail
# when the directory is missing
cleaned_dir = Path("../data/cleaned_data")
cleaned_dir.mkdir(parents=True, exist_ok=True)

# Loop through "csv_path_list"
for csv_path in csv_path_list:

    # The output name is derived by stripping "raw_suffix", so refuse any
    # file name that does not carry it instead of writing a mangled name
    if not csv_path.endswith(raw_suffix):
        raise ValueError(f"Expected a file name ending in '{raw_suffix}': {csv_path}")

    # .......... READ DATA .......... #
    # Concatenate "basic_path" and "csv_path" to get the location of the raw file
    path = basic_path + csv_path

    # Read quarterly trip data into Pandas DataFrame
    quarterly_trip_df = pd.read_csv(path)

    # .......... DATA CLEANING .......... #
    # 1. Rename the columns, 2. change their data types
    quarterly_trip_df = (
        quarterly_trip_df
        .rename(columns=column_names)
        .astype(column_dtypes)
    )

    # 3. Delete "Column 1" column for the second quarter of 2019
    if csv_path == "2019Q2-citibike-tripdata-raw.csv":
        quarterly_trip_df = quarterly_trip_df.drop(columns=["Column 1"])

    # Save "quarterly_trip_df" as csv file (e.g. "2018Q3-citibike-tripdata.csv")
    cleaned_name = csv_path[:-len(raw_suffix)] + ".csv"
    quarterly_trip_df.to_csv(cleaned_dir / cleaned_name, index=False, header=True)