In [None]:
from datetime import datetime
import os
from pathlib import Path

import googlemaps
import pandas as pd
import pytz

# Read the raw Uber ride records from the CSV export.
df = pd.read_csv("Resources/uber.csv")

# Keep a ride only when its fare is non-negative, none of its four
# coordinates is exactly zero, and it carried at least one passenger.
coordinate_columns = ['pickup_longitude', 'pickup_latitude',
                      'dropoff_longitude', 'dropoff_latitude']
has_valid_fare = df['fare_amount'] >= 0
has_coordinates = (df[coordinate_columns] != 0).all(axis=1)
has_passengers = df['passenger_count'] != 0
df_filtered = df[has_valid_fare & has_coordinates & has_passengers]

# Order the remaining rides by how many passengers they carried.
df_sorted = df_filtered.sort_values('passenger_count')

# Parse 'key' as a timestamp, label it as UTC, then express it in New York
# local time.
nyc_tz = pytz.timezone('America/New_York')
df_sorted['key'] = (
    pd.to_datetime(df_sorted['key'])
    .dt.tz_localize('UTC')
    .dt.tz_convert(nyc_tz)
)

# Break the localized timestamp out into separate calendar/clock columns.
for part in ('date', 'time', 'day', 'month', 'year'):
    df_sorted[part] = getattr(df_sorted['key'].dt, part)

# Read the Google Maps API key from the environment instead of embedding it in
# the notebook, so the secret never ends up in version control or in shared
# exports. Any key that was previously committed here should be treated as
# leaked and rotated.
_gmaps_api_key = os.environ.get("GOOGLE_MAPS_API_KEY")
if not _gmaps_api_key:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable before running this cell."
    )
gmaps = googlemaps.Client(key=_gmaps_api_key)

def calculate_distance(row):
    """Return the driving distance between a ride's pickup and dropoff points.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing
            'pickup_latitude', 'pickup_longitude', 'dropoff_latitude' and
            'dropoff_longitude'.

    Returns:
        The 'text' field of the distance reported for the first leg of the
        first route returned by ``gmaps.directions``, or None when a
        coordinate is missing/out of range or no usable route came back.
    """
    origin = (row['pickup_latitude'], row['pickup_longitude'])
    destination = (row['dropoff_latitude'], row['dropoff_longitude'])

    # Reject impossible points before sending them to the API. The chained
    # comparisons are also False for NaN, so rows with missing coordinates
    # are rejected by the same check.
    for latitude, longitude in (origin, destination):
        if not (-90 <= latitude <= 90 and -180 <= longitude <= 180):
            return None

    # Use the Directions API to get distance information
    result = gmaps.directions(origin, destination, mode="driving", departure_time=datetime.now())

    # An empty response, a route without legs, or a leg without a distance
    # all mean there is nothing to report for this ride.
    if not result:
        return None
    legs = result[0].get('legs') or []
    if not legs:
        return None
    distance = legs[0].get('distance')
    if not distance:
        return None
    return distance['text']
   
    
# Look up the road distance for every ride, one calculate_distance call per row.
road_distances = df_sorted.apply(calculate_distance, axis=1)
df_sorted['road_distance'] = road_distances

# Remove the original 'key', 'pickup_datetime', and 'Unnamed: 0' columns,
# then write the result to an Excel workbook without the index.
obsolete_columns = ['key', 'pickup_datetime', 'Unnamed: 0']
df_sorted = df_sorted.drop(columns=obsolete_columns)
df_sorted.to_excel('uber_rides_distance_calculated.xlsx', index=False)

In [None]:
# Preview the first ten rows of the processed rides table.
df_sorted.head(n=10)