# Import Statements

In [28]:
import pandas as pd 

# Data Loading

In [31]:
# Load the taxi trips (Parquet) and the weather data (CSV) into pandas DataFrames
taxi_df = pd.read_parquet(path="green_tripdata_2024-09.parquet")  # taxi trip records
weather_df = pd.read_csv(filepath_or_buffer="nyc_weather.csv")  # weather observations

# Taxi Data Handling

In [34]:
# Green Taxi Data Handling
# Make sure both trip timestamps are pandas datetimes so the .dt accessor works below.
taxi_df['lpep_pickup_datetime'] = pd.to_datetime(taxi_df['lpep_pickup_datetime'])
taxi_df['lpep_dropoff_datetime'] = pd.to_datetime(taxi_df['lpep_dropoff_datetime'])

# Null Handling
# Remove 'ehail_fee' first, then discard every row that still has a missing value.
# NOTE(review): presumably 'ehail_fee' is (almost) entirely null and would otherwise
# cause dropna() to discard most rows -- confirm against the data.
taxi_df = taxi_df.drop(columns='ehail_fee')
taxi_df = taxi_df.dropna()

# Treat the coded fields as labels rather than numbers.
# This deliberately runs AFTER dropna(): astype(str) turns a missing value into the
# literal string 'nan', which dropna() would no longer recognise as null.
taxi_df = taxi_df.astype({'RatecodeID': str, 'payment_type': str, 'trip_type': str})

# Data Bucketing
# Calendar day of the pickup (time component reset to midnight); used as a merge key.
taxi_df['source_date'] = pd.to_datetime(taxi_df['lpep_pickup_datetime'].dt.date)
# Extracting hour component from timestamp
# The hour must come from the full pickup timestamp: 'source_date' has been truncated
# to midnight, so reading the hour from it would always yield 0.
taxi_df['hour'] = taxi_df['lpep_pickup_datetime'].dt.hour

# Weather Data Handling

In [37]:
# Weather Data Handling
# Keep only the timestamp plus the selected weather fields.
weather_columns = [
    'datetime', 'temp', 'feelslike', 'dew', 'humidity',
    'precip', 'snow', 'windspeed', 'conditions', 'icon',
]
weather_df = weather_df[weather_columns]
weather_df['datetime'] = pd.to_datetime(weather_df['datetime'])

# Restrict the observations to September 2024.
observed_at = weather_df['datetime']
is_september_2024 = (observed_at.dt.year == 2024) & (observed_at.dt.month == 9)
weather_df = weather_df[is_september_2024]

def get_time_of_day(hour):
    """Return a coarse part-of-day label for an hour value.

    Half-open ranges are used:
        6 <= hour < 12   -> 'Morning'
        12 <= hour < 18  -> 'Afternoon'
        18 <= hour < 22  -> 'Evening'
    Any value outside those ranges yields 'Night'.
    """
    day_parts = (
        (6, 12, 'Morning'),
        (12, 18, 'Afternoon'),
        (18, 22, 'Evening'),
    )
    for start, stop, label in day_parts:
        if start <= hour < stop:
            return label
    # Nothing matched: treat the value as night time.
    return 'Night'


# Data Bucketing
# Derive the calendar day (midnight timestamp), the hour of the observation, and a
# time-of-day label computed from that hour.
weather_df = weather_df.assign(
    source_date=lambda frame: pd.to_datetime(frame['datetime'].dt.date),
    hour=lambda frame: frame['datetime'].dt.hour,
    time_of_day=lambda frame: frame['hour'].apply(get_time_of_day),
)

# Merging Data

In [46]:
# Data Merging
# Inner join: keep only taxi rows whose (source_date, hour) pair also exists in the
# weather data.
merge_keys = ['source_date', 'hour']
merged_df = taxi_df.merge(weather_df, how='inner', on=merge_keys)

# Write the merged DataFrame to a CSV file.
# NOTE: DataFrame.to_csv returns None when it is given a path, so `transformed_csv`
# is None here; the CSV content lives only in the file on disk.
transformed_csv = merged_df.to_csv("ddo_merged_data.csv")