### Import modules

In [None]:
import pandas as pd
import requests
import os
from datetime import datetime
from dateutil.relativedelta import relativedelta

### Define time range

In [None]:
# Target day for the extraction: today's local date shifted back two months.
lookback = relativedelta(months=2)
current_datetime = datetime.now() - lookback

# ISO-style date string (YYYY-MM-DD) used to build the API queries.
formatted_datetime = f"{current_datetime:%Y-%m-%d}"

### Get data via the API

In [None]:
# Optional application token read from the environment. When the variable is
# not set, no extra header is sent and the request is anonymous, as before.
app_token = os.environ.get("CHICAGO_API_TOKEN")
headers = {"X-App-Token": app_token} if app_token else {}

url = "https://data.cityofchicago.org/resource/ajtu-isnz.json"

# Pass the query as parameters so requests URL-encodes the spaces and quotes
# of the filter expression. The filter keeps every trip that started on the
# target day; at most 30000 rows are requested.
taxi_params = {
    "$where": (
        f"trip_start_timestamp >= '{formatted_datetime}T00:00:00' "
        f"AND trip_start_timestamp <= '{formatted_datetime}T23:59:59'"
    ),
    "$limit": 30000,
}

# The timeout stops the notebook from hanging on a stalled connection, and
# raise_for_status() reports an HTTP error here instead of letting an error
# payload be turned into a DataFrame.
response = requests.get(url, headers=headers, params=taxi_params, timeout=60)
response.raise_for_status()

data = response.json()

taxi_trips = pd.DataFrame(data)


#### TaxiTrip Transform 1: Deal with NaN

In [None]:
# Remove the census-tract and centroid-location columns in one call.
# errors="ignore" keeps this cell from raising KeyError when the fetched data
# does not contain one of these columns.
taxi_trips.drop(
    columns=[
        "pickup_census_tract",
        "dropoff_census_tract",
        "pickup_centroid_location",
        "dropoff_centroid_location",
    ],
    errors="ignore",
    inplace=True,
)

# Discard every row that still has a missing value in any remaining column.
taxi_trips.dropna(inplace=True)

#### TaxiTrip Transform 2: Renaming

In [None]:
# Give the community-area columns an "_id" suffix.
community_area_renames = {
    "pickup_community_area": "pickup_community_area_id",
    "dropoff_community_area": "dropoff_community_area_id",
}
taxi_trips.rename(columns=community_area_renames, inplace=True)

#### TaxiTrip Transform 3: Helper Column

In [None]:
# Convert the trip start column to pandas datetimes once and reuse the result.
trip_starts = pd.to_datetime(taxi_trips["trip_start_timestamp"])
taxi_trips["trip_start_timestamp"] = trip_starts

# Helper column: trip start rounded down to the full hour, used as the join
# key against the hourly weather data.
taxi_trips["datetime_for_weather"] = trip_starts.dt.floor("h")

### Get Weather data

In [None]:
# Sample request against the weather archive endpoint with a fixed location
# and the full year 2021.
# NOTE(review): looks like an exploratory call; its result is not used by the
# weather cells further down - confirm it is still needed.
url = (
    "https://archive-api.open-meteo.com/v1/era5"
    "?latitude=52.52&longitude=13.41"
    "&start_date=2021-01-01&end_date=2021-12-31"
    "&hourly=temperature_2m"
)

response = requests.get(url)

data = response.json()

#### Weather data: Params

In [None]:
url_mod = "https://archive-api.open-meteo.com/v1/era5"

# Request the weather for the same day the taxi trips were fetched for.
# A fixed date here would leave the later merge on the hourly timestamp
# without any matching rows.
date = formatted_datetime

# Single-day window with four hourly variables.
# NOTE(review): the coordinates are presumably Chicago - confirm.
params = {
    "latitude": 41.85,
    "longitude": -87.65,
    "start_date": date,
    "end_date": date,
    "hourly": "temperature_2m,wind_speed_10m,rain,precipitation",
}

# The timeout avoids hanging on a stalled connection; raise_for_status()
# reports an HTTP error here instead of as a KeyError when the payload is
# read in a later cell.
response_mod = requests.get(url_mod, params=params, timeout=60)
response_mod.raise_for_status()

weather_data = response_mod.json()

#### Weather data: Filtered

In [None]:
# Map each output column name to the key it is read from in the "hourly"
# section of the weather response.
hourly_values = weather_data["hourly"]
column_sources = {
    "date_time": "time",
    "temperature": "temperature_2m",
    "wind_speed": "wind_speed_10m",
    "rain": "rain",
    "precipitation": "precipitation",
}

weather_data_filtered = {
    column: hourly_values[source_key]
    for column, source_key in column_sources.items()
}

weather_df = pd.DataFrame(weather_data_filtered)

#### Weather data: Convert date-time

In [None]:
# Convert the date_time column to pandas datetimes so it can be matched
# against the taxi trips' hourly helper column.
parsed_weather_times = pd.to_datetime(weather_df["date_time"])
weather_df["date_time"] = parsed_weather_times

### Merge TaxiTrips & Weather

In [None]:
# Attach the hourly weather readings to each trip. This is pandas' default
# (inner) join, so trips without a matching weather hour are not kept.
taxi_trips_with_weather = pd.merge(
    taxi_trips,
    weather_df,
    left_on="datetime_for_weather",
    right_on="date_time",
)

### TaxiTrips: Data Types

In [None]:
# Target dtype per column, grouped by kind: one timestamp, one 32-bit count,
# the two community-area ids as 8-bit integers, and the distance and money
# columns as floats.
data_types = {
    "trip_end_timestamp": "datetime64[ns]",
    "trip_seconds": "int32",
    "trip_miles": "float",
    **dict.fromkeys(
        ("pickup_community_area_id", "dropoff_community_area_id"), "int8"
    ),
    **dict.fromkeys(
        ("fare", "tips", "tolls", "extras", "trip_total"), "float"
    ),
}

taxi_trips = taxi_trips.astype(data_types)

### TaxiTrips: Sanity-Check

In [None]:
# Show every trip whose end timestamp equals the latest one in the data.
latest_trip_end = taxi_trips["trip_end_timestamp"].max()
taxi_trips.loc[taxi_trips["trip_end_timestamp"] == latest_trip_end]

# Alternative check, currently disabled:
# taxi_trips.nlargest(10, "fare")

### Data Modeling

In [None]:
# Master table of payment types: one row per distinct value, in order of
# first appearance, with a 1-based surrogate id as the first column.
payment_type_master = (
    taxi_trips[["payment_type"]]
    .drop_duplicates()
    .reset_index(drop=True)
)
payment_type_master.insert(
    0, "payment_type_id", range(1, len(payment_type_master) + 1)
)

payment_type_master

In [None]:
# Master table of companies: one row per distinct value, in order of first
# appearance, with a 1-based surrogate id as the first column.
company_master = (
    taxi_trips[["company"]]
    .drop_duplicates()
    .reset_index(drop=True)
)
company_master.insert(0, "company_id", range(1, len(company_master) + 1))

company_master

In [None]:
# Look up the surrogate ids for payment type and company by joining both
# master tables onto the trips.
taxi_trips_id = (
    taxi_trips
    .merge(payment_type_master, on="payment_type")
    .merge(company_master, on="company")
)

# Inspect five random rows of the result.
taxi_trips_id.sample(n=5)

In [None]:
# The text columns are now represented by their ids, so remove them.
taxi_trips_id.drop(columns=["payment_type", "company"], inplace=True)

In [None]:
# Write both master tables to CSV files in the working directory, without
# the DataFrame index.
for master_table, csv_name in (
    (payment_type_master, "Payment_type_master.csv"),
    (company_master, "Company_master.csv"),
):
    master_table.to_csv(csv_name, index=False)

### Check the Modeling

#### 1. Payment-type

In [None]:
# Rebuild the payment-type master table from the trips as the starting point
# for the update check below.
distinct_payment_types = (
    taxi_trips["payment_type"].drop_duplicates().reset_index(drop=True)
)

payment_type_master = pd.DataFrame(
    {
        "payment_type_id": range(1, distinct_payment_types.size + 1),
        "payment_type": distinct_payment_types,
    }
)

payment_type_master

In [None]:
# Hand-written incoming payment types used to exercise the update logic.
new_payment_type_data = [
    {"payment_type": label} for label in ("Credit Card", "X", "Y")
]

new_payment_type_mapping = pd.DataFrame(data=new_payment_type_data)

In [None]:
# Highest id currently present in the payment-type master table.
existing_payment_type_ids = payment_type_master["payment_type_id"]
payment_type_max_id = existing_payment_type_ids.max()

In [None]:
# Give every incoming row an id that continues after the current maximum.
first_payment_type_id = payment_type_max_id + 1
new_payment_type_mapping["payment_type_id"] = range(
    first_payment_type_id,
    first_payment_type_id + len(new_payment_type_mapping),
)

new_payment_type_mapping

In [None]:
# Keep only the incoming payment types that are not in the master table yet.
# .copy() makes the result an independent frame that is safe to modify.
already_known = new_payment_type_mapping["payment_type"].isin(
    payment_type_master["payment_type"]
)
new_payment_types = new_payment_type_mapping[~already_known].copy()

# Number the remaining rows consecutively after the current maximum id.
# Ids assigned before this filter also counted the rows removed here, which
# would leave gaps in the master table's id sequence.
new_payment_types["payment_type_id"] = range(
    payment_type_max_id + 1,
    payment_type_max_id + len(new_payment_types) + 1,
)

new_payment_types

In [None]:
# Append the new payment types to the master table; nothing happens when
# there are none.
has_new_payment_types = not new_payment_types.empty
if has_new_payment_types:
    payment_type_master = pd.concat(
        objs=[payment_type_master, new_payment_types],
        ignore_index=True,
    )

In [None]:
# Display the payment-type master table after the update step.
payment_type_master

#### 2. Company-check

In [None]:
# Rebuild the company master table from the trips as the starting point for
# the update check below.
distinct_companies = (
    taxi_trips["company"].drop_duplicates().reset_index(drop=True)
)

company_master = pd.DataFrame(
    {
        "company_id": range(1, distinct_companies.size + 1),
        "company": distinct_companies,
    }
)


In [None]:
# Hand-written incoming companies used to exercise the update logic.
new_company_data = [
    {"company": company_name} for company_name in ("Sun Taxi", "X", "Y")
]

new_company_mapping = pd.DataFrame(data=new_company_data)

new_company_mapping

In [None]:
# Highest id currently present in the company master table.
existing_company_ids = company_master["company_id"]
company_max_id = existing_company_ids.max()

In [None]:
# Give every incoming row an id that continues after the current maximum.
first_company_id = company_max_id + 1
new_company_mapping["company_id"] = range(
    first_company_id,
    first_company_id + len(new_company_mapping),
)

new_company_mapping

In [None]:
# Keep only the incoming companies that are not in the master table yet.
# .copy() makes the result an independent frame that is safe to modify.
already_known = new_company_mapping["company"].isin(company_master["company"])
new_companies = new_company_mapping[~already_known].copy()

# Number the remaining rows consecutively after the current maximum id.
# Ids assigned before this filter also counted the rows removed here, which
# would leave gaps in the master table's id sequence.
new_companies["company_id"] = range(
    company_max_id + 1,
    company_max_id + len(new_companies) + 1,
)

new_companies

In [None]:
# Append the new companies to the master table; nothing happens when there
# are none.
has_new_companies = not new_companies.empty
if has_new_companies:
    company_master = pd.concat(
        objs=[company_master, new_companies],
        ignore_index=True,
    )

company_master