# Data Engineering - Postgres Queries

## Import Libraries

In [1]:
import os

import pandas as pd

In [2]:
from sqlalchemy import create_engine

## Connect to Postgres

In [3]:
# Create the SQLAlchemy engine that every query cell below passes to pandas.
# The connection URL can be overridden with the NY_TAXI_DB_URL environment
# variable so that real credentials never need to be written into the notebook;
# when the variable is unset, the local development database URL is used.
engine = create_engine(
    os.environ.get("NY_TAXI_DB_URL", "postgresql://root:root@localhost:5432/ny_taxi")
)

In [4]:
# Connectivity check: run a trivial statement that selects the constant 1
# (aliased as "number") through the engine and display the result.
query = """
SELECT 1 as number;
"""

# Last expression of the cell, so the resulting DataFrame is displayed.
pd.read_sql(query, con=engine)

Unnamed: 0,number
0,1


## Homework

### Question 3. Count records

How many taxi trips were made entirely on January 15?

Tip: started and finished on 2019-01-15.

Remember that the lpep_pickup_datetime and lpep_dropoff_datetime columns are timestamps (date plus hour, minute, and second), not plain dates.

In [5]:
# Question 3: count the trips in green_taxi_trips whose pickup AND drop-off both
# fall on 2019-01-15. Both datetime columns are cast to DATE inside the query so
# the time-of-day part is ignored in the comparison.
query = """
SELECT
    COUNT(*)
FROM
    green_taxi_trips t
WHERE
    lpep_pickup_datetime::date = '2019-01-15'
AND
    lpep_dropoff_datetime::date = '2019-01-15';
"""

# Displays a one-row DataFrame holding the count.
pd.read_sql(query, con=engine)

Unnamed: 0,count
0,20530


### Question 4. Largest trip for each day

Which was the day with the largest trip distance? Use the pickup time for your calculations.

In [6]:
# Question 4: find the pickup day that contains the single longest trip.
# The query takes the maximum trip_distance per pickup date, sorts the days by
# that maximum in descending order, and keeps only the top row.
# NULLS LAST matters: PostgreSQL places NULLs first under DESC, so without it a
# day whose distances are all NULL would be returned instead of the real maximum.
query = """
SELECT
    CAST(lpep_pickup_datetime AS DATE) AS "Day",
    MAX(trip_distance) AS "Max_Distance"
FROM
    green_taxi_trips t
GROUP BY
    "Day"
ORDER BY "Max_Distance" DESC NULLS LAST
LIMIT 1;
"""

# Displays a one-row DataFrame with the columns "Day" and "Max_Distance".
pd.read_sql(query, con=engine)

Unnamed: 0,Day,Max_Distance
0,2019-01-15,117.99


### Question 5. The number of passengers

On 2019-01-01, how many trips had 2 passengers, and how many had 3?

In [7]:
# Question 5: count the trips picked up on 2019-01-01, grouped by passenger_count.
# The rows for passenger_count 2 and 3 answer the question; the other counts are
# kept for context. Note that the "passenger" column holds the number of trips
# in each group, not a number of passengers.
# ORDER BY makes the row order deterministic; without it PostgreSQL may return
# the groups in any order.
query = """
SELECT
    passenger_count, count(*) as "passenger"
FROM
    green_taxi_trips
WHERE
    lpep_pickup_datetime::date = '2019-01-01'
GROUP BY passenger_count
ORDER BY passenger_count;
"""

# Displays one row per distinct passenger_count.
pd.read_sql(query, con=engine)

Unnamed: 0,passenger_count,passenger
0,0,21
1,1,12415
2,2,1282
3,3,254
4,4,129
5,5,616
6,6,273


### Question 6: Largest tip (Multiple choice)

For the passengers picked up in the Astoria zone, which drop-off zone had the largest tip? We want the name of the zone, not the id.

Note: it's not a typo, it's tip, not trip

In [8]:
# Question 6: among trips picked up in the 'Astoria' zone, find the one with the
# largest tip_amount and show its pickup and drop-off zones.
# - The zones table is joined twice: zpu resolves PULocationID, zdo resolves
#   DOLocationID. The result therefore has "LocationID" and "Zone" twice each:
#   the first pair is the pickup zone, the second pair is the drop-off zone.
# - The pickup join is a plain JOIN because the WHERE filter on zpu."Zone"
#   already discards trips without a matching pickup zone. The drop-off join
#   stays LEFT so a trip with no matching drop-off zone is still considered.
# - NULLS LAST matters: PostgreSQL places NULLs first under DESC, so a trip with
#   a NULL tip_amount would otherwise be returned as the "largest" tip.
query = """
SELECT
    tip_amount, zpu."LocationID", zpu."Zone", zdo."LocationID", zdo."Zone"
FROM
    green_taxi_trips t
JOIN zones zpu
        ON t."PULocationID" = zpu."LocationID"
LEFT JOIN zones zdo
        ON t."DOLocationID" = zdo."LocationID"
WHERE
    zpu."Zone" = 'Astoria'
ORDER BY tip_amount DESC NULLS LAST
LIMIT 1;
"""

# Displays a one-row DataFrame; the drop-off "Zone" (last column) is the answer.
pd.read_sql(query, con=engine)

Unnamed: 0,tip_amount,LocationID,Zone,LocationID.1,Zone.1
0,88.0,7,Astoria,146,Long Island City/Queens Plaza


# END