In [0]:
%sql
-- Trip volume and average fare/distance per pickup ZIP code,
-- listed from the busiest ZIP to the least busy.
WITH trips_by_pickup_zip AS (
  SELECT
    pickup_zip,
    COUNT(*) AS total_trips,
    AVG(fare_amount) AS avg_fare_amount,
    AVG(trip_distance) AS avg_trip_distance
  FROM mc_labs.trips_raw
  GROUP BY pickup_zip
)
SELECT
  pickup_zip,
  total_trips,
  avg_fare_amount,
  avg_trip_distance
FROM trips_by_pickup_zip
ORDER BY total_trips DESC; -- ZIP codes with the most trips come first

In [0]:
%sql
-- De-duplicates trips: rows sharing the same pickup/dropoff timestamps,
-- distance and fare are treated as duplicates and only one of them is kept.
WITH
ranked_trips AS (
  SELECT
    tpep_pickup_datetime,
    tpep_dropoff_datetime,
    trip_distance,
    fare_amount,
    pickup_zip,
    dropoff_zip,
    ROW_NUMBER() OVER (
      PARTITION BY
        tpep_pickup_datetime,
        tpep_dropoff_datetime,
        trip_distance,
        fare_amount -- Combination of fields used to identify "duplicates"
      ORDER BY
        -- Ordering by a partition column would tie every row in the
        -- partition and make the surviving row arbitrary. Order by the
        -- columns that can still differ so the kept row is reproducible,
        -- preferring rows with known ZIP codes.
        pickup_zip ASC NULLS LAST,
        dropoff_zip ASC NULLS LAST
    ) AS rn
  FROM
    mc_labs.trips_raw
)
SELECT
  tpep_pickup_datetime,
  tpep_dropoff_datetime,
  trip_distance,
  fare_amount,
  pickup_zip,
  dropoff_zip -- Explicit column list: everything except the helper column 'rn'
FROM
  ranked_trips
WHERE
  rn = 1; -- Keep the first row of each duplicate group

In [0]:
%sql
-- Daily trip count plus average fare and distance, in chronological order.
WITH trips_with_date AS (
  -- Derive the calendar date of the pickup once, so it can be grouped by name.
  SELECT
    CAST(tpep_pickup_datetime AS DATE) AS trip_date,
    fare_amount,
    trip_distance
  FROM mc_labs.trips_raw
)
SELECT
  trip_date,
  COUNT(*) AS daily_total_trips,
  AVG(fare_amount) AS daily_avg_fare_amount,
  AVG(trip_distance) AS daily_avg_trip_distance
FROM trips_with_date
GROUP BY trip_date
ORDER BY trip_date;

In [0]:
%sql
-- Trips enriched with the pickup city name.
-- INNER JOIN: trips whose pickup_zip has no row in zip_code_info are dropped.
SELECT
  trips.tpep_pickup_datetime,
  trips.tpep_dropoff_datetime,
  trips.trip_distance,
  trips.fare_amount,
  trips.pickup_zip,
  zips.city_name AS pickup_city
FROM mc_labs.trips_raw AS trips
INNER JOIN mc_labs.zip_code_info AS zips
  ON zips.zip = trips.pickup_zip
LIMIT 100; -- Capped to keep the initial preview small

In [0]:
%sql
-- Trips with the pickup city name when one is known.
-- LEFT JOIN: every trip is kept; pickup_city is NULL when the ZIP has no
-- match in zip_code_info.
WITH pickup_lookup AS (
  SELECT
    zip,
    city_name AS pickup_city
  FROM mc_labs.zip_code_info
)
SELECT
  trips.tpep_pickup_datetime,
  trips.trip_distance,
  trips.fare_amount,
  trips.pickup_zip,
  pickup_lookup.pickup_city
FROM mc_labs.trips_raw AS trips
LEFT JOIN pickup_lookup
  ON pickup_lookup.zip = trips.pickup_zip
LIMIT 100; -- Capped to keep the initial preview small

In [0]:
%sql
-- Trips enriched with both pickup and dropoff city names.
-- The ZIP lookup is joined twice (once per trip endpoint); both joins are
-- INNER, so a trip is returned only when both ZIP codes have a match.
WITH zip_cities AS (
  SELECT
    zip,
    city_name
  FROM mc_labs.zip_code_info
)
SELECT
  trips.tpep_pickup_datetime,
  trips.tpep_dropoff_datetime,
  trips.trip_distance,
  trips.fare_amount,
  trips.pickup_zip,
  origin.city_name AS pickup_city,
  trips.dropoff_zip,
  destination.city_name AS dropoff_city
FROM mc_labs.trips_raw AS trips
INNER JOIN zip_cities AS origin
  ON origin.zip = trips.pickup_zip
INNER JOIN zip_cities AS destination
  ON destination.zip = trips.dropoff_zip
LIMIT 100;

In [0]:
%sql
-- Trips whose pickup ZIP code belongs to the city 'New York'.
-- Expressed as a semi-join via EXISTS: a trip qualifies as soon as one
-- matching zip_code_info row is found, and trips are never duplicated.
SELECT
  trips.tpep_pickup_datetime,
  trips.trip_distance,
  trips.fare_amount,
  trips.pickup_zip
FROM mc_labs.trips_raw AS trips
WHERE EXISTS (
  SELECT 1
  FROM mc_labs.zip_code_info AS zips
  WHERE zips.city_name = 'New York'
    AND zips.zip = trips.pickup_zip
)
LIMIT 100;

In [0]:
%sql
-- Top 10 cities by number of trips that started there.
-- Trips are first counted per pickup ZIP, then rolled up to the city level,
-- so a city that spans several ZIP codes is reported once with its full total
-- (counting per ZIP alone would list such a city several times with partial
-- counts).
-- NOTE(review): assumes zip_code_info holds one row per zip; duplicate zip
-- rows would inflate the totals. Confirm against the table definition.
WITH trips_per_zip AS (
  SELECT
    pickup_zip,
    COUNT(*) AS trip_count
  FROM
    mc_labs.trips_raw
  GROUP BY
    pickup_zip
)
SELECT
  z.city_name,
  SUM(trips_per_zip.trip_count) AS total_trips_from_city
FROM
  mc_labs.zip_code_info AS z
INNER JOIN
  trips_per_zip
ON
  z.zip = trips_per_zip.pickup_zip
GROUP BY
  z.city_name
ORDER BY
  total_trips_from_city DESC,
  z.city_name -- Tie-breaker so the top-10 cut-off is reproducible
LIMIT 10; -- Show the 10 cities with the most originating trips