In [0]:
%sql
-- Make mc_labs the current schema for this session.
-- NOTE(review): the later cells reference trips_raw and zip_code_info without a
-- schema qualifier, so they presumably depend on this statement having run — confirm.
USE mc_labs;

## Calcular a duração da viagem em minutos e, em seguida, selecionar apenas as viagens com duração superior a um determinado limite.

In [0]:
%sql
-- Top 20 longest trips: derive each trip's duration in minutes, then keep only
-- the trips that lasted more than one hour.
WITH trip_durations AS (
  -- One row per trip that has both timestamps, carrying only the columns the
  -- outer query reads plus the derived duration.
  SELECT
    tpep_pickup_datetime,
    fare_amount,
    pickup_zip,
    -- UNIX_TIMESTAMP yields whole seconds; dividing by 60 converts to minutes.
    (UNIX_TIMESTAMP(tpep_dropoff_datetime) - UNIX_TIMESTAMP(tpep_pickup_datetime)) / 60 AS trip_duration_minutes
  FROM
    trips_raw
  WHERE
    tpep_pickup_datetime IS NOT NULL
    AND tpep_dropoff_datetime IS NOT NULL
)
SELECT
  tpep_pickup_datetime,
  trip_duration_minutes,
  fare_amount,
  pickup_zip
FROM
  trip_durations
WHERE
  trip_duration_minutes > 60 -- trips longer than 60 minutes
ORDER BY
  trip_duration_minutes DESC,
  -- Tie-breakers over the remaining output columns: rows that still compare
  -- equal are identical in every selected column, so LIMIT returns the same
  -- result set on every run.
  tpep_pickup_datetime,
  pickup_zip,
  fare_amount
LIMIT 20;

## Usando múltiplas CTEs encadeadas para lógica mais complexa. Vamos calcular a duração média da viagem por `pickup_zip` e depois ranquear cada `pickup_zip` com base nessa duração média.

In [0]:
%sql
-- Rank pickup ZIP codes by their average trip duration, longest first, and
-- return the first 20 rows of that ranking.
WITH trip_durations AS (
  -- Duration in minutes of every trip that has both timestamps and a pickup ZIP.
  SELECT
    pickup_zip,
    (UNIX_TIMESTAMP(tpep_dropoff_datetime) - UNIX_TIMESTAMP(tpep_pickup_datetime)) / 60 AS trip_duration_minutes
  FROM
    trips_raw
  WHERE
    tpep_pickup_datetime IS NOT NULL
    AND tpep_dropoff_datetime IS NOT NULL
    AND pickup_zip IS NOT NULL
),
avg_duration_per_zip AS (
  -- One row per pickup ZIP with the mean duration of its trips.
  SELECT
    pickup_zip,
    AVG(trip_duration_minutes) AS average_trip_duration_minutes
  FROM
    trip_durations
  GROUP BY
    pickup_zip
)
SELECT
  pickup_zip,
  average_trip_duration_minutes,
  -- RANK gives equal averages the same rank and leaves a gap after them.
  RANK() OVER (ORDER BY average_trip_duration_minutes DESC) AS rank_by_avg_duration
FROM
  avg_duration_per_zip
ORDER BY
  rank_by_avg_duration,
  -- pickup_zip is unique per row after the GROUP BY, so it breaks rank ties
  -- and makes the rows kept by LIMIT reproducible.
  pickup_zip
LIMIT 20;

## View para viagens com duração calculada. A view expõe colunas selecionadas de `trips_raw` acrescidas da coluna `trip_duration_minutes`, excluindo as viagens sem data/hora de embarque ou de desembarque.

In [0]:
%sql
-- View over trips_raw exposing the listed trip columns plus the trip duration
-- in minutes. Trips missing either timestamp are left out.
CREATE OR REPLACE VIEW trips_with_duration AS
SELECT
  trips.tpep_pickup_datetime,
  trips.tpep_dropoff_datetime,
  trips.trip_distance,
  trips.fare_amount,
  trips.pickup_zip,
  trips.dropoff_zip,
  -- Difference of the two epoch-second values, converted to minutes.
  (UNIX_TIMESTAMP(trips.tpep_dropoff_datetime) - UNIX_TIMESTAMP(trips.tpep_pickup_datetime)) / 60 AS trip_duration_minutes
FROM
  trips_raw AS trips
WHERE
  trips.tpep_pickup_datetime IS NOT NULL
  AND trips.tpep_dropoff_datetime IS NOT NULL;

-- Query the view: peek at up to 10 trips longer than 2 hours.
-- There is no ORDER BY, so which qualifying rows come back is arbitrary.
SELECT
  d.tpep_pickup_datetime,
  d.tpep_dropoff_datetime,
  d.trip_distance,
  d.fare_amount,
  d.pickup_zip,
  d.dropoff_zip,
  d.trip_duration_minutes
FROM
  trips_with_duration AS d
WHERE
  d.trip_duration_minutes > 120 -- trips longer than 2 hours
LIMIT 10;

## View que combina informações de trips_raw e zip_code_info. View que junta as informações das viagens com os nomes das cidades de partida e destino.

In [0]:
%sql
-- View pairing each trip with the city names looked up for its pickup and
-- drop-off ZIP codes. Trips missing either ZIP are left out.
CREATE OR REPLACE VIEW trips_with_zip_details AS
SELECT
  t.tpep_pickup_datetime,
  t.tpep_dropoff_datetime,
  t.trip_distance,
  t.fare_amount,
  t.pickup_zip,
  z_pickup.city_name AS pickup_city_name,
  t.dropoff_zip,
  z_dropoff.city_name AS dropoff_city_name
FROM
  trips_raw AS t
-- zip_code_info is joined twice, once per ZIP role. LEFT JOIN keeps trips whose
-- ZIP has no match in the lookup; their city name is then NULL.
-- NOTE(review): assumes zip_code_info holds at most one row per zip; duplicate
-- zips would multiply trip rows and inflate the counts below — confirm.
LEFT JOIN
  zip_code_info AS z_pickup ON t.pickup_zip = z_pickup.zip
LEFT JOIN
  zip_code_info AS z_dropoff ON t.dropoff_zip = z_dropoff.zip
WHERE
  t.pickup_zip IS NOT NULL
  AND t.dropoff_zip IS NOT NULL;

-- Query the view: the 10 pickup cities with the most trips, with their average fare.
-- Trips whose pickup ZIP had no match are grouped together under a NULL city name.
SELECT
  pickup_city_name,
  COUNT(*) AS total_trips,
  AVG(fare_amount) AS average_fare
FROM
  trips_with_zip_details
GROUP BY
  pickup_city_name
ORDER BY
  total_trips DESC,
  -- pickup_city_name is unique per group, so it breaks ties in total_trips and
  -- makes the rows kept by LIMIT reproducible.
  pickup_city_name
LIMIT 10;