## Staging Dataset

In [1]:
from google.cloud import bigquery

# Target coordinates of the staging dataset that the cells below write into.
project_id = "saffatandsourik"
dataset = "formula1_stg"
region = "us-central1"

# No project is passed here, so the client uses whatever default project the
# environment's credentials resolve to.
bq_client = bigquery.Client()

# NOTE: despite its name, `dataset_id` holds a bigquery.Dataset object
# (built from the "<project>.<dataset>" string), not a plain id string.
dataset_id = bigquery.Dataset(f"{project_id}.{dataset}")
dataset_id.location = region

# exists_ok=True makes this cell safe to re-run: an "already exists" error
# from the API is ignored instead of raised.
resp = bq_client.create_dataset(dataset_id, exists_ok=True)

# Report the project of the dataset that was actually created. The client's
# default project (bq_client.project) can differ from project_id, which would
# make the message point at the wrong place.
print(f"Created dataset {resp.project}.{resp.dataset_id}")

Created dataset saffatandsourik.formula1_stg


## drivers_openf1

In [None]:
%%bigquery
-- Stage the raw OpenF1 driver rows and derive first / middle / last name
-- columns from the space-separated full_name.
CREATE OR REPLACE TABLE formula1_stg.drivers_openf1 AS
WITH tokenized AS (
    -- Split full_name once; every derived name column reads this array.
    SELECT
        *,
        SPLIT(full_name, ' ') AS name_tokens
    FROM formula1_raw.drivers_openf1
)
SELECT
    driver_number,
    broadcast_name,
    name_tokens[OFFSET(0)] AS first_name,
    -- A middle name exists only when there are more than two tokens; it is
    -- always the second token, otherwise NULL.
    IF(ARRAY_LENGTH(name_tokens) > 2, name_tokens[OFFSET(1)], NULL) AS middle_name,
    -- The final token. For a one-token name this equals first_name.
    name_tokens[SAFE_OFFSET(ARRAY_LENGTH(name_tokens) - 1)] AS last_name,
    name_acronym,
    team_name,
    team_colour,
    headshot_url,
    country_code,
    session_key,
    _data_source,
    _load_time
FROM tokenized


Query is running:   0%|          |

## laps

In [None]:
%%bigquery
-- Preview: check that date_start converts cleanly with TIMESTAMP() before
-- the staging table is built. There is no ORDER BY, so which 100 rows come
-- back is not defined.
SELECT
  TIMESTAMP(src.date_start) AS date_start
FROM formula1_raw.laps AS src
LIMIT 100;

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,date_start
0,2023-02-23 07:00:34.221000+00:00
1,2023-02-23 12:36:07.799000+00:00
2,2023-02-23 12:39:45.983000+00:00
3,2023-02-23 07:01:04.533000+00:00
4,2023-02-23 12:39:27.343000+00:00
...,...
95,2023-02-23 07:34:01.729000+00:00
96,2023-02-23 13:02:48.467000+00:00
97,2023-02-23 14:28:56.859000+00:00
98,2023-02-23 07:36:24.883000+00:00


In [None]:
%%bigquery
-- Stage lap records. Every column is carried over unchanged except
-- date_start, which is converted with TIMESTAMP().
CREATE OR REPLACE TABLE formula1_stg.laps AS
SELECT
  src.session_key,
  src.driver_number,
  src.i1_speed,
  src.i2_speed,
  src.st_speed,
  TIMESTAMP(src.date_start) AS date_start,
  src.lap_duration,
  src.is_pit_out_lap,
  src.duration_sector_1,
  src.duration_sector_2,
  src.duration_sector_3,
  src.segments_sector_1,
  src.segments_sector_2,
  src.segments_sector_3,
  src.lap_number,
  src._data_source,
  src._load_time
FROM formula1_raw.laps AS src

Query is running:   0%|          |

## ergast_drivers

In [None]:
%%bigquery
-- Stage Ergast driver records. Only date_of_birth is transformed: it is
-- converted with DATE(); every other column is copied as-is.
CREATE OR REPLACE TABLE formula1_stg.ergast_drivers AS
SELECT
  src.year,
  src.driver_id,
  src.permanent_number,
  src.code,
  src.given_name,
  src.family_name,
  DATE(src.date_of_birth) AS date_of_birth,
  src.nationality,
  src.wikipedia_url,
  src._data_source,
  src._load_time
FROM formula1_raw.ergast_drivers AS src

Query is running:   0%|          |

## ergast_lap_times

In [None]:
%%bigquery
-- Stage Ergast lap times, renaming the raw headers to snake_case.
--
-- Column names that contain a space must be quoted with backticks. Single
-- quotes create a STRING literal, which would fill every row with the
-- constant text (e.g. 'Race Name') instead of the column's value.
--
-- NOTE(review): this assumes the raw table kept the spaces in its column
-- names. If the load replaced them with underscores (Race_Name, Lap_Time,
-- ...), the query fails with an unknown-column error; switch the
-- identifiers below accordingly.
CREATE OR REPLACE TABLE formula1_stg.ergast_lap_times AS
SELECT
  Year AS year,
  Round AS round,
  `Race Name` AS race_name,
  `Circuit Name` AS circuit_name,
  Location AS location,
  Country AS country,
  `Race Date` AS race_date,
  `Lap Number` AS lap_number,
  `Driver ID` AS driver_id,
  Position AS position,
  `Lap Time` AS lap_time,
  _data_source,
  _load_time
FROM formula1_raw.ergast_lap_times


Query is running:   0%|          |

## ergast_races

In [None]:
%%bigquery
-- Stage Ergast race records. Values equal to the null-marker literal used
-- throughout this query are turned into real NULLs, and each date/time pair
-- is additionally combined into one DATETIME column.
CREATE OR REPLACE TABLE formula1_stg.ergast_races AS
SELECT
    -- Key columns are compared through a STRING cast, so the original
    -- (uncast) value is what gets returned.
    IF(CAST(race_id AS STRING) = '\\\\N', NULL, race_id) AS race_id,
    IF(CAST(year AS STRING) = '\\\\N', NULL, year) AS year,
    IF(CAST(round AS STRING) = '\\\\N', NULL, round) AS round,
    IF(CAST(circuit_id AS STRING) = '\\\\N', NULL, circuit_id) AS circuit_id,
    NULLIF(name, '\\\\N') AS name,
    NULLIF(url, '\\\\N') AS url,

    -- Raw date and time columns with the null marker replaced by NULL.
    NULLIF(date, '\\\\N') AS date,
    NULLIF(time, '\\\\N') AS time,
    NULLIF(fp1_date, '\\\\N') AS fp1_date,
    NULLIF(fp1_time, '\\\\N') AS fp1_time,
    NULLIF(fp2_date, '\\\\N') AS fp2_date,
    NULLIF(fp2_time, '\\\\N') AS fp2_time,
    NULLIF(fp3_date, '\\\\N') AS fp3_date,
    NULLIF(fp3_time, '\\\\N') AS fp3_time,
    NULLIF(quali_date, '\\\\N') AS quali_date,
    NULLIF(quali_time, '\\\\N') AS quali_time,
    NULLIF(sprint_date, '\\\\N') AS sprint_date,
    NULLIF(sprint_time, '\\\\N') AS sprint_time,

    -- Concatenate each date/time pair into a single DATETIME. Dates are
    -- parsed as month/day/year. The result is NULL when either half of the
    -- pair is NULL.
    DATETIME(PARSE_DATE('%m/%d/%Y', NULLIF(date, '\\\\N')), CAST(NULLIF(time, '\\\\N') AS TIME)) AS date_time,
    DATETIME(PARSE_DATE('%m/%d/%Y', NULLIF(fp1_date, '\\\\N')), CAST(NULLIF(fp1_time, '\\\\N') AS TIME)) AS fp1_date_time,
    DATETIME(PARSE_DATE('%m/%d/%Y', NULLIF(fp2_date, '\\\\N')), CAST(NULLIF(fp2_time, '\\\\N') AS TIME)) AS fp2_date_time,
    DATETIME(PARSE_DATE('%m/%d/%Y', NULLIF(fp3_date, '\\\\N')), CAST(NULLIF(fp3_time, '\\\\N') AS TIME)) AS fp3_date_time,
    DATETIME(PARSE_DATE('%m/%d/%Y', NULLIF(quali_date, '\\\\N')), CAST(NULLIF(quali_time, '\\\\N') AS TIME)) AS quali_date_time,
    DATETIME(PARSE_DATE('%m/%d/%Y', NULLIF(sprint_date, '\\\\N')), CAST(NULLIF(sprint_time, '\\\\N') AS TIME)) AS sprint_date_time
FROM formula1_raw.ergast_races


Query is running:   0%|          |

## meetings



In [None]:
%%bigquery
-- Preview: date_start shifted back by gmt_offset.
--
-- gmt_offset is parsed as [sign]HH:MM[:SS] and converted to signed minutes.
-- Taking only the first two characters as hours would read '-05:00:00' as
-- '-0' (zero hours) and would drop the minutes of offsets such as '05:30:00'.
--
-- NOTE(review): the date_start values shown in this notebook's output already
-- end in +00:00, which suggests they may already be UTC. If so, subtracting
-- gmt_offset moves the value away from UTC -- confirm against the data source.
SELECT
  date_start,
  gmt_offset,
  TIMESTAMP_SUB(
      TIMESTAMP(date_start),
      INTERVAL (
          IF(STARTS_WITH(gmt_offset, '-'), -1, 1)
          * (
              CAST(REGEXP_EXTRACT(gmt_offset, r'^[+-]?(\d+)') AS INT64) * 60
              -- The minutes part is optional; treat a missing one as 0.
              + IFNULL(CAST(REGEXP_EXTRACT(gmt_offset, r'^[+-]?\d+:(\d+)') AS INT64), 0)
          )
      ) MINUTE
  ) AS utc_time
FROM formula1_raw.meetings

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,date_start,gmt_offset,utc_time
0,2024-06-21T11:30:00+00:00,02:00:00,2024-06-21 09:30:00+00:00
1,2024-06-21T11:30:00+00:00,02:00:00,2024-06-21 09:30:00+00:00
2,2024-06-21T11:30:00+00:00,02:00:00,2024-06-21 09:30:00+00:00
3,2024-06-21T11:30:00+00:00,02:00:00,2024-06-21 09:30:00+00:00
4,2024-06-21T11:30:00+00:00,02:00:00,2024-06-21 09:30:00+00:00
...,...,...,...
324,2024-09-20T09:30:00+00:00,08:00:00,2024-09-20 01:30:00+00:00
325,2024-09-20T09:30:00+00:00,08:00:00,2024-09-20 01:30:00+00:00
326,2024-09-20T09:30:00+00:00,08:00:00,2024-09-20 01:30:00+00:00
327,2024-09-20T09:30:00+00:00,08:00:00,2024-09-20 01:30:00+00:00


In [None]:
%%bigquery
-- Stage meeting records and add utc_time: date_start shifted back by
-- gmt_offset.
--
-- gmt_offset is parsed as [sign]HH:MM[:SS] and converted to signed minutes.
-- Taking only the first two characters as hours would read '-05:00:00' as
-- '-0' (zero hours) and would drop the minutes of offsets such as '05:30:00'.
--
-- NOTE(review): the date_start values shown in this notebook's preview output
-- already end in +00:00, which suggests they may already be UTC. If so,
-- subtracting gmt_offset moves the value away from UTC -- confirm against the
-- data source.
CREATE OR REPLACE TABLE formula1_stg.meetings AS
SELECT
  meeting_name,
  meeting_official_name,
  location,
  country_key,
  country_code,
  country_name,
  circuit_key,
  circuit_short_name,
  date_start,
  gmt_offset,
  TIMESTAMP_SUB(
      TIMESTAMP(date_start),
      INTERVAL (
          IF(STARTS_WITH(gmt_offset, '-'), -1, 1)
          * (
              CAST(REGEXP_EXTRACT(gmt_offset, r'^[+-]?(\d+)') AS INT64) * 60
              -- The minutes part is optional; treat a missing one as 0.
              + IFNULL(CAST(REGEXP_EXTRACT(gmt_offset, r'^[+-]?\d+:(\d+)') AS INT64), 0)
          )
      ) MINUTE
  ) AS utc_time,
  meeting_key,
  year,
  meeting_code,
  _data_source,
  _load_time
FROM formula1_raw.meetings

Query is running:   0%|          |

## pit

In [None]:
%%bigquery
-- Stage pit-stop records. Only `date` is transformed: it is converted with
-- DATE(); the remaining columns are copied unchanged.
CREATE OR REPLACE TABLE formula1_stg.pit AS
SELECT
  src.pit_duration,
  src.lap_number,
  src.driver_number,
  DATE(src.date) AS date,
  src.session_key,
  src._data_source,
  src._load_time
FROM formula1_raw.pit AS src

Query is running:   0%|          |

## qualifying_results

In [None]:
%%bigquery
-- Preview: list each distinct driver_name once, to see the name formats
-- before deciding how to split them.
SELECT DISTINCT
  src.driver_name
FROM formula1_raw.qualifying_results AS src

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,driver_name
0,Carlos Sainz
1,George Russell
2,Charles Leclerc
3,Fernando Alonso
4,Lance Stroll
5,Nico Hülkenberg
6,Max Verstappen
7,Yuki Tsunoda
8,Alexander Albon
9,Sergio Pérez


In [None]:
%%bigquery
-- Preview: split driver_name into first_name and last_name.
--
-- last_name is the LAST token of the name, not the second one, so names with
-- three or more tokens get their real final token instead of a middle
-- particle or name. This matches how the drivers_openf1 staging cell derives
-- last_name. A one-token name still yields a NULL last_name.
SELECT
    driver_name,
    name_tokens[OFFSET(0)] AS first_name,
    IF(
        ARRAY_LENGTH(name_tokens) > 1,
        name_tokens[ORDINAL(ARRAY_LENGTH(name_tokens))],
        NULL
    ) AS last_name
FROM (
    SELECT
        driver_name,
        -- TRIM first so stray leading/trailing spaces do not produce empty
        -- first or last tokens.
        SPLIT(TRIM(driver_name), ' ') AS name_tokens
    FROM formula1_raw.qualifying_results
)


Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,driver_name,first_name,last_name
0,Carlos Sainz,Carlos,Sainz
1,Carlos Sainz,Carlos,Sainz
2,Carlos Sainz,Carlos,Sainz
3,Carlos Sainz,Carlos,Sainz
4,George Russell,George,Russell
...,...,...,...
3671,Franco Colapinto,Franco,Colapinto
3672,Jack Doohan,Jack,Doohan
3673,Jack Doohan,Jack,Doohan
3674,Jack Doohan,Jack,Doohan


In [None]:
%%bigquery
-- Stage qualifying results and derive first_name / last_name from
-- driver_name.
--
-- last_name is the LAST token of the name, not the second one, so names with
-- three or more tokens get their real final token instead of a middle
-- particle or name. This matches how the drivers_openf1 staging cell derives
-- last_name. A one-token name still yields a NULL last_name.
CREATE OR REPLACE TABLE formula1_stg.qualifying_results AS
WITH tokenized AS (
    SELECT
        *,
        -- TRIM first so stray leading/trailing spaces do not produce empty
        -- first or last tokens.
        SPLIT(TRIM(driver_name), ' ') AS name_tokens
    FROM formula1_raw.qualifying_results
)
SELECT
  year,
  round,
  race_name,
  circuit_name,
  date,
  position,
  driver_name,
  name_tokens[OFFSET(0)] AS first_name,
  IF(
      ARRAY_LENGTH(name_tokens) > 1,
      name_tokens[ORDINAL(ARRAY_LENGTH(name_tokens))],
      NULL
  ) AS last_name,
  driver_nationality,
  constructor_name,
  constructor_nationality,
  q1_time,
  q2_time,
  q3_time,
  _data_source,
  _load_time
FROM tokenized

Query is running:   0%|          |

## circuits

In [None]:
%%bigquery
-- Stage circuit records: a straight column-for-column copy of the raw table
-- restricted to the columns listed here.
CREATE OR REPLACE TABLE formula1_stg.circuits AS
SELECT
  src.circuit_name,
  src.city,
  src.country,
  src.latitude,
  src.longitude,
  src.capacity,
  src.fia_grade,
  src.circuit_status,
  src._data_source,
  src._load_time
FROM formula1_raw.circuits AS src


Query is running:   0%|          |

## historical_cars

In [None]:
%%bigquery
-- Stage historical car standings: the listed columns are copied unchanged.
-- Unlike most other staging cells, no _data_source / _load_time columns are
-- selected here.
CREATE OR REPLACE TABLE formula1_stg.historical_cars AS
SELECT
  src.pos,
  src.driver,
  src.nationality,
  src.car,
  src.pts,
  src.year,
  src.code
FROM formula1_raw.historical_cars AS src


Query is running:   0%|          |

## race_results

In [None]:
%%bigquery
-- Stage race results, converting the date and time text columns to typed
-- DATE / TIME values. All other columns are copied unchanged.
CREATE OR REPLACE TABLE formula1_stg.race_results AS
SELECT
  src.season,
  src.round,
  src.race_name,
  -- Strict parse (month/day/year): a value that does not match the format
  -- fails the whole query.
  PARSE_DATE('%m/%d/%Y', src.date) AS date,
  -- Lenient parse: the SAFE. prefix returns NULL for a value that does not
  -- match the format instead of raising an error.
  SAFE.PARSE_TIME('%H:%M:%S', src.time) AS time,
  src.circuit,
  src.circuit_id,
  src.location,
  src.country,
  src.number,
  src.position,
  src.position_text,
  src.points,
  src.laps,
  src.status,
  src.driver,
  src.permanent_number,
  src.code,
  src.given_name
FROM formula1_raw.race_results AS src


Query is running:   0%|          |

## sessions

In [None]:
%%bigquery
-- Stage session records. date_start and date_end are converted with DATE();
-- every other column is copied unchanged.
CREATE OR REPLACE TABLE formula1_stg.sessions AS
SELECT
  src.location,
  src.country_key,
  src.country_code,
  src.country_name,
  src.circuit_key,
  src.circuit_short_name,
  src.session_name,
  DATE(src.date_start) AS date_start,
  DATE(src.date_end) AS date_end,
  src.gmt_offset,
  src.session_key,
  src.meeting_key,
  src.year,
  src._data_source,
  src._load_time
FROM formula1_raw.sessions AS src


Query is running:   0%|          |