In [0]:
-- Make sure the f1_raw schema exists before any raw tables are registered.
-- NOTE(review): the tables in this notebook are named with an f1_raw_ prefix
-- (e.g. f1_raw_circuits) and are not qualified with this schema - confirm
-- whether f1_raw.<table> was intended.
CREATE SCHEMA IF NOT EXISTS f1_raw;

### Create circuits table

In [0]:
-- External table over the raw circuits CSV file.
-- The table is dropped first so that re-running the cell always picks up the
-- schema declared here.
DROP TABLE IF EXISTS f1_raw_circuits;

-- A user-supplied schema on a CSV source is applied to the file columns by
-- position, not by header name, so the column list must mirror the file
-- layout exactly. circuitRef is declared right after circuitId so that the
-- remaining columns (name, location, ..., url) line up with the file.
-- NOTE(review): assumes the standard circuits.csv layout
-- (circuitId, circuitRef, name, location, country, lat, lng, alt, url) -
-- confirm against the file header.
CREATE TABLE IF NOT EXISTS f1_raw_circuits
(
  circuitId INT,
  circuitRef STRING,
  name STRING,
  location STRING,
  country STRING,
  lat DOUBLE,
  lng DOUBLE,
  alt INT,
  url STRING
)
USING CSV
-- header True: the first line of the file holds column names, not data.
OPTIONS (path "/mnt/formula1in2025/raw/circuits.csv", header True)

In [0]:
-- Preview the circuits table to check that the file was parsed as expected.
SELECT * FROM f1_raw_circuits

### Create races table

In [0]:
-- External table over the raw races CSV file.
-- Dropping first makes the cell safe to re-run after a schema edit.
DROP TABLE IF EXISTS f1_raw_races;

CREATE TABLE IF NOT EXISTS f1_raw_races (
  raceId    INT,
  year      INT,
  round     INT,
  circuitId INT,
  name      STRING,
  date      DATE,
  time      STRING,
  url       STRING
)
USING CSV
OPTIONS (
  path "/mnt/formula1in2025/raw/races.csv",
  header True  -- first line of the file is a header row
)

In [0]:
-- Preview the races table to check that the file was parsed as expected.
SELECT * FROM f1_raw_races

### Create tables from JSON files

#### Create constructors table

In [0]:
-- External table over the raw constructors JSON file.
-- Dropping first makes the cell safe to re-run after a schema edit.
DROP TABLE IF EXISTS f1_raw_constructors;

CREATE TABLE IF NOT EXISTS f1_raw_constructors (
  constructorId  INT,
  constructorRef STRING,
  name           STRING,
  nationality    STRING,
  url            STRING
)
USING json
OPTIONS (
  path "/mnt/formula1in2025/raw/constructors.json"
)

In [0]:
-- Preview the constructors table to check that the file was parsed as expected.
SELECT * FROM f1_raw_constructors

#### Create drivers table

In [0]:
-- External table over the raw drivers JSON file.
-- Dropping first makes the cell safe to re-run after a schema edit.
DROP TABLE IF EXISTS f1_raw_drivers;

CREATE TABLE IF NOT EXISTS f1_raw_drivers
(
  driverId INT,
  driverRef STRING,
  number INT,
  code STRING,
  -- name is a nested JSON object, so it is declared as a STRUCT with its
  -- two string fields rather than as a flat column.
  name STRUCT<forename: STRING, surname: STRING>,
  dob DATE,
  nationality STRING,
  -- No comma after the last column: a trailing comma before the closing
  -- parenthesis is a parse error.
  url STRING
)
USING json
-- File extension corrected from ".son" to ".json".
OPTIONS (path "/mnt/formula1in2025/raw/drivers.json")

#### Create results table

In [0]:
-- External table over the raw results JSON file.
-- Dropping first makes the cell safe to re-run after a schema edit.
DROP TABLE IF EXISTS f1_raw_results;

CREATE TABLE IF NOT EXISTS f1_raw_results
(
  resultId INT,
  -- raceId is INT here to match the raceId column type used by the races,
  -- pit stops, lap times and qualifying tables in this notebook.
  raceId INT,
  driverId INT,
  constructorId INT,
  number INT,
  grid INT,
  position INT,
  positionText STRING,
  positionOrder INT,
  -- NOTE(review): points is declared INT - confirm the source never carries
  -- fractional points, otherwise those values cannot be read as declared.
  points INT,
  laps INT,
  time STRING,
  milliseconds INT,
  fastestLap INT,
  rank INT,
  fastestLapTime STRING,
  fastestLapSpeed FLOAT,
  statusId STRING
)
USING json
-- File extension corrected from ".son" to ".json".
OPTIONS (path "/mnt/formula1in2025/raw/results.json")

In [0]:
-- Preview the drivers table to check that the file was parsed as expected.
-- NOTE(review): this cell follows the results table definition but queries
-- the drivers table - confirm whether a results preview was intended here.
SELECT * FROM f1_raw_drivers

#### Create pit stops table (JSON file with multiple lines)

In [0]:
-- External table over the raw pit stops JSON file.
-- Dropping first makes the cell safe to re-run after a schema edit.
DROP TABLE IF EXISTS f1_raw_pit_stops;

CREATE TABLE IF NOT EXISTS f1_raw_pit_stops
(
  driverId INT,
  duration STRING,
  lap INT,
  milliseconds INT,
  raceId INT,
  stop INT,
  time STRING
)
USING json
-- multiline True: each JSON record may span several lines instead of the
-- default one-record-per-line layout.
-- File extension corrected from ".son" to ".json".
OPTIONS (path "/mnt/formula1in2025/raw/pit_stops.json", multiline True)

In [0]:
-- Preview the pit stops table to check that the file was parsed as expected.
SELECT * FROM f1_raw_pit_stops

#### Create lap times table (folder contains multiple files)

In [0]:
-- External table over the raw lap times folder. The path points at a
-- directory, so every file inside it is read into the one table.
-- Dropping first makes the cell safe to re-run after a schema edit.
-- NOTE(review): the source is declared as json - confirm the files in this
-- folder are JSON and not CSV splits.
DROP TABLE IF EXISTS f1_raw_lap_times;

CREATE TABLE IF NOT EXISTS f1_raw_lap_times (
  raceId       INT,
  driverId     INT,
  lap          INT,
  milliseconds INT,
  position     INT,
  time         STRING
)
USING json
OPTIONS (
  path "/mnt/formula1in2025/raw/lap_times"
)

In [0]:
-- Row count across all files in the lap times folder.
SELECT COUNT(1)
FROM f1_raw_lap_times

#### Create qualifying table

In [0]:
-- External table over the raw qualifying folder. The path points at a
-- directory, so every file inside it is read into the one table.
-- Dropping first makes the cell safe to re-run after a schema edit.
DROP TABLE IF EXISTS f1_raw_qualifying;

CREATE TABLE IF NOT EXISTS f1_raw_qualifying (
  constructorId INT,
  driverId      INT,
  number        INT,
  position      INT,
  q1            STRING,
  q2            STRING,
  q3            STRING,
  qualifyId     INT,
  raceId        INT
)
USING json
OPTIONS (
  path "/mnt/formula1in2025/raw/qualifying",
  multiline True  -- records may span several lines
)

In [0]:
-- Row count across all files in the qualifying folder.
SELECT COUNT(1)
FROM f1_raw_qualifying