### Importação das bibliotecas necessárias

Esta seção consolida todas as importações de bibliotecas necessárias para a execução do notebook.

In [0]:
from pyspark.sql import functions as F

In [0]:
# Select the default catalog and schema for this session; later cells save and
# query tables by bare name (e.g. `stations`, `weather_data`).
spark.sql("USE CATALOG mvp")
spark.sql("USE SCHEMA silver")

DataFrame[]

### Carregamento do dataset
Carregamento da tabela bronze com as informações de inventário das estações meteorológicas do INMET e das leituras das estações.

In [0]:
# Read both bronze sources first, then preview ten rows of each in the same
# order (stations, then weather readings).
df_bronze_stations = spark.table("mvp.bronze.stations")
df_bronze_weather_data = spark.table("mvp.bronze.weather_data")

for bronze_preview in (df_bronze_stations, df_bronze_weather_data):
    display(bronze_preview.limit(10))

region,state,city_station,id_station,lat,lon,lvl,record_first,record_last
CO,DF,BRASILIA,A001,-1578944444,-4792583332,115954,2000-05-07,2025-05-31
NE,BA,SALVADOR,A401,-1301666666,-3851666666,5141,2000-05-13,2025-05-31
N,AM,MANAUS,A101,-310333333,-6001638888,6125,2000-05-09,2025-05-31
SE,RJ,ECOLOGIA AGRICOLA,A601,-228,-4368333333,33,2000-05-07,2025-05-31
S,RS,PORTO ALEGRE,A801,-3005,-5116666666,4697,2000-09-22,2025-05-31
CO,GO,GOIANIA,A002,-1664277777,-4921999999,770,2001-05-29,2025-05-19
CO,GO,MORRINHOS,A003,-1771666667,-491,77142,2001-05-25,2025-05-31
CO,MS,CAMPO GRANDE,A702,-2045,-546,530,2001-09-10,2025-05-31
CO,MS,PONTA PORA,A703,-225525,-5571638888,675,2001-09-07,2025-05-31
CO,MS,TRES LAGOAS,A704,-2078999999,-5171222222,313,2001-09-03,2025-05-31


DATA__YYYY_MM_DD_,Hora_UTC,PRECIPITAÇÃO_TOTAL__HORÁRIO__mm_,PRESSAO_ATMOSFERICA_AO_NIVEL_DA_ESTACAO__HORARIA__mB_,PRESSÃO_ATMOSFERICA_MAX_NA_HORA_ANT___AUT___mB_,PRESSÃO_ATMOSFERICA_MIN__NA_HORA_ANT___AUT___mB_,RADIACAO_GLOBAL__KJ/m²_,TEMPERATURA_DO_AR___BULBO_SECO__HORARIA___C_,TEMPERATURA_DO_PONTO_DE_ORVALHO___C_,TEMPERATURA_MÁXIMA_NA_HORA_ANT___AUT____C_,TEMPERATURA_MÍNIMA_NA_HORA_ANT___AUT____C_,TEMPERATURA_ORVALHO_MAX__NA_HORA_ANT___AUT____C_,TEMPERATURA_ORVALHO_MIN__NA_HORA_ANT___AUT____C_,UMIDADE_REL__MAX__NA_HORA_ANT___AUT___%_,UMIDADE_REL__MIN__NA_HORA_ANT___AUT___%_,UMIDADE_RELATIVA_DO_AR__HORARIA__%_,VENTO__DIREÇÃO_HORARIA__gr______gr__,VENTO__RAJADA_MAXIMA__m/s_,VENTO__VELOCIDADE_HORARIA__m/s_,ESTACAO
2015-01-01,00:00,0.0,886.8,886.8,886.2,,20.1,15.7,20.9,20.0,16.6,15.4,80.0,72.0,76.0,84.0,1.0,0.5,A001
2015-01-01,01:00,0.0,887.5,887.6,886.8,,19.8,16.0,20.1,19.2,16.0,15.4,80.0,76.0,79.0,20.0,1.1,0.4,A001
2015-01-01,02:00,0.0,888.1,888.1,887.5,,18.6,15.7,20.0,18.5,16.3,15.5,84.0,78.0,84.0,284.0,1.5,1.0,A001
2015-01-01,03:00,0.0,887.8,888.2,887.8,,18.6,15.9,19.5,18.6,16.1,15.6,84.0,80.0,84.0,265.0,1.3,0.7,A001
2015-01-01,04:00,0.0,887.1,887.9,887.1,,19.3,16.0,19.5,18.6,16.4,15.9,84.0,81.0,81.0,110.0,0.9,0.2,A001
2015-01-01,05:00,0.0,886.6,887.1,886.6,,18.9,16.1,19.3,18.4,16.1,15.5,85.0,80.0,83.0,247.0,0.9,0.5,A001
2015-01-01,06:00,0.0,886.3,886.6,886.2,,18.5,16.7,18.9,18.2,17.2,15.7,92.0,83.0,89.0,43.0,1.1,0.6,A001
2015-01-01,07:00,0.0,886.5,886.5,886.2,,17.8,15.3,19.5,17.8,16.8,15.1,90.0,79.0,85.0,277.0,1.9,1.4,A001
2015-01-01,08:00,0.0,886.9,886.9,886.5,,17.3,16.5,17.8,16.9,16.5,15.2,96.0,85.0,95.0,301.0,1.9,1.0,A001
2015-01-01,09:00,0.0,887.3,887.3,886.8,18.1,18.1,16.5,18.4,17.2,16.8,16.0,95.0,88.0,90.0,2.0,1.5,0.6,A001


### Criação da tabela Silver de estações

Nesta célula, o DataFrame `df_bronze_stations` é transformado para a camada **Silver** por meio da remoção de colunas não necessárias (`lvl`, `lat`, `lon`) e do renomeio de campos para uma nomenclatura mais padronizada e consistente. O uso de `selectExpr` permite aplicar os aliases diretamente durante a seleção das colunas, resultando em um DataFrame mais limpo e adequado para consumo analítico.


In [0]:
# Discard the columns that are not carried into the Silver stations table.
df = df_bronze_stations.drop("lvl", "lat", "lon")

# Bronze column name -> Silver column name. The dict order is also the column
# order of the resulting DataFrame.
COLUMNS_TO_RENAME = {
    "region": "region",
    "state": "uf",
    "city_station": "city",
    "id_station": "code",
    "record_first": "first_record",
    "record_last": "last_record",
}

# One "`old` AS new" SQL expression per mapping entry; the source name is
# back-quoted so it is always parsed as an identifier.
station_select_exprs = [
    f"`{old}` AS {new}" for old, new in COLUMNS_TO_RENAME.items()
]
df_renamed = df.selectExpr(*station_select_exprs)

display(df_renamed.limit(10))

region,uf,city,code,first_record,last_record
CO,DF,BRASILIA,A001,2000-05-07,2025-05-31
NE,BA,SALVADOR,A401,2000-05-13,2025-05-31
N,AM,MANAUS,A101,2000-05-09,2025-05-31
SE,RJ,ECOLOGIA AGRICOLA,A601,2000-05-07,2025-05-31
S,RS,PORTO ALEGRE,A801,2000-09-22,2025-05-31
CO,GO,GOIANIA,A002,2001-05-29,2025-05-19
CO,GO,MORRINHOS,A003,2001-05-25,2025-05-31
CO,MS,CAMPO GRANDE,A702,2001-09-10,2025-05-31
CO,MS,PONTA PORA,A703,2001-09-07,2025-05-31
CO,MS,TRES LAGOAS,A704,2001-09-03,2025-05-31


### Conversão de datas de registro das estações

Nesta célula, as colunas `first_record` e `last_record` do DataFrame `df_renamed` são convertidas para o tipo `timestamp`. Essa padronização facilita análises temporais e garante consistência no tratamento das informações de período de operação das estações.


In [0]:
# Cast both record-date columns to timestamp, replacing each column in place
# so the column order of df_renamed is preserved.
df_silver_stations = df_renamed
for record_column in ("first_record", "last_record"):
    df_silver_stations = df_silver_stations.withColumn(
        record_column, F.col(record_column).cast("timestamp")
    )

display(df_silver_stations)

region,uf,city,code,first_record,last_record
CO,DF,BRASILIA,A001,2000-05-07T00:00:00.000Z,2025-05-31T00:00:00.000Z
NE,BA,SALVADOR,A401,2000-05-13T00:00:00.000Z,2025-05-31T00:00:00.000Z
N,AM,MANAUS,A101,2000-05-09T00:00:00.000Z,2025-05-31T00:00:00.000Z
SE,RJ,ECOLOGIA AGRICOLA,A601,2000-05-07T00:00:00.000Z,2025-05-31T00:00:00.000Z
S,RS,PORTO ALEGRE,A801,2000-09-22T00:00:00.000Z,2025-05-31T00:00:00.000Z
CO,GO,GOIANIA,A002,2001-05-29T00:00:00.000Z,2025-05-19T00:00:00.000Z
CO,GO,MORRINHOS,A003,2001-05-25T00:00:00.000Z,2025-05-31T00:00:00.000Z
CO,MS,CAMPO GRANDE,A702,2001-09-10T00:00:00.000Z,2025-05-31T00:00:00.000Z
CO,MS,PONTA PORA,A703,2001-09-07T00:00:00.000Z,2025-05-31T00:00:00.000Z
CO,MS,TRES LAGOAS,A704,2001-09-03T00:00:00.000Z,2025-05-31T00:00:00.000Z


### Persistência da tabela Silver de estações

Nesta célula, o DataFrame `df_silver_stations` é salvo como a tabela `stations`, utilizando o modo **overwrite** para substituir qualquer versão existente. Isso garante que a tabela reflita integralmente as transformações mais recentes aplicadas na camada Silver.

In [0]:
# Persist the Silver stations DataFrame as table `stations`, replacing any
# existing contents.
df_silver_stations.write.saveAsTable("stations", mode="overwrite")

### Consulta de validação da tabela `stations`

Nesta célula, é executada uma consulta SQL simples para retornar uma amostra dos registros da tabela `stations`. O objetivo é validar visualmente que os dados foram persistidos corretamente e estão acessíveis para consulta no Databricks.

In [0]:
%sql
-- Preview ten rows of the persisted stations table.
SELECT *
FROM stations
LIMIT 10

region,uf,city,code,first_record,last_record
CO,DF,BRASILIA,A001,2000-05-07T00:00:00.000Z,2025-05-31T00:00:00.000Z
NE,BA,SALVADOR,A401,2000-05-13T00:00:00.000Z,2025-05-31T00:00:00.000Z
N,AM,MANAUS,A101,2000-05-09T00:00:00.000Z,2025-05-31T00:00:00.000Z
SE,RJ,ECOLOGIA AGRICOLA,A601,2000-05-07T00:00:00.000Z,2025-05-31T00:00:00.000Z
S,RS,PORTO ALEGRE,A801,2000-09-22T00:00:00.000Z,2025-05-31T00:00:00.000Z
CO,GO,GOIANIA,A002,2001-05-29T00:00:00.000Z,2025-05-19T00:00:00.000Z
CO,GO,MORRINHOS,A003,2001-05-25T00:00:00.000Z,2025-05-31T00:00:00.000Z
CO,MS,CAMPO GRANDE,A702,2001-09-10T00:00:00.000Z,2025-05-31T00:00:00.000Z
CO,MS,PONTA PORA,A703,2001-09-07T00:00:00.000Z,2025-05-31T00:00:00.000Z
CO,MS,TRES LAGOAS,A704,2001-09-03T00:00:00.000Z,2025-05-31T00:00:00.000Z


### Padronização e seleção das colunas de dados meteorológicos

Nesta célula, o DataFrame `df_bronze_weather_data` é transformado para a camada **Silver** por meio da seleção e renomeio das colunas de interesse, aplicando uma nomenclatura padronizada e consistente. 
- Renomear colunas para o padrão inglês e snake_case. O objetivo é organizar o código, encurtar os nomes das colunas e melhorar a leitura.
- Remoção de colunas redundantes. Por exemplo, as temperaturas máxima e mínima do ar na hora podem ser representadas pela temperatura do ar instantânea, que é a média na hora.

In [0]:
# Bronze column name -> Silver column name. Only the columns listed here are
# kept, and the dict order is the column order of the resulting DataFrame.
COLUMNS_TO_RENAME = {
    "ESTACAO": "station_code",
    "DATA__YYYY_MM_DD_": "date",
    "Hora_UTC": "time",
    "TEMPERATURA_DO_AR___BULBO_SECO__HORARIA___C_": "temperature",
    "TEMPERATURA_DO_PONTO_DE_ORVALHO___C_": "dew_point",
    "VENTO__VELOCIDADE_HORARIA__m/s_": "wind_speed",
    "VENTO__DIREÇÃO_HORARIA__gr______gr__": "wind_direction",
    "PRECIPITAÇÃO_TOTAL__HORÁRIO__mm_": "precipitation",
    "PRESSAO_ATMOSFERICA_AO_NIVEL_DA_ESTACAO__HORARIA__mB_": "pressure",
    "UMIDADE_RELATIVA_DO_AR__HORARIA__%_": "relative_humidity",
    "VENTO__RAJADA_MAXIMA__m/s_": "wind_gust",
    "RADIACAO_GLOBAL__KJ/m²_": "radiation",
}

# The source names contain characters such as `/` and `%`, so each one is
# back-quoted to be parsed as a single identifier.
weather_select_exprs = [
    f"`{old}` AS {new}" for old, new in COLUMNS_TO_RENAME.items()
]
df_renamed = df_bronze_weather_data.selectExpr(*weather_select_exprs)

display(df_renamed.limit(10))

station_code,date,time,temperature,dew_point,wind_speed,wind_direction,precipitation,pressure,relative_humidity,wind_gust,radiation
A001,2015-01-01,00:00,20.1,15.7,0.5,84.0,0.0,886.8,76.0,1.0,
A001,2015-01-01,01:00,19.8,16.0,0.4,20.0,0.0,887.5,79.0,1.1,
A001,2015-01-01,02:00,18.6,15.7,1.0,284.0,0.0,888.1,84.0,1.5,
A001,2015-01-01,03:00,18.6,15.9,0.7,265.0,0.0,887.8,84.0,1.3,
A001,2015-01-01,04:00,19.3,16.0,0.2,110.0,0.0,887.1,81.0,0.9,
A001,2015-01-01,05:00,18.9,16.1,0.5,247.0,0.0,886.6,83.0,0.9,
A001,2015-01-01,06:00,18.5,16.7,0.6,43.0,0.0,886.3,89.0,1.1,
A001,2015-01-01,07:00,17.8,15.3,1.4,277.0,0.0,886.5,85.0,1.9,
A001,2015-01-01,08:00,17.3,16.5,1.0,301.0,0.0,886.9,95.0,1.9,
A001,2015-01-01,09:00,18.1,16.5,0.6,2.0,0.0,887.3,90.0,1.5,18.1


### Conversão de tipos e tratamento de valores ausentes

Nesta célula, as colunas numéricas do DataFrame `df_renamed` são convertidas para o tipo `double` utilizando `try_cast`, garantindo maior robustez frente a valores inválidos. Além disso, valores ausentes na coluna `radiation` são preenchidos com zero, preparando o conjunto de dados resultante (`df_typed`) para análises e agregações posteriores.


In [0]:
# Measurement columns that arrive as strings and are converted to double.
# Each column is listed exactly once (the previous chain cast
# `relative_humidity` twice).
NUMERIC_COLUMNS = [
    "temperature",
    "dew_point",
    "wind_speed",
    "wind_direction",
    "precipitation",
    "pressure",
    "relative_humidity",
    "wind_gust",
    "radiation",
]

df_typed = df_renamed
for numeric_column in NUMERIC_COLUMNS:
    # try_cast returns NULL for values that cannot be parsed instead of
    # raising an error.
    df_typed = df_typed.withColumn(
        numeric_column, F.expr(f"try_cast({numeric_column} as double)")
    )

# Missing radiation readings are replaced with 0.0; every other column keeps
# its NULLs.
df_typed = df_typed.fillna({"radiation": 0.0})

print("Tipos originais de cada coluna:")
df_renamed.printSchema()

print("Tipos corrigidos:")
df_typed.printSchema()


Tipos originais de cada coluna:
root
 |-- station_code: string (nullable = true)
 |-- date: string (nullable = true)
 |-- time: string (nullable = true)
 |-- temperature: string (nullable = true)
 |-- dew_point: string (nullable = true)
 |-- wind_speed: string (nullable = true)
 |-- wind_direction: string (nullable = true)
 |-- precipitation: string (nullable = true)
 |-- pressure: string (nullable = true)
 |-- relative_humidity: string (nullable = true)
 |-- wind_gust: string (nullable = true)
 |-- radiation: string (nullable = true)

Tipos corrigidos:
root
 |-- station_code: string (nullable = true)
 |-- date: string (nullable = true)
 |-- time: string (nullable = true)
 |-- temperature: double (nullable = true)
 |-- dew_point: double (nullable = true)
 |-- wind_speed: double (nullable = true)
 |-- wind_direction: double (nullable = true)
 |-- precipitation: double (nullable = true)
 |-- pressure: double (nullable = true)
 |-- relative_humidity: double (nullable = true)
 |-- wind_gus

### Filtragem de registros válidos

Nesta célula, o DataFrame `df_typed` é filtrado para manter apenas os registros que possuem valores não nulos em todas as principais variáveis meteorológicas. Esse passo garante a consistência e a qualidade dos dados, preparando o conjunto resultante para análises que exigem informações completas.


In [0]:
# Columns that must all be non-null for a reading to be kept.
REQUIRED_COLUMNS = [
    "temperature",
    "dew_point",
    "wind_speed",
    "wind_direction",
    "precipitation",
    "pressure",
    "relative_humidity",
    "wind_gust",
    "radiation",
]

# AND together one isNotNull() check per required column.
not_null_checks = [F.col(required).isNotNull() for required in REQUIRED_COLUMNS]
row_is_complete = not_null_checks[0]
for check in not_null_checks[1:]:
    row_is_complete = row_is_complete & check

df_valid = df_typed.filter(row_is_complete)

display(df_valid)

station_code,date,time,temperature,dew_point,wind_speed,wind_direction,precipitation,pressure,relative_humidity,wind_gust,radiation
A001,2000-05-07,12:00,22.6,14.7,1.8,126.0,0.0,888.2,61.0,3.8,1506.0
A001,2000-05-07,13:00,24.2,14.7,2.7,75.0,0.0,888.4,55.0,4.7,2230.0
A001,2000-05-07,14:00,25.0,14.1,2.0,117.0,0.0,888.1,51.0,4.9,2675.0
A001,2000-05-07,15:00,26.2,13.2,2.5,58.0,0.0,887.4,44.0,5.8,2915.0
A001,2000-05-07,16:00,26.7,14.0,2.4,167.0,0.0,886.5,46.0,5.8,2523.0
A001,2000-05-07,17:00,26.6,13.6,1.8,178.0,0.0,885.9,45.0,4.3,2435.0
A001,2000-05-07,18:00,28.0,12.4,1.8,125.0,0.0,885.5,38.0,6.3,2530.0
A001,2000-05-07,19:00,26.6,12.5,1.1,53.0,0.0,885.6,41.0,3.8,1412.0
A001,2000-05-07,20:00,25.8,12.7,1.5,109.0,0.0,885.9,44.0,3.0,540.0
A001,2000-05-07,21:00,24.1,13.4,1.3,197.0,0.0,886.2,51.0,3.2,34.0


### Criação da coluna de data e hora

Nesta célula, as colunas `date` e `time` são combinadas para formar um campo único de data e hora. O valor resultante é ajustado para o formato esperado e convertido para o tipo `timestamp`, originando a coluna `datetime`. As colunas intermediárias utilizadas no processo são removidas, mantendo o DataFrame final mais limpo e adequado para análises temporais.


In [0]:
# Concatenate date and time, rewrite a trailing "00 UTC" as ":00" so that the
# text matches the "yyyy-MM-ddHH:mm" pattern, then parse it as a timestamp.
date_time_text = F.concat(F.col("date"), F.col("time"))
normalized_text = F.regexp_replace(date_time_text, "00 UTC", ":00")

df_formatted = (
    df_valid
    .withColumn("datetime", F.to_timestamp(normalized_text, "yyyy-MM-ddHH:mm"))
    # The source columns are no longer needed once `datetime` exists.
    .drop("date", "time")
)

### Persistência da tabela Silver de dados meteorológicos

Nesta célula, o DataFrame `df_formatted` é salvo como a tabela `weather_data`, utilizando o modo **overwrite** para substituir qualquer versão existente. Isso garante que os dados tratados e padronizados estejam disponíveis de forma consistente para consultas e análises posteriores.


In [0]:
# Persist the Silver weather DataFrame as table `weather_data`, replacing any
# existing contents.
df_formatted.write.saveAsTable("weather_data", mode="overwrite")

### Consulta de validação da tabela `weather_data`

Nesta célula, é executada uma consulta SQL para exibir uma amostra dos registros da tabela `weather_data`, verificando se os dados foram persistidos corretamente após as transformações aplicadas.


In [0]:
%sql
-- Preview up to one hundred rows of the persisted weather_data table.
SELECT *
FROM weather_data
LIMIT 100

station_code,temperature,dew_point,wind_speed,wind_direction,precipitation,pressure,relative_humidity,wind_gust,radiation,datetime
A001,22.6,14.7,1.8,126.0,0.0,888.2,61.0,3.8,1506.0,2000-05-07T12:00:00.000Z
A001,24.2,14.7,2.7,75.0,0.0,888.4,55.0,4.7,2230.0,2000-05-07T13:00:00.000Z
A001,25.0,14.1,2.0,117.0,0.0,888.1,51.0,4.9,2675.0,2000-05-07T14:00:00.000Z
A001,26.2,13.2,2.5,58.0,0.0,887.4,44.0,5.8,2915.0,2000-05-07T15:00:00.000Z
A001,26.7,14.0,2.4,167.0,0.0,886.5,46.0,5.8,2523.0,2000-05-07T16:00:00.000Z
A001,26.6,13.6,1.8,178.0,0.0,885.9,45.0,4.3,2435.0,2000-05-07T17:00:00.000Z
A001,28.0,12.4,1.8,125.0,0.0,885.5,38.0,6.3,2530.0,2000-05-07T18:00:00.000Z
A001,26.6,12.5,1.1,53.0,0.0,885.6,41.0,3.8,1412.0,2000-05-07T19:00:00.000Z
A001,25.8,12.7,1.5,109.0,0.0,885.9,44.0,3.0,540.0,2000-05-07T20:00:00.000Z
A001,24.1,13.4,1.3,197.0,0.0,886.2,51.0,3.2,34.0,2000-05-07T21:00:00.000Z


In [0]:
# Table-level description stored in the catalog metadata.
spark.sql("""
    comment on table mvp.silver.stations is
    'The table contains information about various stations, including their locations and unique identifiers. It can be used for analyzing station distribution across different states and cities, tracking the operational timeline of each station, and managing station-related data for reporting or operational purposes.'
""")

# (column, description) pairs. Every column of the table is listed so that a
# fresh run reproduces the full set of column comments shown by `describe`,
# instead of relying on comments that were set outside this notebook.
# NOTE: the text is interpolated into a single-quoted SQL literal, so the
# descriptions must not contain single quotes.
COLUMN_COMMENTS = [
    ("region", "The geographical area or zone where the station is located"),
    ("uf", "State where the station is located"),
    ("city", "The city where the station is located"),
    ("code", "Unique identifier assigned to each station"),
    ("first_record", "Timestamp of the first recorded data entry from the station."),
    ("last_record", "Timestamp of the most recent data entry"),
]

for column, comment in COLUMN_COMMENTS:
    spark.sql(f"comment on column mvp.silver.stations.`{column}` is '{comment}'")

In [0]:
# Tag each row of the extended description with an increasing id so the
# table-metadata section can be located by position.
df_describe = (
    spark.sql("describe extended mvp.silver.stations")
    .withColumn("_id", F.monotonically_increasing_id())
)

# Id of the 'Catalog' row, which is where the table-level metadata starts.
catalog_row = df_describe.where(F.col("col_name") == "Catalog").select("_id").first()
target_id = catalog_row._id

# Show the nine rows starting at 'Catalog', without the helper id column.
table_describe = df_describe.where(F.col("_id") >= target_id).limit(9)
display(table_describe.drop("_id"))

# Column-level view: name, data type and comment of every column.
display(spark.sql("describe mvp.silver.stations"))

col_name,data_type,comment
Catalog,mvp,
Database,silver,
Table,stations,
Created Time,Sun Dec 21 16:16:41 UTC 2025,
Last Access,UNKNOWN,
Created By,Spark,
Statistics,"10210 bytes, 615 rows",
Type,MANAGED,
Comment,"The table contains information about various stations, including their locations and unique identifiers. It can be used for analyzing station distribution across different states and cities, tracking the operational timeline of each station, and managing station-related data for reporting or operational purposes.",


col_name,data_type,comment
region,string,The geographical area or zone where the station is located
uf,string,State where the station is located
city,string,The city where the station is located
code,string,Unique identifier assigned to each station
first_record,timestamp,Timestamp of the first recorded data entry from the station.
last_record,timestamp,Timestamp of the most recent data entry


In [0]:
# Table-level description stored in the catalog metadata.
spark.sql("""
    comment on table mvp.silver.weather_data is
    'The table contains hourly weather data collected from various stations. It includes information such as temperature, humidity, precipitation, and wind conditions. Possible use cases include analyzing weather patterns, conducting climate research, and supporting agricultural planning by understanding local weather conditions.'
""")

# (column, description) pairs. `station_code` is included so that a fresh run
# reproduces the full set of column comments shown by `describe`, instead of
# relying on a comment that was set outside this notebook.
# NOTE: the text is interpolated into a single-quoted SQL literal, so the
# descriptions must not contain single quotes.
COLUMN_COMMENTS = [
    ("station_code", "Station identifier where the hourly weather data was recorded"),
    ("datetime", "Datetime of the observation recorded."),
    ("precipitation", "Total hourly precipitation measured in millimeters"),
    ("pressure", "Hourly atmospheric pressure at the station level, measured in millibars (mB)"),
    ("radiation", "Amount of global radiation recorded in kilojoules per square meter"),
    ("temperature", "Hourly dry-bulb air temperature recorded in degrees Celsius."),
    ("dew_point", "Dew point temperature in Celsius, representing the air temperature at which condensation occurs."),
    ("relative_humidity", "Percentage of relative humidity measured for each hour"),
    ("wind_direction", "Hourly wind direction measured in degrees"),
    ("wind_gust", "Highest recorded wind gust speed during the hour, measured in meters per second"),
    ("wind_speed", "Measured wind speed per hour, expressed in meters per second"),
]

for column, comment in COLUMN_COMMENTS:
    spark.sql(f"comment on column mvp.silver.weather_data.`{column}` is '{comment}'")

In [0]:
# Tag each row of the extended description with an increasing id so the
# table-metadata section can be located by position.
df_describe = (
    spark.sql("describe extended mvp.silver.weather_data")
    .withColumn("_id", F.monotonically_increasing_id())
)

# Id of the 'Catalog' row, which is where the table-level metadata starts.
catalog_row = df_describe.where(F.col("col_name") == "Catalog").select("_id").first()
target_id = catalog_row._id

# Show the nine rows starting at 'Catalog', without the helper id column.
table_describe = df_describe.where(F.col("_id") >= target_id).limit(9)
display(table_describe.drop("_id"))

# Column-level view: name, data type and comment of every column.
display(spark.sql("describe mvp.silver.weather_data"))

col_name,data_type,comment
Catalog,mvp,
Database,silver,
Table,weather_data,
Created Time,Sun Dec 21 16:16:50 UTC 2025,
Last Access,UNKNOWN,
Created By,Spark,
Statistics,"8925494 bytes, 971203 rows",
Type,MANAGED,
Comment,"The table contains hourly weather data collected from various stations. It includes information such as temperature, humidity, precipitation, and wind conditions. Possible use cases include analyzing weather patterns, conducting climate research, and supporting agricultural planning by understanding local weather conditions.",


col_name,data_type,comment
station_code,string,Station identifier where the hourly weather data was recorded
temperature,double,Hourly dry-bulb air temperature recorded in degrees Celsius.
dew_point,double,"Dew point temperature in Celsius, representing the air temperature at which condensation occurs."
wind_speed,double,"Measured wind speed per hour, expressed in meters per second"
wind_direction,double,Hourly wind direction measured in degrees
precipitation,double,Total hourly precipitation measured in millimeters
pressure,double,"Hourly atmospheric pressure at the station level, measured in millibars (mB)"
relative_humidity,double,Percentage of relative humidity measured for each hour
wind_gust,double,"Highest recorded wind gust speed during the hour, measured in meters per second"
radiation,double,Amount of global radiation recorded in kilojoules per square meter
