# Download GHCNd Data

This downloads the Global Historical Climatology Network daily (GHCNd). This downloads the required data for the 1.2 Surface Temperature indicators and 1.3 Rainfall indicators

### Description from NOAA:

The Global Historical Climatology Network daily (GHCNd) is an integrated database of daily climate summaries from land surface stations across the globe. GHCNd is made up of daily climate records from numerous sources that have been integrated and subjected to a common suite of quality assurance reviews.

GHCNd contains records from more than 100,000 stations in 180 countries and territories. NCEI provides numerous daily variables, including maximum and minimum temperature, total daily precipitation, snowfall, and snow depth. About half the stations only report precipitation. Both record length and period of record vary by station and cover intervals ranging from less than a year to more than 175 years.


Further information can be found at https://www.ncei.noaa.gov/products/land-based-station/global-historical-climatology-network-daily

In [1]:
import os
import urllib 
import pandas as pd
import numpy as np

sys.path.append("../../../../indicators_setup")
from ind_setup.plotting import plot_timeseries
from ind_setup.plotting_int import plot_timeseries_interactive

In [2]:
import requests
from bs4 import BeautifulSoup
import re

In [3]:
country_codes = requests.get('https://www.ncei.noaa.gov/pub/data/ghcn/daily/ghcnd-countries.txt').text
country_codes = country_codes.split('\n')

# Create lists
codes = [line.split(' ')[0] for line in country_codes]  # Code
countries = [' '.join(line.split(' ')[1:]).strip() for line in country_codes]  # Country

# DataFrame
df_countries = pd.DataFrame({
    'Code': codes,
    'Country': countries
})


In [4]:
country = 'Palau'
df_country = df_countries.loc[df_countries['Country'] == 'Palau']
df_country

Unnamed: 0,Code,Country
161,PS,Palau


In [5]:
url_stations = 'https://www.ncei.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt'

In [6]:
stations = requests.get(url_stations).text
stations = stations.split('\n')

# Procesar dinámicamente cada línea
processed_data = []
for line in stations:

    if len(line) > 0:
        parts = line.split()  
        station_id = parts[0]  # ID
        latitude = float(parts[1])  # Latitude
        longitude = float(parts[2])  # Longitude
        elevation = float(parts[3])  # Elevation
        name = " ".join(parts[4:])  # Name
        processed_data.append([station_id, latitude, longitude, elevation, name])

# Crear DataFrame
df_stations = pd.DataFrame(processed_data, columns=['ID', 'Latitud', 'Longitud', 'Elevación', 'Nombre'])

In [7]:
df_country_stations = df_stations[df_stations['ID'].str.startswith(df_country.Code.values[0])]

In [8]:
GHCND_dir = "https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/"

In [9]:
LONS, LATS = [], []
for i in range(len(df_country_stations)):
    
    url_download = GHCND_dir + df_country_stations.iloc[i]['ID'] + '.csv'

    df = pd.read_csv(url_download, na_values=['-9999'])
    df.index = pd.to_datetime(df['DATE'])
    LONS.append(df.iloc[0]['LONGITUDE'])
    LATS.append(df.iloc[0]['LATITUDE'])

LONS = np.array(LONS)
LONS[LONS < 0] += 360
LATS = np.array(LATS)

  df = pd.read_csv(url_download, na_values=['-9999'])


In [12]:
df

Unnamed: 0_level_0,STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,PRCP,PRCP_ATTRIBUTES,SNOW,SNOW_ATTRIBUTES,...,WT08,WT08_ATTRIBUTES,WT14,WT14_ATTRIBUTES,WT16,WT16_ATTRIBUTES,WT18,WT18_ATTRIBUTES,WV20,WV20_ATTRIBUTES
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1947-07-01,PSW00040309,1947-07-01,7.33674,134.47686,27.4,"KOROR, PW PS",,,,,...,,,,,,,,,,
1947-07-02,PSW00040309,1947-07-02,7.33674,134.47686,27.4,"KOROR, PW PS",,,,,...,,,,,,,,,,
1947-07-03,PSW00040309,1947-07-03,7.33674,134.47686,27.4,"KOROR, PW PS",,,,,...,,,,,,,,,,
1947-07-04,PSW00040309,1947-07-04,7.33674,134.47686,27.4,"KOROR, PW PS",,,,,...,,,,,,,,,,
1947-07-05,PSW00040309,1947-07-05,7.33674,134.47686,27.4,"KOROR, PW PS",,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-06,PSW00040309,2024-11-06,7.33674,134.47686,27.4,"KOROR, PW PS",33.0,",,H",,,...,,,,,,,,,,
2024-11-08,PSW00040309,2024-11-08,7.33674,134.47686,27.4,"KOROR, PW PS",0.0,",,H",,,...,,,,,,,,,,
2024-11-09,PSW00040309,2024-11-09,7.33674,134.47686,27.4,"KOROR, PW PS",20.0,",,H",,,...,,,,,,,,,,
2024-11-10,PSW00040309,2024-11-10,7.33674,134.47686,27.4,"KOROR, PW PS",0.0,",,H",,,...,,,,,,,,,,


In [10]:
import folium

# Create a map centered at the first station's coordinates
map = folium.Map(location=[LATS[0], LONS[0]], zoom_start=10)

# Add markers for each station
for i in range(len(df_country_stations)):
    folium.Marker(
        location=[LATS[i], LONS[i]],
        popup=df_country_stations.iloc[i]['ID'],
        icon=folium.Icon(color='lightred')  # Change the color to red
    ).add_to(map)

map


In [14]:
if 'TMIN' in df.columns:
    df['TMIN'] = df['TMIN'] / 10
    df['TMAX'] = df['TMAX'] / 10

    dict_plot = [{'data' : df, 'var' : 'TMIN', 'ax' : 1, 'label' : 'TMIN: ºC'},
        {'data' : df, 'var' : 'TMAX', 'ax' : 1, 'label' : 'TMAX: ºC'},
        ]


    plot_timeseries_interactive(dict_plot, trendline = True)


NameError: name 'plot_timeseries_interactive' is not defined