## Isolating weather Stations in wine areas

In [1]:
import pandas as pd
import os
import requests
from dotenv import load_dotenv

In [2]:
 # Load environment variables from .env file
load_dotenv()

# Get the API key
api_key = os.getenv('NOAA_CLIMATE_DATA')
token = api_key

In [3]:
# Load the Weather Data
file_path = 'Resources/weather_all_country_codes3.csv'
df = pd.read_csv(file_path)

# Pivot the Weather DataFrame
pivoted_df = df.pivot_table(index=['station', 'date'], columns='datatype', values='value').reset_index()

# Update the 'country' values
pivoted_df['country'] = pivoted_df['station'].str[:2]

# Load the Station Codes Data
column_names = ['station_id', 'latitude', 'longitude', 'elevation', 'location']
colspecs = [(0, 11), (12, 20), (21, 30), (31, 37), (38, None)]
file_path_txt = 'Resources/stationcodes.txt'
df_station_codes = pd.read_fwf(file_path_txt, colspecs=colspecs, header=None, names=column_names)

# Rename Columns for Merging
df_station_codes.rename(columns={'station_id': 'station'}, inplace=True)

# Clean Column Names
pivoted_df.columns = pivoted_df.columns.str.strip()
df_station_codes.columns = df_station_codes.columns.str.strip()

# Merge the DataFrames
merged_df = pd.merge(pivoted_df, df_station_codes, on='station', how='left')

# Filter out the stations without location data
stations_without_location = merged_df[merged_df[['latitude', 'longitude', 'location']].isnull().any(axis=1)]

# Display the stations without location data
stations_without_location


Unnamed: 0,station,date,DP10,DP1X,DT32,DX70,DX90,EMNT,EMXP,EMXT,...,MNPN,MXPN,PRCP,TAVG,TMAX,country,latitude,longitude,elevation,location
0,GHCND:AE000041196,2012-01-01T00:00:00,,,,346.0,226.0,,,121.0,...,,,,,96.0,GH,,,,
1,GHCND:AE000041196,2023-01-01T00:00:00,8.0,1.0,,,,,1.06,,...,,,3.42,,,GH,,,,
2,GHCND:AEM00041194,2004-01-01T00:00:00,4.0,0.0,,,,,0.90,,...,,,2.31,,,GH,,,,
3,GHCND:AEM00041194,2006-01-01T00:00:00,7.0,1.0,,,,,1.77,,...,,,4.08,,,GH,,,,
4,GHCND:AEM00041194,2010-01-01T00:00:00,5.0,1.0,,,,,3.15,,...,,,4.83,,,GH,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31219,GHCND:WA010101860,2003-01-01T00:00:00,35.0,0.0,0.0,348.0,117.0,38.0,0.87,100.0,...,,,12.60,72.3,85.5,GH,,,,
31220,GHCND:WA010101860,2022-01-01T00:00:00,,,,348.0,126.0,,,99.0,...,,,,,86.5,GH,,,,
31221,GHCND:WA012084750,1998-01-01T00:00:00,,,,340.0,210.0,,,103.0,...,,,,,90.7,GH,,,,
31222,GHCND:WA012084750,1999-01-01T00:00:00,,,,343.0,132.0,,,102.0,...,,,,,88.3,GH,,,,


In [4]:
# Function to get location data from the NCEI API
def get_location_data(station_id, token):
    base_url = f"https://www.ncei.noaa.gov/cdo-web/api/v2/locations/{station_id}"
    headers = {"token": token}
    response = requests.get(base_url, headers=headers)
    try:
        response.raise_for_status()  # Raise an HTTPError for bad responses
        json_response = response.json()
        return {
            "latitude": json_response.get("latitude"),
            "longitude": json_response.get("longitude"),
            "elevation": json_response.get("elevation"),
            "location": json_response.get("name")
        }
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")  # HTTP error
    except requests.exceptions.RequestException as req_err:
        print(f"Request error occurred: {req_err}")  # Request error
    except ValueError as json_err:
        print(f"JSON decode error occurred: {json_err}")  # JSON decode error
        print(f"Response content: {response.content}")  # Print response content for debugging
    return None

# Function to update the DataFrame with location data
def update_station_data(row, token):
    station_id = row['station']
    location_data = get_location_data(station_id, token)
    if location_data:
        row['latitude'] = location_data['latitude']
        row['longitude'] = location_data['longitude']
        row['elevation'] = location_data['elevation']
        row['location'] = location_data['location']
    return row



# Update the stations without location data
stations_without_location = stations_without_location.apply(update_station_data, axis=1, token=token)

# Merge updated data back into the main DataFrame
merged_df.update(stations_without_location)

stations_without_location


HTTP error occurred: 503 Server Error: Service Unavailable for url: https://www.ncei.noaa.gov/cdo-web/api/v2/locations/GHCND:AGE00147716
HTTP error occurred: 503 Server Error: Service Unavailable for url: https://www.ncei.noaa.gov/cdo-web/api/v2/locations/GHCND:AGE00147718
HTTP error occurred: 503 Server Error: Service Unavailable for url: https://www.ncei.noaa.gov/cdo-web/api/v2/locations/GHCND:AGM00060360
HTTP error occurred: 503 Server Error: Service Unavailable for url: https://www.ncei.noaa.gov/cdo-web/api/v2/locations/GHCND:AGM00060515
HTTP error occurred: 503 Server Error: Service Unavailable for url: https://www.ncei.noaa.gov/cdo-web/api/v2/locations/GHCND:AGM00060549
HTTP error occurred: 503 Server Error: Service Unavailable for url: https://www.ncei.noaa.gov/cdo-web/api/v2/locations/GHCND:AGM00060550
HTTP error occurred: 503 Server Error: Service Unavailable for url: https://www.ncei.noaa.gov/cdo-web/api/v2/locations/GHCND:AGM00060555
HTTP error occurred: 503 Server Error: Se

KeyboardInterrupt: 