This page collects all the code used to query the English, Scottish, and Welsh rainfall APIs for station IDs, coordinates, and rainfall amounts. The results are written to a CSV file. The county name for each station is then derived from its coordinates using the OpenCage package, and the average rainfall for each county is added as a separate column. The page also traces the evolution of the code, showing how it developed step by step into the final version that performs all of these tasks.

### 1. England- Rainfall 

In [None]:
import requests

def get_rainfall_measurements():
    """Fetch the latest rainfall reading of every rainfall measure.

    Queries the flood-monitoring ``measures`` endpoint filtered on
    ``parameter=rainfall``.

    Returns:
        A ``(rainfall_info, total_rainfall)`` tuple. ``rainfall_info`` is a
        list of ``(station_code, rainfall_value)`` pairs, one per measure
        that carries a reading object; ``rainfall_value`` may be ``None``.
        ``total_rainfall`` is the sum of the values that are not ``None``.
        When the request fails, ``([], 0)`` is returned (and a message is
        printed) so the first element can always be iterated safely.
    """
    url = "http://environment.data.gov.uk/flood-monitoring/id/measures?parameter=rainfall"
    try:
        # A timeout keeps a stalled connection from blocking indefinitely.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"Error: Unable to fetch data ({exc})")
        return [], 0

    if response.status_code != 200:
        print(f"Error: Unable to fetch data (HTTP {response.status_code})")
        return [], 0

    rainfall_info = []
    total_rainfall = 0
    for measurement in response.json().get('items', []):
        latest_reading = measurement.get('latestReading')
        # Only a dict-shaped reading has a value to extract; measures with
        # no reading, or a reading in any other form, are skipped.
        if not isinstance(latest_reading, dict):
            continue
        station_code = measurement.get('stationReference')
        rainfall_value = latest_reading.get('value')
        if rainfall_value is not None:
            total_rainfall += rainfall_value
        rainfall_info.append((station_code, rainfall_value))
    return rainfall_info, total_rainfall

# Fetch once, then report the grand total followed by the per-station values.
rainfall_measurements, total_rainfall = get_rainfall_measurements()

if isinstance(rainfall_measurements, str):
    # Guard against an error-message string arriving in place of the list:
    # looping over a string would fail when unpacking each character.
    print(rainfall_measurements)
else:
    # Print the total rainfall
    print(f"Total Rainfall: {total_rainfall}")

    # Now print each station's rainfall data
    for station_code, rainfall in rainfall_measurements:
        print(f"Station Code: {station_code}, Rainfall: {rainfall}")


<br>

### 2. England- Coordinates

In [None]:
import requests

def fetch_station_data():
    """Print the notation, latitude and longitude of each rainfall station.

    Requests the flood-monitoring ``stations`` endpoint filtered on
    ``parameter=rainfall``. A non-200 response, or a payload without an
    ``items`` key, is reported with a printed message. Returns ``None``.
    """
    endpoint = 'https://environment.data.gov.uk/flood-monitoring/id/stations?parameter=rainfall'
    reply = requests.get(endpoint)

    # Bail out early on an unsuccessful request.
    if reply.status_code != 200:
        print(f"Error fetching data: {reply.status_code}")
        return

    payload = reply.json()
    if 'items' not in payload:
        print('No data available')
        return

    for station in payload['items']:
        latitude, longitude = station.get('lat'), station.get('long')
        print(f"Station: {station.get('notation', 'Unknown')}, Latitude: {latitude}, Longitude: {longitude}")

# Run the station listing; all output goes to stdout.
fetch_station_data()


<br>

### 3. England- Both Coordinates and Rainfall (takes long)

In [None]:
import requests

def fetch_station_data_with_measurements():
    """Print coordinates and the latest reading for each rainfall station.

    Lists the rainfall stations, then issues one extra request per station
    (against the ``@id`` of its first measure, with ``/readings?latest``
    appended) to obtain the reading. Every outcome is reported with a
    printed line. Returns ``None``.
    """
    stations_url = 'https://environment.data.gov.uk/flood-monitoring/id/stations?parameter=rainfall'
    stations_reply = requests.get(stations_url)

    if stations_reply.status_code != 200:
        print(f"Error fetching station data: {stations_reply.status_code}")
        return

    payload = stations_reply.json()
    if 'items' not in payload:
        print('No station data available')
        return

    for station in payload['items']:
        # Station details
        latitude = station.get('lat')
        longitude = station.get('long')
        station_id = station.get('notation', 'Unknown')

        # A station with no measures cannot be queried for readings.
        if not ('measures' in station and station['measures']):
            print(f"Station ID: {station_id}, Latitude: {latitude}, Longitude: {longitude}, No measurement data available")
            continue

        # Only the first listed measure is consulted.
        readings_reply = requests.get(station['measures'][0]['@id'] + '/readings?latest')
        if readings_reply.status_code != 200:
            print(f"Error fetching measurement data for station {station_id}")
            continue

        readings = readings_reply.json()
        if 'items' in readings and readings['items']:
            rainfall_value = readings['items'][0].get('value')
            print(f"Station ID: {station_id}, Latitude: {latitude}, Longitude: {longitude}, Rainfall Measurement (mm): {rainfall_value}")
        else:
            print(f"Station ID: {station_id}, Latitude: {latitude}, Longitude: {longitude}, No recent measurement data available")

# Run the combined listing; it makes one extra HTTP request per station that
# has measures, which is why this cell is slow.
fetch_station_data_with_measurements()


<br>

### 4. Scotland- Coordinates

In [None]:
import requests

def get_rain_stations_coordinates():
    """Return the coordinates of every station listed by the SEPA API.

    Returns:
        A list of ``(latitude, longitude)`` tuples taken from the
        ``station_latitude`` / ``station_longitude`` fields, or the string
        ``"Failed to retrieve data from the API"`` on a non-200 response.
    """
    base_url = "https://www2.sepa.org.uk/rainfall"
    reply = requests.get(f"{base_url}/api/Stations")

    if reply.status_code != 200:
        return "Failed to retrieve data from the API"

    # One coordinate pair per station record, in the order received.
    return [
        (entry.get('station_latitude'), entry.get('station_longitude'))
        for entry in reply.json()
    ]

# On success this is a list of (latitude, longitude) tuples; on a non-200
# response the function hands back an error-message string instead.
coordinates = get_rain_stations_coordinates()
print(coordinates)


<br>

### 5. Wales- Rainfall and Coordinates (from Lauren)
#### But it collects river data as well, so we can't use it

In [None]:
import pandas as pd
import csv
import requests

# Function to fetch rainfall measurements
def get_wales_rainfall_measurements(api_key=None):
    """Fetch the raw station records from the Natural Resources Wales API.

    Args:
        api_key: Subscription key sent in the ``Ocp-Apim-Subscription-Key``
            header. When omitted, the ``NRW_API_KEY`` environment variable is
            used, falling back to the key previously embedded here so that
            existing calls without arguments keep working.

    Returns:
        The decoded JSON body on a 200 response, otherwise an empty list
        (also when the request itself fails).
    """
    import os

    if api_key is None:
        # NOTE(review): the literal fallback is a credential committed to the
        # notebook; rotate it and drop the fallback once NRW_API_KEY is set.
        api_key = os.environ.get('NRW_API_KEY', '413a14f470f64b70a010cfa3b4ed6a79')

    walesurl = 'https://api.naturalresources.wales/rivers-and-seas/v1/api/StationData'
    headers = {'Ocp-Apim-Subscription-Key': api_key}
    try:
        # A timeout keeps a stalled connection from blocking indefinitely.
        walesresponse = requests.get(walesurl, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Error: unable to reach the Wales API ({exc})")
        return []

    if walesresponse.status_code != 200:
        return []
    return walesresponse.json()

# Retrieve every station record returned by the API
wales_rainfall_measurements = get_wales_rainfall_measurements()

if wales_rainfall_measurements:
    # Destination CSV file
    filename = "wales_rainfall_measurements.csv"

    # newline='' lets the csv module control line endings itself
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        # Header row
        writer.writerow(["Station Name", "Latitude", "Longitude", "Measurement Value (mm)"])

        for measurement in wales_rainfall_measurements:
            station_label = measurement.get('nameEN')

            # A missing/empty coordinates entry yields None for both fields
            position = measurement.get('coordinates') or {}
            lat_value = position.get('latitude')
            lon_value = position.get('longitude')

            # The first listed parameter is used whatever it measures, so
            # the value is not guaranteed to be a rainfall reading.
            parameter_list = measurement.get('parameters')
            if parameter_list:
                latest = parameter_list[0].get('latestValue')
            else:
                latest = None

            writer.writerow([station_label, lat_value, lon_value, latest])

    print(f"Data has been written to {filename}")
else:
    # Nothing came back from the API, so no file is produced
    print("No measurements found.")

<br>

### 6. Wales- Rainfall and Coordinates (from Imran)
#### This one only collects rainfall data

In [None]:
import requests
import pandas as pd
from IPython.display import display

# Prefer a key supplied through the NRW_API_KEY environment variable so the
# credential does not have to live in the notebook. The literal is only a
# fallback that keeps existing runs working; it should be rotated and removed.
import os
api_key = os.environ.get('NRW_API_KEY', '413a14f470f64b70a010cfa3b4ed6a79')

# Function to fetch data from API
def get_wales_api_data(api_key):
    """Return the decoded ``StationData`` payload, or ``[]`` on failure.

    Args:
        api_key: Value sent in the ``Ocp-Apim-Subscription-Key`` header.

    Returns:
        The parsed JSON body when the response status is 200, otherwise an
        empty list.
    """
    reply = requests.get(
        'https://api.naturalresources.wales/rivers-and-seas/v1/api/StationData',
        headers={'Ocp-Apim-Subscription-Key': api_key},
    )
    return reply.json() if reply.status_code == 200 else []

# Pull the raw station records from the API
api_data = get_wales_api_data(api_key)

# Build one [latitude, longitude, location, value] row for each station that
# reports a non-null 'Rainfall' parameter
data = []
for station in api_data:
    position = station['coordinates']
    row = [position['latitude'], position['longitude'], station['location']]

    # Value of the first parameter named 'Rainfall', or None if there is none
    rainfall_reading = next(
        (p['latestValue'] for p in station['parameters'] if p['paramNameEN'] == 'Rainfall'),
        None,
    )

    if rainfall_reading is not None:
        row.append(rainfall_reading)
        data.append(row)

# Tabulate and show the result
df = pd.DataFrame(data, columns=["Latitude", "Longitude", "Location", "Latest Value (mm)"])
display(df)

<br>

### 7. Combines all the API calls and saves the data in a CSV

In [None]:
import requests
import csv

# Function to fetch station data with coordinates for England
def fetch_station_data_eng():
    """Map each rainfall station's notation to its coordinates.

    Returns:
        A dict keyed by the station ``notation`` whose values are
        ``{'latitude': ..., 'longitude': ...}`` dicts (either coordinate may
        be ``None``). An empty dict is returned on a non-200 response.
    """
    reply = requests.get('https://environment.data.gov.uk/flood-monitoring/id/stations?parameter=rainfall')
    if reply.status_code != 200:
        return {}

    return {
        entry.get('notation'): {
            'latitude': entry.get('lat'),
            'longitude': entry.get('long'),
        }
        for entry in reply.json()['items']
    }

# Function to fetch rainfall measurements for England
def get_rainfall_data_eng():
    """Return the ``items`` list of the rainfall ``measures`` endpoint.

    Returns:
        The ``items`` entry of the decoded response on status 200,
        otherwise an empty list.
    """
    reply = requests.get("http://environment.data.gov.uk/flood-monitoring/id/measures?parameter=rainfall")
    return reply.json()['items'] if reply.status_code == 200 else []

# Function to fetch station data with coordinates for Scotland
def fetch_station_data_sco():
    """Map each SEPA station number to its coordinates.

    Returns:
        A dict keyed by ``station_no`` whose values are
        ``{'latitude': ..., 'longitude': ...}`` dicts (either coordinate may
        be ``None``). An empty dict is returned on a non-200 response.
    """
    reply = requests.get("https://www2.sepa.org.uk/rainfall/api/Stations")
    if reply.status_code != 200:
        return {}

    return {
        entry['station_no']: {
            'latitude': entry.get('station_latitude'),
            'longitude': entry.get('station_longitude'),
        }
        for entry in reply.json()
    }

# Function to fetch latest hourly rainfall data for Scotland
def get_rainfall_data_sco(station_id):
    """Return the last hourly rainfall record reported for a SEPA station.

    Args:
        station_id: Station identifier inserted into the ``Hourly`` URL.

    Returns:
        The final element of the JSON array returned by the API (presumably
        the most recent reading; assumes oldest-first ordering, TODO
        confirm), or ``None`` when the status is not 200 or the body is not
        a non-empty list.
    """
    sco_url = f"https://www2.sepa.org.uk/rainfall/api/Hourly/{station_id}?all=true"
    sco_response = requests.get(sco_url)
    if sco_response.status_code != 200:
        return None

    # Decode the body once rather than once for the emptiness check and a
    # second time for the lookup.
    hourly_records = sco_response.json()
    if not isinstance(hourly_records, list) or not hourly_records:
        return None
    return hourly_records[-1]

# Function to fetch station data with rainfall measurements for Wales
def get_wales_rainfall_data(api_key):
    """Collect the stations that report a 'Rainfall' value.

    Args:
        api_key: Value sent in the ``Ocp-Apim-Subscription-Key`` header.

    Returns:
        A list of dicts with the keys ``station_id`` (the station's
        ``location`` field), ``rainfall``, ``latitude`` and ``longitude``.
        Stations without a non-null 'Rainfall' value are left out. The list
        is empty on a non-200 response.
    """
    reply = requests.get(
        'https://api.naturalresources.wales/rivers-and-seas/v1/api/StationData',
        headers={'Ocp-Apim-Subscription-Key': api_key},
    )
    records = []
    if reply.status_code != 200:
        return records

    for entry in reply.json():
        name = entry['location']
        lat = entry['coordinates']['latitude']
        lon = entry['coordinates']['longitude']

        # Value of the first parameter named 'Rainfall', if any
        rain = next(
            (p['latestValue'] for p in entry['parameters'] if p['paramNameEN'] == 'Rainfall'),
            None,
        )
        if rain is None:
            continue

        records.append({
            'station_id': name,
            'rainfall': rain,
            'latitude': lat,
            'longitude': lon
        })
    return records

# The Natural Resources Wales key is read from the NRW_API_KEY environment
# variable when it is set, so the credential need not live in the notebook.
# The literal fallback only keeps existing runs working; it should be rotated
# and removed once the environment variable is in place.
import os
api_key = os.environ.get('NRW_API_KEY', '413a14f470f64b70a010cfa3b4ed6a79')

# Fetching and processing data for England and Scotland
eng_station_coordinates = fetch_station_data_eng()
eng_rainfall_data = get_rainfall_data_eng()
sco_station_coordinates = fetch_station_data_sco()
# One hourly-rainfall request is issued per Scottish station.
sco_rainfall_data = {station_id: get_rainfall_data_sco(station_id) for station_id in sco_station_coordinates}

# Fetching and processing data for Wales
wales_rainfall_data = get_wales_rainfall_data(api_key)

# Rows of [station id, rainfall, latitude, longitude, country]
combined_data = []

# England: every measure is kept; unknown stations get None coordinates
missing_coordinates = {'latitude': None, 'longitude': None}
for measurement in eng_rainfall_data:
    station_id = measurement.get('stationReference')
    latest_reading = measurement.get('latestReading')
    # Only a dict-shaped reading has a value to extract; anything else is
    # recorded as a missing measurement instead of raising.
    rainfall = latest_reading.get('value') if isinstance(latest_reading, dict) else None
    coordinates = eng_station_coordinates.get(station_id, missing_coordinates)
    combined_data.append([station_id, rainfall, coordinates['latitude'], coordinates['longitude'], 'England'])

# Scotland: stations without an hourly record are skipped
for station_id, latest_hourly_data in sco_rainfall_data.items():
    if latest_hourly_data:
        rainfall = latest_hourly_data['Value']
        coordinates = sco_station_coordinates[station_id]
        combined_data.append([station_id, rainfall, coordinates['latitude'], coordinates['longitude'], 'Scotland'])

# Wales: records are already flattened by get_wales_rainfall_data
combined_data.extend(
    [
        station_data['station_id'],
        station_data['rainfall'],
        station_data['latitude'],
        station_data['longitude'],
        'Wales',
    ]
    for station_data in wales_rainfall_data
)

# Write to CSV
filename = "final_combined_rainfall_measurements.csv"
with open(filename, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Station ID", "Rainfall Measurement (mm)", "Latitude", "Longitude", "Country"])
    writer.writerows(combined_data)

print(f"Data has been written to {filename}")


### 8. Excludes stations that have no coordinate data. Otherwise, it does the same job as the previous one (7)

In [2]:
import requests
import csv

# Function to fetch station data with coordinates for England
def fetch_station_data_eng():
    """Map each located rainfall station's notation to its coordinates.

    Stations whose ``lat`` or ``long`` is missing are left out.

    Returns:
        A dict keyed by the station ``notation`` whose values are
        ``{'latitude': ..., 'longitude': ...}`` dicts; empty on a non-200
        response.
    """
    reply = requests.get('https://environment.data.gov.uk/flood-monitoring/id/stations?parameter=rainfall')
    if reply.status_code != 200:
        return {}

    located = {}
    for entry in reply.json()['items']:
        lat, lon = entry.get('lat'), entry.get('long')
        if lat is None or lon is None:
            continue
        located[entry.get('notation')] = {'latitude': lat, 'longitude': lon}
    return located

# Function to fetch rainfall measurements for England
def get_rainfall_data_eng():
    """Return the ``items`` list of the rainfall ``measures`` endpoint.

    Returns:
        The ``items`` entry of the decoded response on status 200,
        otherwise an empty list.
    """
    reply = requests.get("http://environment.data.gov.uk/flood-monitoring/id/measures?parameter=rainfall")
    if reply.status_code != 200:
        return []
    return reply.json()['items']

# Function to fetch station data with coordinates for Scotland
def fetch_station_data_sco():
    """Map each located SEPA station number to its coordinates.

    Stations whose ``station_latitude`` or ``station_longitude`` is missing
    are left out.

    Returns:
        A dict keyed by ``station_no`` whose values are
        ``{'latitude': ..., 'longitude': ...}`` dicts; empty on a non-200
        response.
    """
    reply = requests.get("https://www2.sepa.org.uk/rainfall/api/Stations")
    if reply.status_code != 200:
        return {}

    located = {}
    for entry in reply.json():
        lat = entry.get('station_latitude')
        lon = entry.get('station_longitude')
        if lat is not None and lon is not None:
            located[entry['station_no']] = {'latitude': lat, 'longitude': lon}
    return located

# Function to fetch latest hourly rainfall data for Scotland
def get_rainfall_data_sco(station_id):
    """Return the last hourly rainfall record reported for a SEPA station.

    Args:
        station_id: Station identifier inserted into the ``Hourly`` URL.

    Returns:
        The final element of the JSON array returned by the API (presumably
        the most recent reading; assumes oldest-first ordering, TODO
        confirm), or ``None`` when the status is not 200 or the body is not
        a non-empty list.
    """
    sco_url = f"https://www2.sepa.org.uk/rainfall/api/Hourly/{station_id}?all=true"
    sco_response = requests.get(sco_url)
    if sco_response.status_code != 200:
        return None

    # Decode the body once rather than once for the emptiness check and a
    # second time for the lookup.
    hourly_records = sco_response.json()
    if not isinstance(hourly_records, list) or not hourly_records:
        return None
    return hourly_records[-1]

# Function to fetch station data with rainfall measurements for Wales
def get_wales_rainfall_data(api_key):
    """Collect the located stations that report a 'Rainfall' value.

    Args:
        api_key: Value sent in the ``Ocp-Apim-Subscription-Key`` header.

    Returns:
        A list of dicts with the keys ``station_id`` (the station's
        ``location`` field), ``rainfall``, ``latitude`` and ``longitude``.
        Stations with a null coordinate, or without a non-null 'Rainfall'
        value, are left out. The list is empty on a non-200 response.
    """
    reply = requests.get(
        'https://api.naturalresources.wales/rivers-and-seas/v1/api/StationData',
        headers={'Ocp-Apim-Subscription-Key': api_key},
    )
    records = []
    if reply.status_code != 200:
        return records

    for entry in reply.json():
        # Skip stations that lack either coordinate
        lat = entry['coordinates']['latitude']
        if lat is None:
            continue
        lon = entry['coordinates']['longitude']
        if lon is None:
            continue

        name = entry['location']

        # Value of the first parameter named 'Rainfall', if any
        rain = next(
            (p['latestValue'] for p in entry['parameters'] if p['paramNameEN'] == 'Rainfall'),
            None,
        )
        if rain is not None:
            records.append({
                'station_id': name,
                'rainfall': rain,
                'latitude': lat,
                'longitude': lon
            })
    return records

# The Natural Resources Wales key is read from the NRW_API_KEY environment
# variable when it is set, so the credential need not live in the notebook.
# The literal fallback only keeps existing runs working; it should be rotated
# and removed once the environment variable is in place.
import os
api_key = os.environ.get('NRW_API_KEY', '413a14f470f64b70a010cfa3b4ed6a79')

# Fetching and processing data for England and Scotland
eng_station_coordinates = fetch_station_data_eng()
eng_rainfall_data = get_rainfall_data_eng()
sco_station_coordinates = fetch_station_data_sco()
# One hourly-rainfall request is issued per Scottish station.
sco_rainfall_data = {station_id: get_rainfall_data_sco(station_id) for station_id in sco_station_coordinates}

# Fetching and processing data for Wales
wales_rainfall_data = get_wales_rainfall_data(api_key)

# Rows of [station id, rainfall, latitude, longitude, country]
combined_data = []

# England: measures whose station has no stored coordinates are dropped
for measurement in eng_rainfall_data:
    station_id = measurement.get('stationReference')
    latest_reading = measurement.get('latestReading')
    # Only a dict-shaped reading has a value to extract; anything else is
    # recorded as a missing measurement instead of raising.
    rainfall = latest_reading.get('value') if isinstance(latest_reading, dict) else None
    coordinates = eng_station_coordinates.get(station_id)
    if coordinates is not None:
        combined_data.append([station_id, rainfall, coordinates['latitude'], coordinates['longitude'], 'England'])

# Scotland: sco_rainfall_data is keyed by the stations in
# sco_station_coordinates, so the coordinate lookup below cannot miss.
for station_id, latest_hourly_data in sco_rainfall_data.items():
    if latest_hourly_data:
        rainfall = latest_hourly_data['Value']
        coordinates = sco_station_coordinates[station_id]
        combined_data.append([station_id, rainfall, coordinates['latitude'], coordinates['longitude'], 'Scotland'])

# Wales: records are already flattened by get_wales_rainfall_data
combined_data.extend(
    [
        station_data['station_id'],
        station_data['rainfall'],
        station_data['latitude'],
        station_data['longitude'],
        'Wales',
    ]
    for station_data in wales_rainfall_data
)

# Write to CSV
filename = "final_combined_rainfall_measurements.csv"
with open(filename, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Station ID", "Rainfall Measurement (mm)", "Latitude", "Longitude", "Country"])
    writer.writerows(combined_data)

print(f"Data has been written to {filename}")


Data has been written to final_combined_rainfall_measurements.csv


### same as before but getting county names through geopy

In [1]:
import requests
import csv
from geopy.geocoders import Nominatim
import time
from datetime import datetime

# Function to fetch county name using Geopy
def get_county_name(latitude, longitude):
    """Reverse-geocode a coordinate pair to a county name with Nominatim.

    Args:
        latitude: Latitude of the point to look up.
        longitude: Longitude of the point to look up.

    Returns:
        The ``county`` entry of the result's address, or the string
        ``'County not found'`` when the lookup fails, yields no result, or
        the address has no county.
    """
    try:
        # NOTE(review): Nominatim asks for a user agent that identifies the
        # application; confirm this value is acceptable for this project.
        geolocator = Nominatim(user_agent="geoapiExercises")
        location = geolocator.reverse((latitude, longitude), exactly_one=True)
    except Exception as e:
        # Best-effort lookup: report the failure instead of hiding it, but
        # let KeyboardInterrupt/SystemExit propagate (a bare except would not).
        print(f"Error in geocoding: {e}")
        return 'County not found'

    # No match for the coordinates.
    if location is None:
        return 'County not found'

    address = location.raw.get('address', {})
    return address.get('county', 'County not found')

# Function to fetch station data with coordinates for England
def fetch_station_data_eng():
    """Map each located rainfall station's notation to its coordinates.

    Stations whose ``lat`` or ``long`` is missing are left out.

    Returns:
        A dict keyed by the station ``notation`` whose values are
        ``{'latitude': ..., 'longitude': ...}`` dicts; empty on a non-200
        response.
    """
    reply = requests.get('https://environment.data.gov.uk/flood-monitoring/id/stations?parameter=rainfall')
    if reply.status_code != 200:
        return {}

    located = {}
    for entry in reply.json()['items']:
        lat, lon = entry.get('lat'), entry.get('long')
        if lat is None or lon is None:
            continue
        located[entry.get('notation')] = {'latitude': lat, 'longitude': lon}
    return located

# Function to fetch rainfall measurements for England
def get_rainfall_data_eng():
    """Return the ``items`` list of the rainfall ``measures`` endpoint.

    Returns:
        The ``items`` entry of the decoded response on status 200,
        otherwise an empty list.
    """
    reply = requests.get("http://environment.data.gov.uk/flood-monitoring/id/measures?parameter=rainfall")
    if reply.status_code != 200:
        return []
    return reply.json()['items']

# Function to fetch station data with coordinates for Scotland
def fetch_station_data_sco():
    """Map each located SEPA station number to its coordinates.

    Stations whose ``station_latitude`` or ``station_longitude`` is missing
    are left out.

    Returns:
        A dict keyed by ``station_no`` whose values are
        ``{'latitude': ..., 'longitude': ...}`` dicts; empty on a non-200
        response.
    """
    reply = requests.get("https://www2.sepa.org.uk/rainfall/api/Stations")
    if reply.status_code != 200:
        return {}

    located = {}
    for entry in reply.json():
        lat = entry.get('station_latitude')
        lon = entry.get('station_longitude')
        if lat is not None and lon is not None:
            located[entry['station_no']] = {'latitude': lat, 'longitude': lon}
    return located

# Function to fetch latest hourly rainfall data for Scotland
def get_rainfall_data_sco(station_id):
    """Return the last hourly rainfall record reported for a SEPA station.

    Args:
        station_id: Station identifier inserted into the ``Hourly`` URL.

    Returns:
        The final element of the JSON array returned by the API (presumably
        the most recent reading; assumes oldest-first ordering, TODO
        confirm), or ``None`` when the status is not 200 or the body is not
        a non-empty list.
    """
    sco_url = f"https://www2.sepa.org.uk/rainfall/api/Hourly/{station_id}?all=true"
    sco_response = requests.get(sco_url)
    if sco_response.status_code != 200:
        return None

    # Decode the body once rather than once for the emptiness check and a
    # second time for the lookup.
    hourly_records = sco_response.json()
    if not isinstance(hourly_records, list) or not hourly_records:
        return None
    return hourly_records[-1]

# Function to fetch station data with rainfall measurements for Wales
def get_wales_rainfall_data(api_key):
    """Collect the located stations that report a 'Rainfall' value.

    Args:
        api_key: Value sent in the ``Ocp-Apim-Subscription-Key`` header.

    Returns:
        A list of dicts with the keys ``station_id`` (the station's
        ``location`` field), ``rainfall``, ``latitude`` and ``longitude``.
        Stations with a null coordinate, or without a non-null 'Rainfall'
        value, are left out. The list is empty on a non-200 response.
    """
    reply = requests.get(
        'https://api.naturalresources.wales/rivers-and-seas/v1/api/StationData',
        headers={'Ocp-Apim-Subscription-Key': api_key},
    )
    records = []
    if reply.status_code != 200:
        return records

    for entry in reply.json():
        # Skip stations that lack either coordinate
        lat = entry['coordinates']['latitude']
        if lat is None:
            continue
        lon = entry['coordinates']['longitude']
        if lon is None:
            continue

        name = entry['location']

        # Value of the first parameter named 'Rainfall', if any
        rain = next(
            (p['latestValue'] for p in entry['parameters'] if p['paramNameEN'] == 'Rainfall'),
            None,
        )
        if rain is not None:
            records.append({
                'station_id': name,
                'rainfall': rain,
                'latitude': lat,
                'longitude': lon
            })
    return records

# The Natural Resources Wales key is read from the NRW_API_KEY environment
# variable when it is set, so the credential need not live in the notebook.
# The literal fallback only keeps existing runs working; it should be rotated
# and removed once the environment variable is in place.
import os
api_key = os.environ.get('NRW_API_KEY', '413a14f470f64b70a010cfa3b4ed6a79')

# Fetching and processing data for England, Scotland, and Wales
eng_station_coordinates = fetch_station_data_eng()
eng_rainfall_data = get_rainfall_data_eng()
sco_station_coordinates = fetch_station_data_sco()
# One hourly-rainfall request is issued per Scottish station.
sco_rainfall_data = {station_id: get_rainfall_data_sco(station_id) for station_id in sco_station_coordinates}
wales_rainfall_data = get_wales_rainfall_data(api_key)


# Rows of [station id, rainfall, latitude, longitude, country]
combined_data = []

# England: measures whose station has no usable coordinates are dropped
for measurement in eng_rainfall_data:
    station_id = measurement.get('stationReference')
    latest_reading = measurement.get('latestReading')
    # Only a dict-shaped reading has a value to extract; anything else is
    # recorded as a missing measurement instead of raising.
    rainfall = latest_reading.get('value') if isinstance(latest_reading, dict) else None
    coordinates = eng_station_coordinates.get(station_id, {'latitude': None, 'longitude': None})
    if coordinates['latitude'] is not None and coordinates['longitude'] is not None:
        combined_data.append([station_id, rainfall, coordinates['latitude'], coordinates['longitude'], 'England'])

# Scotland: stations without an hourly record or coordinates are skipped
for station_id, latest_hourly_data in sco_rainfall_data.items():
    if latest_hourly_data:
        rainfall = latest_hourly_data['Value']
        coordinates = sco_station_coordinates.get(station_id, {'latitude': None, 'longitude': None})
        if coordinates['latitude'] is not None and coordinates['longitude'] is not None:
            combined_data.append([station_id, rainfall, coordinates['latitude'], coordinates['longitude'], 'Scotland'])

# Wales
for station_data in wales_rainfall_data:
    station_id = station_data['station_id']
    rainfall = station_data['rainfall']
    latitude = station_data['latitude']
    longitude = station_data['longitude']
    if latitude is not None and longitude is not None:
        combined_data.append([station_id, rainfall, latitude, longitude, 'Wales'])


# Nominatim's usage policy allows at most one request per second, so each
# lookup is followed by a pause of this many seconds.
NOMINATIM_MIN_INTERVAL_S = 1.0

# Add geocoding for counties. Results are remembered per coordinate pair so
# rows that share a location trigger only one request.
county_cache = {}
for data_row in combined_data:
    latitude, longitude = data_row[2], data_row[3]
    # Compare against None explicitly: a coordinate of exactly 0 (e.g. on the
    # prime meridian) is valid but falsy.
    if latitude is None or longitude is None:
        data_row.append('No coordinates')
        continue

    cache_key = f"{latitude},{longitude}"
    if cache_key not in county_cache:
        county_cache[cache_key] = get_county_name(latitude, longitude)
        # Pause only after an actual request was made.
        time.sleep(NOMINATIM_MIN_INTERVAL_S)
    data_row.append(county_cache[cache_key])

# Write to CSV with the new 'County' column
filename = "final_combined_rainfall_measurements_with_county.csv"
with open(filename, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    header = ["Station ID", "Rainfall Measurement (mm)", "Latitude", "Longitude", "Country", "County"]
    writer.writerow(header)
    writer.writerows(combined_data)

current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"Data has been written to {filename} and current time is {current_time}")


Data has been written to final_combined_rainfall_measurements_with_county.csv and current time is 2024-03-15 00:35:40


### same as before but getting county names through opencage.

#### <span style="color:red"> Note: I am using the free tier of OpenCage, which allows only 2,500 requests per day. We currently have about 1,500 rows, so this code can be run only once a day on the free subscription. </span>

In [3]:
pip install opencage

Collecting opencage
  Obtaining dependency information for opencage from https://files.pythonhosted.org/packages/1e/c0/51441cbfe21146f5c153af3fa19f24f9621734af398f3da6f4e781adce68/opencage-2.4.0-py3-none-any.whl.metadata
  Downloading opencage-2.4.0-py3-none-any.whl.metadata (5.8 kB)
Collecting backoff>=2.2.1 (from opencage)
  Obtaining dependency information for backoff>=2.2.1 from https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl.metadata
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Downloading opencage-2.4.0-py3-none-any.whl (16 kB)
Downloading backoff-2.2.1-py3-none-any.whl (15 kB)
Installing collected packages: backoff, opencage
Successfully installed backoff-2.2.1 opencage-2.4.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To updat

In [5]:
import requests
import csv
from opencage.geocoder import OpenCageGeocode
import time
from datetime import datetime

# Function to fetch county name using OpenCage Geocoder
def get_county_name(latitude, longitude, opencage_api_key):
    """Reverse-geocode a coordinate pair to a county-like name via OpenCage.

    Args:
        latitude: Latitude of the point to look up.
        longitude: Longitude of the point to look up.
        opencage_api_key: Key used to construct the OpenCage geocoder.

    Returns:
        The first of ``county``, ``state_district`` or ``state`` present in
        the top result's ``components``; ``'County not found'`` when none is
        present, when there is no result, or when the lookup raises (the
        error is printed in that case).
    """
    fallback = 'County not found'
    try:
        matches = OpenCageGeocode(opencage_api_key).reverse_geocode(latitude, longitude)

        if matches and 'components' in matches[0]:
            parts = matches[0]['components']
            # Preference order: county, then district, then state
            for field in ('county', 'state_district', 'state'):
                if field in parts:
                    return parts[field]

        return fallback
    except Exception as e:
        print(f"Error in geocoding: {e}")
        return fallback


# Function to fetch station data with coordinates for England
def fetch_station_data_eng():
    """Map each located rainfall station's notation to its coordinates.

    Stations whose ``lat`` or ``long`` is missing are left out.

    Returns:
        A dict keyed by the station ``notation`` whose values are
        ``{'latitude': ..., 'longitude': ...}`` dicts; empty on a non-200
        response.
    """
    reply = requests.get('https://environment.data.gov.uk/flood-monitoring/id/stations?parameter=rainfall')
    if reply.status_code != 200:
        return {}

    located = {}
    for entry in reply.json()['items']:
        lat, lon = entry.get('lat'), entry.get('long')
        if lat is None or lon is None:
            continue
        located[entry.get('notation')] = {'latitude': lat, 'longitude': lon}
    return located

# Function to fetch rainfall measurements for England
def get_rainfall_data_eng():
    """Return the ``items`` list of the rainfall ``measures`` endpoint.

    Returns:
        The ``items`` entry of the decoded response on status 200,
        otherwise an empty list.
    """
    reply = requests.get("http://environment.data.gov.uk/flood-monitoring/id/measures?parameter=rainfall")
    if reply.status_code != 200:
        return []
    return reply.json()['items']

# Function to fetch station data with coordinates for Scotland
def fetch_station_data_sco():
    """Map each located SEPA station number to its coordinates.

    Stations whose ``station_latitude`` or ``station_longitude`` is missing
    are left out.

    Returns:
        A dict keyed by ``station_no`` whose values are
        ``{'latitude': ..., 'longitude': ...}`` dicts; empty on a non-200
        response.
    """
    reply = requests.get("https://www2.sepa.org.uk/rainfall/api/Stations")
    if reply.status_code != 200:
        return {}

    located = {}
    for entry in reply.json():
        lat = entry.get('station_latitude')
        lon = entry.get('station_longitude')
        if lat is not None and lon is not None:
            located[entry['station_no']] = {'latitude': lat, 'longitude': lon}
    return located

# Function to fetch latest hourly rainfall data for Scotland
def get_rainfall_data_sco(station_id):
    """Return the last hourly rainfall record reported for a SEPA station.

    Args:
        station_id: Station identifier inserted into the ``Hourly`` URL.

    Returns:
        The final element of the JSON array returned by the API (presumably
        the most recent reading; assumes oldest-first ordering, TODO
        confirm), or ``None`` when the status is not 200 or the body is not
        a non-empty list.
    """
    sco_url = f"https://www2.sepa.org.uk/rainfall/api/Hourly/{station_id}?all=true"
    sco_response = requests.get(sco_url)
    if sco_response.status_code != 200:
        return None

    # Decode the body once rather than once for the emptiness check and a
    # second time for the lookup.
    hourly_records = sco_response.json()
    if not isinstance(hourly_records, list) or not hourly_records:
        return None
    return hourly_records[-1]

# Function to fetch station data with rainfall measurements for Wales
def get_wales_rainfall_data(api_key):
    """Collect the latest rainfall value for Natural Resources Wales stations.

    Args:
        api_key: Subscription key sent in the ``Ocp-Apim-Subscription-Key``
            request header.

    Returns:
        A list of dicts with the keys ``'station_id'``, ``'rainfall'``,
        ``'latitude'`` and ``'longitude'``. A station is included only when
        both of its coordinates are not ``None`` and its first parameter
        named ``'Rainfall'`` has a ``latestValue`` that is not ``None``.
        The list is empty when the request does not answer with HTTP 200.
    """
    response = requests.get(
        'https://api.naturalresources.wales/rivers-and-seas/v1/api/StationData',
        headers={'Ocp-Apim-Subscription-Key': api_key},
    )
    collected = []
    if response.status_code != 200:
        return collected

    for entry in response.json():
        # Skip stations that cannot be placed on the map.
        if entry['coordinates']['latitude'] is None or entry['coordinates']['longitude'] is None:
            continue

        location = entry['location']
        lat = entry['coordinates']['latitude']
        lon = entry['coordinates']['longitude']

        # Take the latest value of the first parameter called 'Rainfall'.
        latest_rain = next(
            (item['latestValue'] for item in entry['parameters']
             if item['paramNameEN'] == 'Rainfall'),
            None,
        )
        if latest_rain is None:
            continue

        collected.append({
            'station_id': location,
            'rainfall': latest_rain,
            'latitude': lat,
            'longitude': lon
        })
    return collected

# API keys for OpenCage Geocoder and Natural Resources Wales.
# NOTE(review): both keys are committed in plain text. The environment
# variables below take precedence, so the embedded values can be rotated and
# removed without touching the rest of the script.
import os

opencage_api_key = os.environ.get('OPENCAGE_API_KEY', '26ea8244e3124756b142ca8a605395bd')
nrw_api_key = os.environ.get('NRW_API_KEY', '413a14f470f64b70a010cfa3b4ed6a79')

# Fetching and processing data for England, Scotland, and Wales
eng_station_coordinates = fetch_station_data_eng()
eng_rainfall_data = get_rainfall_data_eng()
sco_station_coordinates = fetch_station_data_sco()
# One request per Scottish station: maps station id -> last hourly record (or None)
sco_rainfall_data = {station_id: get_rainfall_data_sco(station_id) for station_id in sco_station_coordinates}
wales_rainfall_data = get_wales_rainfall_data(nrw_api_key)

# Combine the data: each row is [station id, rainfall, latitude, longitude, country]
combined_data = []

# Process and combine England data
for measurement in eng_rainfall_data:
    station_id = measurement.get('stationReference')
    rainfall = measurement.get('latestReading', {}).get('value')
    coordinates = eng_station_coordinates.get(station_id, {'latitude': None, 'longitude': None})
    if coordinates['latitude'] is not None and coordinates['longitude'] is not None:
        combined_data.append([station_id, rainfall, coordinates['latitude'], coordinates['longitude'], 'England'])

# Process and combine Scotland data
for station_id, latest_hourly_data in sco_rainfall_data.items():
    if latest_hourly_data:
        rainfall = latest_hourly_data['Value']
        coordinates = sco_station_coordinates.get(station_id, {'latitude': None, 'longitude': None})
        if coordinates['latitude'] is not None and coordinates['longitude'] is not None:
            combined_data.append([station_id, rainfall, coordinates['latitude'], coordinates['longitude'], 'Scotland'])

# Process and combine Wales data
for station_data in wales_rainfall_data:
    station_id = station_data['station_id']
    rainfall = station_data['rainfall']
    latitude = station_data['latitude']
    longitude = station_data['longitude']
    if latitude is not None and longitude is not None:
        combined_data.append([station_id, rainfall, latitude, longitude, 'Wales'])

# Add geocoding for counties using OpenCage
for data_row in combined_data:
    latitude, longitude = data_row[2], data_row[3]
    # Test for "missing" explicitly instead of relying on truthiness: a
    # coordinate of 0 is valid (the prime meridian crosses England) and must
    # still be geocoded.
    if latitude in (None, '') or longitude in (None, ''):
        data_row.append('No coordinates')
        continue

    county = get_county_name(latitude, longitude, opencage_api_key)
    data_row.append(county)

    # To respect the usage limits of OpenCage, add a short sleep after each request
    # (only when a request was actually made).
    time.sleep(1)

# Write to CSV with the new 'County' column
filename = "a_final_combined_rainfall_measurements_with_county.csv"
with open(filename, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    header = ["Station ID", "Rainfall Measurement (mm)", "Latitude", "Longitude", "Country", "County"]
    writer.writerow(header)
    writer.writerows(combined_data)

current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"Data has been written to {filename} and current time is {current_time}")


Data has been written to a_final_combined_rainfall_measurements_with_county.csv and current time is 2024-03-15 01:46:34


##### If the above code doesn't work

In [None]:
# Note: based on the example in the OpenCage Geocoder API documentation, the API returns JSON data in which the county information can be found inside the `components` dictionary of each result.
# The `get_county_name` function below parses the response accordingly.


def get_county_name(latitude, longitude, opencage_api_key):
    """Look up the county for a coordinate pair with the OpenCage geocoder.

    Args:
        latitude: Latitude of the point to look up.
        longitude: Longitude of the point to look up.
        opencage_api_key: API key handed to ``OpenCageGeocode``.

    Returns:
        The first non-empty value among the ``county``, ``state_district``
        and ``state`` entries of the first result's ``components``.
        ``'County not found'`` is returned when there is no usable result or
        when any exception is raised (the exception is printed).
    """
    not_found = 'County not found'
    try:
        client = OpenCageGeocode(opencage_api_key)
        matches = client.geocode(f"{latitude},{longitude}")

        if not matches or 'components' not in matches[0]:
            return not_found

        parts = matches[0]['components']
        # Depending on the location, the county may be stored under different keys
        for key in ('county', 'state_district', 'state'):
            name = parts.get(key)
            if name:
                return name

        return not_found
    except Exception as e:
        print(f"Error in geocoding: {e}")
        return not_found


## Final version of the code

### Same as the previous version, but adds the average rainfall for each county
#### <span style="color:red"> Note: It takes about 40 minutes for this code to execute</span>

In [1]:
import requests
import csv
from opencage.geocoder import OpenCageGeocode
import time
from datetime import datetime

# Function to fetch county name using OpenCage Geocoder
def get_county_name(latitude, longitude, opencage_api_key):
    """Reverse-geocode a coordinate pair to a county name via OpenCage.

    Args:
        latitude: Latitude of the point to look up.
        longitude: Longitude of the point to look up.
        opencage_api_key: API key handed to ``OpenCageGeocode``.

    Returns:
        The value of the first key present in the first result's
        ``components``, checked in the order ``county``, ``state_district``,
        ``state``. ``'County not found'`` is returned when none of those keys
        is present, when there is no result, or when any exception is raised
        (the exception is printed).
    """
    fallback = 'County not found'
    try:
        client = OpenCageGeocode(opencage_api_key)
        matches = client.reverse_geocode(latitude, longitude)

        if not matches or 'components' not in matches[0]:
            return fallback

        parts = matches[0]['components']
        # Walk the candidate keys from most to least specific.
        for key in ('county', 'state_district', 'state'):
            if key in parts:
                return parts[key]

        return fallback
    except Exception as e:
        print(f"Error in geocoding: {e}")
        return fallback

# Function to fetch station data with coordinates for England
def fetch_station_data_eng():
    """Build a lookup of Environment Agency rainfall stations and coordinates.

    Returns:
        A dict keyed by each station's ``notation`` value; every value is a
        dict with ``'latitude'`` and ``'longitude'`` entries taken from the
        station's ``lat`` and ``long`` fields. Stations where either field is
        missing or ``None`` are left out. An empty dict is returned when the
        request does not answer with HTTP 200.
    """
    response = requests.get(
        'https://environment.data.gov.uk/flood-monitoring/id/stations?parameter=rainfall'
    )
    if response.status_code != 200:
        return {}

    # Keep only the stations for which both coordinates are present.
    return {
        entry.get('notation'): {
            'latitude': entry.get('lat'),
            'longitude': entry.get('long'),
        }
        for entry in response.json()['items']
        if entry.get('lat') is not None and entry.get('long') is not None
    }

# Function to fetch rainfall measurements for England
def get_rainfall_data_eng():
    """Return the rainfall measures listed by the Environment Agency API.

    Sends a GET request to the flood-monitoring ``measures`` endpoint
    filtered with ``parameter=rainfall``.

    Returns:
        The value stored under ``'items'`` in the JSON response when the
        request answers with HTTP 200, otherwise an empty list.
    """
    response = requests.get(
        "http://environment.data.gov.uk/flood-monitoring/id/measures?parameter=rainfall"
    )
    # Guard clause: any status other than 200 is reported as "no data".
    if response.status_code != 200:
        return []
    return response.json()['items']

# Function to fetch station data with coordinates for Scotland
def fetch_station_data_sco():
    """Build a lookup of SEPA rainfall stations and their coordinates.

    Returns:
        A dict keyed by each station's ``station_no``; every value is a
        dict with ``'latitude'`` and ``'longitude'`` entries. Stations whose
        ``station_latitude`` or ``station_longitude`` is missing or ``None``
        are left out. An empty dict is returned when the request does not
        answer with HTTP 200.
    """
    response = requests.get("https://www2.sepa.org.uk/rainfall/api/Stations")
    if response.status_code != 200:
        return {}

    # Keep only the stations for which both coordinates are present.
    return {
        entry['station_no']: {
            'latitude': entry.get('station_latitude'),
            'longitude': entry.get('station_longitude'),
        }
        for entry in response.json()
        if entry.get('station_latitude') is not None
        and entry.get('station_longitude') is not None
    }

# Function to fetch latest hourly rainfall data for Scotland
def get_rainfall_data_sco(station_id):
    """Fetch the last hourly rainfall record reported for one SEPA station.

    Args:
        station_id: Station identifier interpolated into the ``Hourly``
            endpoint URL.

    Returns:
        The final element of the JSON payload (used by the caller as the
        latest hourly reading), or ``None`` when the request does not answer
        with HTTP 200 or the payload is empty.
    """
    sco_url = f"https://www2.sepa.org.uk/rainfall/api/Hourly/{station_id}?all=true"
    sco_response = requests.get(sco_url)
    if sco_response.status_code != 200:
        return None

    # Decode the body a single time and reuse it for both the emptiness
    # check and the lookup; the response is requested with ``all=true``, so
    # decoding it repeatedly for every station is wasted work.
    hourly_records = sco_response.json()
    if not hourly_records:
        return None
    return hourly_records[-1]

# Function to fetch station data with rainfall measurements for Wales
def get_wales_rainfall_data(api_key):
    """Collect the latest rainfall value for Natural Resources Wales stations.

    Args:
        api_key: Subscription key sent in the ``Ocp-Apim-Subscription-Key``
            request header.

    Returns:
        A list of dicts with the keys ``'station_id'``, ``'rainfall'``,
        ``'latitude'`` and ``'longitude'``. A station is included only when
        both of its coordinates are not ``None`` and its first parameter
        named ``'Rainfall'`` has a ``latestValue`` that is not ``None``.
        The list is empty when the request does not answer with HTTP 200.
    """
    response = requests.get(
        'https://api.naturalresources.wales/rivers-and-seas/v1/api/StationData',
        headers={'Ocp-Apim-Subscription-Key': api_key},
    )
    collected = []
    if response.status_code != 200:
        return collected

    for entry in response.json():
        # Skip stations that cannot be placed on the map.
        if entry['coordinates']['latitude'] is None or entry['coordinates']['longitude'] is None:
            continue

        location = entry['location']
        lat = entry['coordinates']['latitude']
        lon = entry['coordinates']['longitude']

        # Take the latest value of the first parameter called 'Rainfall'.
        latest_rain = next(
            (item['latestValue'] for item in entry['parameters']
             if item['paramNameEN'] == 'Rainfall'),
            None,
        )
        if latest_rain is None:
            continue

        collected.append({
            'station_id': location,
            'rainfall': latest_rain,
            'latitude': lat,
            'longitude': lon
        })
    return collected

# API keys for OpenCage Geocoder and Natural Resources Wales.
# NOTE(review): both keys are committed in plain text. The environment
# variables below take precedence, so the embedded values can be rotated and
# removed without touching the rest of the script.
import os

opencage_api_key = os.environ.get('OPENCAGE_API_KEY', '26ea8244e3124756b142ca8a605395bd')
nrw_api_key = os.environ.get('NRW_API_KEY', '413a14f470f64b70a010cfa3b4ed6a79')

# Fetching and processing data for England, Scotland, and Wales
eng_station_coordinates = fetch_station_data_eng()
eng_rainfall_data = get_rainfall_data_eng()
sco_station_coordinates = fetch_station_data_sco()
# One request per Scottish station: maps station id -> last hourly record (or None)
sco_rainfall_data = {station_id: get_rainfall_data_sco(station_id) for station_id in sco_station_coordinates}
wales_rainfall_data = get_wales_rainfall_data(nrw_api_key)

# Combine the data: each row is [station id, rainfall, latitude, longitude, country]
combined_data = []

# Process and combine England data
for measurement in eng_rainfall_data:
    station_id = measurement.get('stationReference')
    rainfall = measurement.get('latestReading', {}).get('value')
    coordinates = eng_station_coordinates.get(station_id, {'latitude': None, 'longitude': None})
    if coordinates['latitude'] is not None and coordinates['longitude'] is not None:
        combined_data.append([station_id, rainfall, coordinates['latitude'], coordinates['longitude'], 'England'])

# Process and combine Scotland data
for station_id, latest_hourly_data in sco_rainfall_data.items():
    if latest_hourly_data:
        rainfall = latest_hourly_data['Value']
        coordinates = sco_station_coordinates.get(station_id, {'latitude': None, 'longitude': None})
        if coordinates['latitude'] is not None and coordinates['longitude'] is not None:
            combined_data.append([station_id, rainfall, coordinates['latitude'], coordinates['longitude'], 'Scotland'])

# Process and combine Wales data
for station_data in wales_rainfall_data:
    station_id = station_data['station_id']
    rainfall = station_data['rainfall']
    latitude = station_data['latitude']
    longitude = station_data['longitude']
    if latitude is not None and longitude is not None:
        combined_data.append([station_id, rainfall, latitude, longitude, 'Wales'])

# Add geocoding for counties using OpenCage (appends the county as column 5)
for data_row in combined_data:
    latitude, longitude = data_row[2], data_row[3]
    # Test for "missing" explicitly instead of relying on truthiness: a
    # coordinate of 0 is valid (the prime meridian crosses England) and must
    # still be geocoded.
    if latitude in (None, '') or longitude in (None, ''):
        data_row.append('No coordinates')
        continue

    county = get_county_name(latitude, longitude, opencage_api_key)
    data_row.append(county)

    # To respect the usage limits of OpenCage, add a short sleep after each request
    # (only when a request was actually made).
    time.sleep(1)

# Calculate average rainfall for each county
county_rainfall = {}
for data_row in combined_data:
    county = data_row[5]
    # Register the county even if this row has no reading, so the lookup
    # further down always finds an entry.
    stats = county_rainfall.setdefault(county, {'total_rainfall': 0, 'count': 0})
    reading = data_row[1]
    # A station without a reading must not be counted as 0 mm, otherwise it
    # drags the county average down. A genuine 0 reading is still counted.
    if reading is None or reading == '':
        continue
    stats['total_rainfall'] += float(reading)
    stats['count'] += 1

for county, data in county_rainfall.items():
    # Counties with no usable reading fall back to 0.
    average_rainfall = round(data['total_rainfall'] / data['count'], 2) if data['count'] > 0 else 0
    data['average_rainfall'] = average_rainfall


# Append the average rainfall to the data rows
for data_row in combined_data:
    county = data_row[5]
    average_rainfall = county_rainfall[county].get('average_rainfall', 0)
    data_row.append(average_rainfall)

# Write to CSV with the new 'County Avg Rainfall' column
filename = "rainfall_measurements_with_county.csv"
with open(filename, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    header = ["Station ID", "Rainfall Measurement (mm)", "Latitude", "Longitude", "Country", "County", "County Avg Rainfall (mm)"]
    writer.writerow(header)
    writer.writerows(combined_data)

current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"Data has been written to {filename} and current time is {current_time}")


Data has been written to test_final_combined_rainfall_measurements_with_county_and_avg.csv and current time is 2024-03-17 00:49:46
