# Introduction
This notebook is for collecting and processing rainfall data required for our data analysis notebook.

# 1. Import Libraries ⚙️


This code imports essential libraries required for data collection and analysis:

In [1]:
import json
import os
from datetime import datetime
from urllib.parse import urlencode

import pandas as pd
import requests

# 2. Helpful Functions 🛠️

The code below reads the world cities data and returns the latitude and longitude of a city. You can use it to get the coordinates of the city you want to analyse.

In [2]:
def get_lat_lon(country_code, city_name, filepath='../data/world_cities.csv'):
    """
    Look up the latitude and longitude of a city in a world-cities CSV file.

    Parameters:
        country_code (str): Value to match against the 'country' column (e.g. "SG").
        city_name (str): Value to match against the 'name' column (e.g. "Singapore").
        filepath (str): Path to the CSV file with city data.
            Defaults to '../data/world_cities.csv'.

    Returns:
        tuple: (latitude, longitude), read from the 'lat' and 'lng' columns of
        the first row that matches both the country code and the city name.

    Raises:
        ValueError: If no row matches the given country code and city name.
    """
    # Load the CSV file into a DataFrame.
    # pandas' default missing-value markers include the literal text "NA", which
    # would silently turn Namibia's country code into NaN and make it impossible
    # to match. Only genuinely empty fields are treated as missing here.
    world_cities = pd.read_csv(filepath, keep_default_na=False, na_values=[''])

    # Keep only the rows where both the country code and the city name match
    is_match = (world_cities['country'] == country_code) & (world_cities['name'] == city_name)

    # Convert the matching rows to a list of dictionaries for easy access
    city_data = world_cities[is_match].to_dict('records')

    # Raise an error if no matching record is found
    if len(city_data) == 0:
        raise ValueError(f"No records found for {city_name}, {country_code} in {filepath}")

    # If several rows match, the first one in file order wins
    first_match = city_data[0]

    # Return latitude and longitude as a tuple
    return first_match['lat'], first_match['lng']


Let's test this function for Singapore, SG.

In [3]:
# Sanity check: look up the coordinates stored for Singapore
get_lat_lon(country_code="SG", city_name="Singapore")

(1.28967, 103.85007)

**I wrote a function to construct the URL for me. This way I can call it anytime inside or outside another function:**

In [4]:
def build_url(latitude: float, longitude: float, start_date: str, end_date: str,
              daily=("precipitation_sum", "precipitation_hours"),
              timezone: str = "auto") -> str:
    """
    Build the request URL for the Open-Meteo historical weather (ERA5) API.

    Parameters:
        latitude (float): Latitude of the location.
        longitude (float): Longitude of the location.
        start_date (str): First day of the range in "YYYY-MM-DD" format. Any
            object whose str() has that format (e.g. datetime.date) also works.
        end_date (str): Last day of the range, same format as start_date.
        daily (str or iterable of str): Daily variables to request, either as a
            ready-made comma-separated string or as a sequence of names.
            Defaults to precipitation sum and precipitation hours.
        timezone (str): Value of the API's timezone parameter. Defaults to
            "auto" for local adjustment.

    Returns:
        str: The complete API URL, with every parameter value percent-encoded.
    """
    # Base URL for the historical weather API (already ends with "?")
    base_historical_url = "https://archive-api.open-meteo.com/v1/era5?"

    # The API expects the daily variables as one comma-separated value
    daily_variables = daily if isinstance(daily, str) else ",".join(daily)

    # urlencode converts each value with str() and escapes unsafe characters;
    # safe="," keeps the commas of the daily list readable instead of "%2C"
    query = urlencode(
        {
            "latitude": latitude,
            "longitude": longitude,
            "start_date": start_date,
            "end_date": end_date,
            "daily": daily_variables,
            "timezone": timezone,
        },
        safe=",",
    )

    # Combine the base URL and the query string to form the final API URL
    return base_historical_url + query


Let's test the function for Singapore, SG.

In [5]:
# Sanity check: build the request URL for two days of Singapore data
build_url(
    latitude=1.28967,
    longitude=103.8501,
    start_date="2023-01-01",
    end_date="2023-01-02",
)

'https://archive-api.open-meteo.com/v1/era5?latitude=1.28967&longitude=103.8501&start_date=2023-01-01&end_date=2023-01-02&daily=precipitation_sum,precipitation_hours&timezone=auto'

**Compile a list of city data, including country code and city name**

In [6]:
# Cities to collect rainfall for, each given as (two-letter country code, city name)
cities = [
    ("GB", "London"),
    ("SG", "Singapore"),
    ("EG", "Cairo"),
    ("AR", "Buenos Aires"),
    ("IN", "Mumbai"),
]


**Compile latitudes and longitudes for all cities required for our analysis by looping over the `cities` list**

In [7]:
# Build one (country code, city name, latitude, longitude) tuple per city;
# the coordinates returned by get_lat_lon are unpacked straight into the tuple
geo_data = [
    (code, city, *get_lat_lon(code, city))
    for code, city in cities
]

# Display the collected geographic data
geo_data


[('GB', 'London', 51.50853, -0.12574),
 ('SG', 'Singapore', 1.28967, 103.85007),
 ('EG', 'Cairo', 30.06263, 31.24967),
 ('AR', 'Buenos Aires', -34.61315, -58.37723),
 ('IN', 'Mumbai', 19.07283, 72.88261)]

# 3. Historical Rainfall 🌧️

I want to create a function that gives me the historical rainfall for each city, using the functions we defined earlier:

1. `get_lat_lon` --> Obtain the latitude and longitude of the city
2. `build_url` --> Build the API URL that requests the rainfall variables for that location

In [16]:
def get_historical_data(country_code, city_name, start_date="2023-01-01", end_date="2023-12-31", timeout=30):
    """
    Retrieves historical weather data for a specific city using default dates if none are provided.

    Parameters:
        country_code (str): The country code of the city.
        city_name (str): The name of the city.
        start_date (str): Start date in "YYYY-MM-DD" format. Defaults to Jan 1, 2023.
        end_date (str): End date in "YYYY-MM-DD" format. Defaults to Dec 31, 2023.
        timeout (float): Seconds to wait for the API before giving up. Defaults to 30.

    Returns:
        dict: The "daily" section of the API response (an empty dict if the
        response has no such section).

    Raises:
        ValueError: If the city cannot be found by get_lat_lon.
        requests.exceptions.RequestException: If the request times out, fails,
            or the API answers with an HTTP error status.
    """

    # Get latitude and longitude for the specified city and country
    latitude, longitude = get_lat_lon(country_code, city_name)

    # Build the API URL using the latitude, longitude, and date range
    url = build_url(latitude, longitude, start_date, end_date)

    # Make an API request to retrieve historical weather data.
    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.get(url, timeout=timeout)

    # Fail loudly on 4xx/5xx instead of quietly returning an empty dict that
    # would later be saved as if it were real data
    response.raise_for_status()

    # Parse the JSON response and return daily data
    data = response.json()

    return data.get("daily", {})


Let's test this function for Singapore, SG for 2 specific dates.

In [17]:
# Sanity check: fetch two days of rainfall data for Singapore
get_historical_data(
    country_code="SG",
    city_name="Singapore",
    start_date="2023-01-01",
    end_date="2023-01-02",
)

{'time': ['2023-01-01', '2023-01-02'],
 'precipitation_sum': [0.0, 3.7],
 'precipitation_hours': [0.0, 5.0]}

I've created a dictionary to collect and store historical rainfall data for each city.

In [10]:
# Map each city name to its historical rainfall data (default date range).
# The coordinates stored in geo_data are not needed here, because
# get_historical_data looks them up again from the country code and city name.
historical_rainfall = {
    city: get_historical_data(code, city)
    for code, city, _lat, _lon in geo_data
}


In [11]:
# Show the (country code, city name, latitude, longitude) tuples held in geo_data
print(geo_data)

[('GB', 'London', 51.50853, -0.12574), ('SG', 'Singapore', 1.28967, 103.85007), ('EG', 'Cairo', 30.06263, 31.24967), ('AR', 'Buenos Aires', -34.61315, -58.37723), ('IN', 'Mumbai', 19.07283, 72.88261)]


A few checks to confirm it worked:

In [18]:
# List the keys of historical_rainfall; there should be one per city name
historical_rainfall.keys()

dict_keys(['London', 'Singapore', 'Cairo', 'Buenos Aires', 'Mumbai'])

In [20]:
# Pad every city name to the length of the longest one so the report lines up
# (a fixed width of 10 is too narrow for names such as "Buenos Aires")
name_width = max(map(len, historical_rainfall), default=0)

# Each value is the "daily" dictionary returned by get_historical_data,
# so len() counts its keys rather than the number of days
for city, rainfall in historical_rainfall.items():
    print(f"The value for key {city:{name_width}s} is a dict with {len(rainfall)} keys")


The value for key London     is a list of 3 elements
The value for key Singapore  is a list of 3 elements
The value for key Cairo      is a list of 3 elements
The value for key Buenos Aires is a list of 3 elements
The value for key Mumbai     is a list of 3 elements


**This code saves the `historical_rainfall` data to a JSON file:**

In [14]:
# Serialise historical_rainfall to a JSON string and write it to disk
with open('../data/multicity_historical.json', 'w') as out_file:
    out_file.write(json.dumps(historical_rainfall))