# City Raininess

##### This notebook allows a user to enter a city and country, and will locate the latitude and longitude of the most populated city following those parameters,if there is one; this utilizes the world_cities CSV. Then, based on the latitude and longitude of the city, historical and forecast data will be determined from the OpenMeteo API. A raininess index is defined, weighting recent raininess more heavily than less recent data.

***

In [25]:
# Importing
import pandas as pd
import requests
import datetime
import pprint
import math
import lets_plot
import time

***

### 1. Converting City and Country to Latitude and Longitude

In [26]:
def get_city_latlon(city, country):
    '''
    Get the latitude and longitude of a city in a country, derived from the world_cities CSV file.
    Params:
        city: str, the name of the city
        country: str, the name of the country
    Returns:
        a tuple of the latitude and longitude
    '''
    # Read the world_cities CSV file
    world_cities = pd.read_csv('world_cities.csv')

    # Filter the dataframe based on the city and country
    filtered_cities = world_cities[(world_cities['name'] == city) & (world_cities['country'] == country)]

    # Check if any matching cities are found
    if len(filtered_cities) > 0:
        # Get the longitude of the first matching city
        longitude = filtered_cities.iloc[0]['lng']
        # Get the latitude of the first matching city
        latitude = filtered_cities.iloc[0]['lat']
        # Return the latitude and longitude as a tuple
        return (latitude, longitude)
    else:
        # Return None if no matching city is found
        return None

### <center>Testing</center>

In [27]:
# Testing the above function

# Test case 1: Mumbai, India
city = 'Mumbai'
country = 'IN'
print(f'The latitude and longitude of {city}, {country} is {get_city_latlon(city, country)}')

# Test case 2: New York City, United States
city = 'New York City'
country = 'US'
print(f'The latitude and longitude of {city}, {country} is {get_city_latlon(city, country)}')

# Test case 3: Tokyo, Japan
city = 'Tokyo'
country = 'JP'
print(f'The latitude and longitude of {city}, {country} is {get_city_latlon(city, country)}')

# Test case 4: Walnut Creek, United States
city = 'Walnut Creek'
country = 'US'
print(f'The latitude and longitude of {city}, {country} is {get_city_latlon(city, country)}')

The latitude and longitude of Mumbai, IN is (19.07283, 72.88261)
The latitude and longitude of New York City, US is (40.71427, -74.00597)
The latitude and longitude of Tokyo, JP is (35.6895, 139.69171)
The latitude and longitude of Walnut Creek, US is (37.90631, -122.06496)


***

### 2. Collecting Historical Data (Last Year)

In [28]:
def get_historical_precipitation(latitude: float, longitude: float) -> dict:
    '''
    Get the number of days rained and number of mm of rain in the past 2 years
    Params:
        latitude: float - latitude of the location
        longitude: float - longitude of the location
    Returns:
        a dictionary of the daily rain for the past 2 years in mm, number of hours of precipitation
    '''
    
    # Get the historical weather data

    '''base_historical_url = "https://archive-api.open-meteo.com/v1/era5?"
    params_lat_long_ = "latitude=" + str(latitude) + "&longitude=" + str(longitude)

    # Get the current date
    current_date = datetime.date.today()
    
    # Calculate the date five years ago
    two_years_ago = current_date - datetime.timedelta(days=365*3)
    
    # Format the date as YYYY-MM-DD
    formatted_date = two_years_ago.strftime("%Y-%m-%d")
    
    params_dates = "&start_date=" + formatted_date + "&end_date=" + str(current_date)
    param_other = "&daily=rain_sum&daily=precipitation_hours"
    total_url = base_historical_url + params_lat_long_ + params_dates + param_other

    response = requests.get(total_url)
    historical_data = response.json()
    
    historical_rain_sum = historical_data['daily']['rain_sum']
    historical_precipitation_hours = historical_data['daily']['precipitation_hours']
    
    return {
        "Historical Rain Sum": historical_rain_sum, 
        "Historical Precipitation Hours": historical_precipitation_hours
            }'''
    
    base_historical_url = "https://archive-api.open-meteo.com/v1/era5?"
    params_lat_long = f"latitude={latitude}&longitude={longitude}"

    current_date = datetime.date.today()
    five_years_ago = current_date - datetime.timedelta(days=365 * 5)
    start_date = five_years_ago.strftime("%Y-%m-%d")
    end_date = current_date.strftime("%Y-%m-%d")

    params_dates = f"&start_date={start_date}&end_date={end_date}"
    param_other = "&daily=rain_sum&daily=precipitation_hours"
    total_url = base_historical_url + params_lat_long + params_dates + param_other

    try:
        response = requests.get(total_url)
        historical_data = response.json()
        
        # Debugging: Print the entire response if 'daily' is missing
        if 'daily' not in historical_data:
            print(f"Full response for coordinates ({latitude}, {longitude}): {historical_data}")
            return {"Historical Rain Sum": [], "Historical Precipitation Hours": []}
        
        return {
            "Historical Rain Sum": historical_data['daily'].get('rain_sum', []),
            "Historical Precipitation Hours": historical_data['daily'].get('precipitation_hours', [])
        }

    except requests.exceptions.RequestException as e:
        print(f"Request failed for ({latitude}, {longitude}) with error: {e}")
        return {"Historical Rain Sum": [], "Historical Precipitation Hours": []}
    


### <center>Testing</center>

In [29]:
# Testing historical precipitation using the get_historical_precipitation function and the get_city_latlon function

# Test 1: San Francisco
print("Test 1: San Francisco")
city = 'San Francisco'
country = 'US'
latitude, longitude = get_city_latlon(city, country)
pprint.pp(get_historical_precipitation(latitude, longitude))

print("\n")
print("-" * 20)
print("\n")

# Test 2: New York
print("Test 2: New York City")
city = 'New York City'
country = 'US'
latitude, longitude = get_city_latlon(city, country)
pprint.pp(get_historical_precipitation(latitude, longitude))

print("\n")
print("-" * 20)
print("\n")

# Test 3: London
print("Test 3: London")
city = 'London'
country = 'GB'
latitude, longitude = get_city_latlon(city, country)
pprint.pp(get_historical_precipitation(latitude, longitude))

Test 1: San Francisco
{'Historical Rain Sum': [0.0,
                         0.0,
                         0.0,
                         0.0,
                         0.0,
                         0.0,
                         0.0,
                         0.0,
                         0.0,
                         0.5,
                         0.0,
                         0.1,
                         0.0,
                         0.0,
                         0.0,
                         1.5,
                         0.0,
                         0.0,
                         0.0,
                         0.0,
                         0.0,
                         1.3,
                         21.6,
                         1.7,
                         0.4,
                         5.4,
                         27.0,
                         40.8,
                         2.6,
                         19.9,
                         1.7,
                         1.2,
              

***

### 3. Collecting Forecast Data (Next 7 Days)

In [30]:
def get_forecast_precipitation(latitude: float, longitude: float) -> dict:
    '''
    Get the hourly forecast precipitation for a given latitude and longitude for the next 7 days
    Params: 
        latitude: float - latitude of the location
        longitude: float - Longitude of the location
    Returns:
        dict - Forecast precipitation
    '''

    # Building the base URL
    base_forecast_url = "https://api.open-meteo.com/v1/forecast?"
    params_lat_long = "latitude=" + str(latitude) + "&longitude="  + str(longitude)
    params_others = "&daily=rain_sum"

    final_url = base_forecast_url + params_lat_long + params_others

    # Getting the forecast data
    response = requests.get(final_url)

    # Extracting the forecast precipitation
    forecast_data = response.json()
    forecast_precipitation = forecast_data['daily']['rain_sum']
    return forecast_precipitation

### <center>Testing</center>

In [31]:
# Testing the get_forecast_precipitation function using the get_city_latlon function

# Test 1: San Francisco
city = 'San Francisco'
country = 'US'
latitude, longitude = get_city_latlon(city, country)
print(get_forecast_precipitation(latitude, longitude))

# Test 2: New York
city = 'New York City'
country = 'US'
latitude, longitude = get_city_latlon(city, country)
print(get_forecast_precipitation(latitude, longitude))

# Test 3: London
city = 'London'
country = 'GB'
latitude, longitude = get_city_latlon(city, country)
print(get_forecast_precipitation(latitude, longitude))

# Test 4: Mumbai
city = 'Mumbai'
country = 'IN'
latitude, longitude = get_city_latlon(city, country)
print(get_forecast_precipitation(latitude, longitude))

# Test 5: Tokyo
city = 'Tokyo'
country = 'JP'
latitude, longitude = get_city_latlon(city, country)
print(get_forecast_precipitation(latitude, longitude))

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.1]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.6, 0.0, 0.0, 0.0, 1.5]


***

### 4. Defining the Raininess Index

##### The raininess index combines the daily historical data of amount of rain (mm), hours of precipitation, and the mean probability of precipitation with the hourly forecast data of amount of precipitation

#### Collecting London data:

##### To collect data for a particular city (in this case, London), we can call the functions defined about to gather both historical and forecast data

In [32]:
# Collecting historical and forecast precipitation data for London, GB
city = 'London'
country = 'GB'
latitude, longitude = get_city_latlon(city, country)
historical_precipitation = f"   Historical precipitation: {get_historical_precipitation(latitude, longitude)}"
forecast_precipitation = f"   Forecast precipitation: {get_forecast_precipitation(latitude, longitude)}"
print(f"Weather data for {city}, {country}:")
print(historical_precipitation)
print(forecast_precipitation)

# storing above output into a variable
London = {
    "City": city,
    "Country": country,
    "Historical Precipitation": get_historical_precipitation(latitude, longitude),
    "Forecast Precipitation": get_forecast_precipitation(latitude, longitude)
}

Weather data for London, GB:
   Historical precipitation: {'Historical Rain Sum': [0.7, 0.3, 12.8, 1.7, 5.0, 1.4, 4.6, 2.3, 4.0, 15.4, 7.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.9, 1.7, 2.3, 0.0, 1.6, 4.7, 12.3, 2.4, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 5.5, 0.0, 3.6, 1.3, 4.8, 0.1, 7.5, 8.7, 6.6, 3.1, 2.5, 7.9, 6.6, 10.8, 14.5, 5.2, 4.3, 0.0, 1.7, 0.0, 7.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.1, 0.0, 0.0, 1.3, 0.1, 3.5, 6.5, 0.1, 0.0, 1.0, 4.8, 5.4, 12.5, 6.2, 2.7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 2.9, 6.9, 1.5, 0.0, 0.6, 0.1, 0.0, 6.7, 0.3, 0.1, 0.0, 0.0, 0.0, 1.0, 18.8, 2.4, 0.3, 0.4, 12.8, 0.3, 6.8, 15.7, 1.0, 2.2, 1.6, 3.9, 0.0, 0.0, 1.2, 4.3, 2.7, 0.8, 7.2, 6.1, 5.5, 0.0, 4.2, 0.2, 5.1, 15.4, 0.0, 0.0, 3.3, 4.0, 3.1, 0.2, 1.6, 0.1, 3.5, 1.7, 0.0, 0.0, 0.6, 1.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.6, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 6.4, 2.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 13.6, 3.4, 7.9, 2.2, 0.0, 1.0, 0

#### Converting London Data to Raininess Index

##### Now, a raininess index will be created, which weights more recent weather more heavily to determine how "rainy" a city is

##### The weighting works as follows:

The most emphasis is placed on the past two weeks of rain, with the least recent historical data being weighted the least. 

As forecast data is often not very accurate (e.g., many very rainy cities often show no rain in the forecast), forecast data will not **decrease** the raininess index of a city. However, if there is a substantial amount of rain in the forecast, this will **increase** the raininess index.

In [33]:
# The raininess index
def raininess(data: dict) -> str:
    '''
    Calculate the raininess of a city based on the historical and forecast precipitation data
    Params:
        data: dict - a dictionary containing historical and forecast precipitation data
    Returns:
        int - the raininess of the city
    '''

    historical_rain_sum = data['Historical Precipitation']['Historical Rain Sum']
    historical_precipitation_hours = data['Historical Precipitation']['Historical Precipitation Hours']
    forecast_rain_sum = data['Forecast Precipitation']

    # Weights for each year of the historical data
    weights = [1, 0.5]

    # Split data into yearly chunks
    days_per_year = len(historical_rain_sum) // 2  # Assuming equal length data for each year
    weighted_rain_sum = 0
    weighted_precipitation_hours = 0

    # Calculate the weighted sum of the historical data
    for i, weight in enumerate(weights):
        start_idx = i * days_per_year
        end_idx = start_idx + days_per_year
        
        # Calculate the sum for this year's data
        year_rain_sum = sum(day for day in historical_rain_sum[start_idx:end_idx] if day is not None)
        year_precipitation_hours = sum(hour for hour in historical_precipitation_hours[start_idx:end_idx] if hour is not None)
        
        # Apply weights
        weighted_rain_sum += weight * year_rain_sum
        weighted_precipitation_hours += weight * year_precipitation_hours

    # Calculate the raininess index based on weighted values
    raininess_index = weighted_rain_sum + 0.3 * weighted_precipitation_hours

    # Add a small boost if there's significant rain in the forecast
    forecast_threshold = 5
    if any(rain >= forecast_threshold for rain in forecast_rain_sum):
        raininess_index += 10  # Small boost if there's rain in the forecast

    # Normalize the raininess index using a logarithmic scale
    log_base = 4
    multiplier = 10
    normalized_raininess_index = multiplier * math.log(raininess_index + 1, log_base)

    return normalized_raininess_index


### <center>Testing</center>

In [34]:
# Testing the raininess function

# Test 1: London
print("Raininess of London:")
print(raininess(London))

print("-" * 20)

# Test 2: San Francisco
print("Raininess of San Francisco:")

city = 'San Francisco'
country = 'US'
latitude, longitude = get_city_latlon(city, country)

San_Francisco = {
    "City": 'San Francisco',
    "Country": 'US',
    "Historical Precipitation": get_historical_precipitation(latitude, longitude),
    "Forecast Precipitation": get_forecast_precipitation(latitude, longitude)
}

print(raininess(San_Francisco))

print("-" * 20)

# Test 3: New York

print("Raininess of New York City:")
city = 'New York City'
country = 'US'
latitude, longitude = get_city_latlon(city, country)

New_York = {
    "City": 'New York City',
    "Country": 'US',
    "Historical Precipitation": get_historical_precipitation(latitude, longitude),
    "Forecast Precipitation": get_forecast_precipitation(latitude, longitude)
}

print(raininess(New_York))

print("-" * 20)

# Test 4: Mumbai
print("Raininess of Mumbai:")
city = 'Mumbai'
country = 'IN'
latitude, longitude = get_city_latlon(city, country)

Mumbai = {
    "City": 'Mumbai',
    "Country": 'IN',
    "Historical Precipitation": get_historical_precipitation(latitude, longitude),
    "Forecast Precipitation": get_forecast_precipitation(latitude, longitude)
}

print(raininess(Mumbai))

Raininess of London:
60.89775740596843
--------------------
Raininess of San Francisco:
57.0570252968864
--------------------
Raininess of New York City:
63.218548293092375
--------------------
Raininess of Mumbai:
67.41744156403331


#### For now, the raininess index is just a number. 

We are next going to run a number of cities through these functions in order to create a distribution of raininess; from this distribution, the bottom 25% of cities (in terms of raininess index) will be deemed "Not rainy", while the top 25% will be deemed "Very rainy". Our goal here is to see where London falls in this distribution.

***

### 4. Comparing London to Other Cities

To get a wide range of latitudes and logitudes, I will first compare London to other world capitals, contained in the file [country-capital-lat-long-population.csv](https://gist.github.com/ofou/df09a6834a8421b4f376c875194915c9)

Because London is a capital, the London data will be derived from the CSV while the capitals are iterated through.

In [35]:
# Creating a DF of world cities
cities_df = pd.read_csv('country-capital-lat-long-population.csv')

# Initialize an empty list to store raininess index values
raininess_values = []

# Loop through each row in the DataFrame
for index, row in cities_df.iterrows():
    try:
        # Calculate the raininess index for the current city
        raininess_index = raininess({
            "City": row['Capital City'],
            "Country": row['Country'],
            "Historical Precipitation": get_historical_precipitation(row['Latitude'], row['Longitude']),
            "Forecast Precipitation": get_forecast_precipitation(row['Latitude'], row['Longitude'])
        })

        # Add a delay between each request to avoid hitting rate limits
        time.sleep(5)
        
    except KeyError as e:
        # Log an error message and skip to the next iteration
        print(f"KeyError for {row['Capital City']}, {row['Country']}: {e}")
        raininess_index = None  # Optional: assign None or a default value if data is missing
    
    # Append the calculated raininess index (or None) to the list
    raininess_values.append(raininess_index)

cities_df['Raininess'] = raininess_values

# Sort the cities by raininess index
cities_df = cities_df.sort_values(by='Raininess', ascending=False)

# Reset the index and remove the old one
cities_df = cities_df.reset_index(drop=True)

# Set the index to start at 1 instead of 0
cities_df.index = cities_df.index + 1

# Select only the relevant columns to display
sample_cities_df = cities_df[['Capital City', 'Country', 'Latitude', 'Longitude', 'Raininess']]

# Display the top 10 rainiest cities
print("Top 10 Rainiest Capital Cities:")
print(sample_cities_df.head(10))

Top 10 Rainiest Capital Cities:
           Capital City                      Country  Latitude  Longitude  \
1               Cayenne                French Guiana    4.9333   -52.3333   
2                 Koror                        Palau    7.3426   134.4789   
3               Palikir  Micronesia (Fed. States of)    6.9174   158.1588   
4                Majuro             Marshall Islands    7.0897   171.3803   
5               Papeete             French Polynesia  -17.5333  -149.5667   
6   Bandar Seri Begawan            Brunei Darussalam    4.9403   114.9481   
7              Monrovia                      Liberia    6.3005   -10.7969   
8              São Tomé        Sao Tome and Principe    0.3365     6.7273   
9                  Apia                        Samoa  -13.8333  -171.7667   
10             Funafuti                       Tuvalu   -8.5189   179.1991   

    Raininess  
1   72.568900  
2   72.185233  
3   71.760855  
4   71.673915  
5   70.664145  
6   70.391859  
7   70.3