In [30]:
import os

import numpy as np
import pandas
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interact

from data.population.utils import filter_by_type, get_indices_matching_type, CityType

In [31]:
# Assumes the csv has the columns:
# - Country, City, Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec, Year
# and that its rows alternate between Celsius and Fahrenheit (starting with Celsius).

def get_temperature_spread(file_path, save=False):
    """Extract per-city average, maximum and minimum temperatures from a csv.

    The csv is expected to contain the columns ``City``, ``Jan`` ... ``Dec``
    and ``Year``, with rows that alternate between Celsius and Fahrenheit
    (starting with Celsius). Only every second row, beginning with the first,
    is kept.

    Parameters
    ----------
    file_path : str
        Path of the csv file to read.
    save : bool, optional
        When true, the extracted values are also written to
        ``./cleaned/<basename of file_path>`` with the columns ``Avg``,
        ``Max`` and ``Min``, indexed by city. The ``./cleaned`` directory is
        created if it does not exist yet.

    Returns
    -------
    tuple of numpy.ndarray
        ``(city_labels, avg_temperatures, max_temperatures, min_temperatures)``.
        The averages are read from the ``Year`` column; the maxima and minima
        are taken per row over the ``Jan``..``Dec`` columns.
    """
    # Step of 2 keeps rows 0, 2, 4, ... and drops the interleaved rows.
    temperatures = pandas.read_csv(file_path).loc[::2, "City":"Year"]
    city_labels = temperatures.loc[:, "City"].to_numpy()
    avg_temperatures = temperatures.loc[:, "Year"].to_numpy()

    # Slice the monthly columns once and reuse them for both extremes.
    monthly_temperatures = temperatures.loc[:, "Jan":"Dec"].to_numpy()
    max_temperatures = np.max(monthly_temperatures, axis=1)
    min_temperatures = np.min(monthly_temperatures, axis=1)

    if save:
        output_dir = "./cleaned"
        # to_csv refuses to write into a missing directory, so create it first.
        os.makedirs(output_dir, exist_ok=True)
        cleaned_data = pandas.DataFrame(
            np.array([avg_temperatures, max_temperatures, min_temperatures]).T,
            columns=["Avg", "Max", "Min"],
            index=city_labels,
        )
        cleaned_data.to_csv(f"{output_dir}/{os.path.basename(file_path)}")

    return city_labels, avg_temperatures, max_temperatures, min_temperatures

def plot_labeled_scatterplot(labels, x, y, title="", xlabel="", ylabel="", fig_kwargs=None):
    """Draw a scatter plot in a new figure and annotate each point with a label.

    Parameters
    ----------
    labels : sequence
        Text for each point; ``labels[i]`` is placed at ``(x[i], y[i])``.
    x, y : sequence
        Coordinates of the points; indexed by integer position.
    title, xlabel, ylabel : str, optional
        Figure title and axis labels.
    fig_kwargs : dict, optional
        Keyword arguments forwarded to ``plt.figure``. ``figsize`` defaults to
        ``(15, 15)`` when not given. The dict passed in is not modified.
    """
    # Build a fresh dict so neither a shared default nor the caller's dict is
    # mutated; explicitly supplied options override the default figsize.
    figure_options = {"figsize": (15, 15), **(fig_kwargs or {})}
    plt.figure(**figure_options)
    plt.scatter(x, y)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)

    for i, label in enumerate(labels):
        plt.annotate(label, (x[i], y[i]))

    plt.show()
    

In [32]:
# Load the raw temperature file of each continent. Every call returns, in
# order: city labels, yearly averages, monthly maxima and monthly minima.
(na_cities, na_avgs, na_maxs, na_mins) = get_temperature_spread(
    "./raw/north-america.csv"
)
(sa_cities, sa_avgs, sa_maxs, sa_mins) = get_temperature_spread(
    "./raw/south-america.csv"
)
(europe_cities, europe_avgs, europe_maxs, europe_mins) = get_temperature_spread(
    "./raw/europe.csv"
)
(asia_cities, asia_avgs, asia_maxs, asia_mins) = get_temperature_spread(
    "./raw/asia.csv"
)
(oceania_cities, oceania_avgs, oceania_maxs, oceania_mins) = get_temperature_spread(
    "./raw/oceania.csv"
)

In [34]:
   
@interact
def plot_interactive(city_size=["all", "mega", "large", "medium", "small"], continent=["NA", "SA", "Europe", "Asia", "Oceania"]):
    """Plot minimum against maximum monthly temperature for a chosen city group.

    The list-valued defaults are the option lists that ``interact`` turns into
    the ``city_size`` and ``continent`` selection widgets; the function is
    called with one selected value of each.

    Parameters
    ----------
    city_size : str
        ``"all"`` to show every city of the continent, otherwise one of
        ``"mega"``, ``"large"``, ``"medium"`` or ``"small"`` to show only the
        cities that ``get_indices_matching_type`` selects for that type.
    continent : str
        One of ``"NA"``, ``"SA"``, ``"Europe"``, ``"Asia"`` or ``"Oceania"``.

    Raises
    ------
    KeyError
        If ``city_size`` or ``continent`` is not one of the values above.
    """
    cities, maxs, mins = {
        "NA": (na_cities, na_maxs, na_mins),
        "SA": (sa_cities, sa_maxs, sa_mins),
        "Europe": (europe_cities, europe_maxs, europe_mins),
        "Asia": (asia_cities, asia_maxs, asia_mins),
        "Oceania": (oceania_cities, oceania_maxs, oceania_mins),
    }[continent]

    # Only compute the index set that was actually requested instead of
    # evaluating the lookup for every city type on each widget change.
    if city_size == "all":
        indices = np.arange(len(cities))
    else:
        city_type = {
            "mega": CityType.MEGA,
            "large": CityType.LARGE,
            "medium": CityType.MEDIUM,
            "small": CityType.SMALL,
        }[city_size]
        indices = get_indices_matching_type(cities, city_type)

    # Figure side length: twice the integer square root of the number of
    # plotted cities, clamped to the range [4, 10].
    figlength = max(min(int(len(indices) ** 0.5) * 2, 10), 4)

    plot_labeled_scatterplot(
        cities[indices],
        maxs[indices],
        mins[indices],
        title=f"Temperature spread of {city_size} cities in {continent}",
        xlabel="Maximum monthly average temperature (ºC)",
        ylabel="Minimum monthly average temperature (ºC)",
        fig_kwargs={"figsize": (figlength, figlength)}
    )
    


interactive(children=(Dropdown(description='city_size', options=('all', 'mega', 'large', 'medium', 'small'), v…