In [3]:
# Dependencies and Setup
import cProfile as cPr
import gc
from math import radians, sin, cos, sqrt, atan2
from requests import Session
import time
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ratelimiter import RateLimiter as RL
from scipy.stats import linregress

from requests_futures.sessions import FuturesSession 

from API.api_key import api_key as ak

# Incorporated citipy to determine city based on latitude and longitude
from citipy.citipy import nearest_city as nc, WORLD_CITIES_DICT as WCD

# Range of latitudes and longitudes 
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [26]:
#generates n random coordinate pairs as (latitude,longitude)
def random_coords(n):
    lat = np.random.uniform(lat_range[0], lat_range[1],n)
    lng = np.random.uniform(lng_range[0], lng_range[1],n)
    return zip(lat,lng)

In [27]:
def city_builder(n):
    allcities = []
    while len(allcities)<n:
        coordcity = {t: nc(*t).city_name for t in random_coords(n)}
        ashortlist = [coordcity[x] for x in coordcity]
        cities = allcities + ashortlist
        allcities = list(set(cities))
    finalcities = np.random.choice(allcities, n,replace=False)
    return finalcities

def city_plotter(cities):
    dic = {v.city_name: k for k,v in WCD.items()}
    gencoordy = []
    gencoordx = []
    for city in cities:
        gencoordy.append(dic[city][0])
        gencoordx.append(dic[city][1])

    fig, ax = plt.subplots(1,1,figsize=(10,12))
    countries = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
    countries.plot(color="grey",ax=ax)
    ax.plot(gencoordx,gencoordy,'bo',ms=5,alpha=.8)
    ax.grid(b=True, alpha=0.6)
    plt.savefig('outputs/worldmap.png')
    plt.close('all')
    return

In [28]:
def url_builder(cities,key):
    begin_url, api_url = f'http://api.openweathermap.org/data/2.5/weather?q=', f'&appid={key}&units=imperial'
    curl = lambda c: f"{begin_url}{c}{api_url}"
    urls = (curl(c) for c in cities)
    return urls

In [29]:
def url_split(url):
    url_split1 = url.split('=')
    url_split2 = url_split1[1].split('&')
    city = url_split2[0]
    return city

def call(s,u,counter):
    WeatherData = dict.fromkeys(['City','Country','Date','Latitude','Longitude','Max Temperature (F)','Humidity','Cloudiness','Wind Speed'])
    getcity = url_split
    city = getcity(u)
    try:
        res = s.get(u)
        res.raise_for_status()
        jres = res.json()
        WeatherData['City']            = jres['name']
        WeatherData['Country']         = jres['sys']['country']
        WeatherData['Date']            = jres['dt']
        WeatherData['Latitude']        = jres['coord']['lat']
        WeatherData['Longitude']       = jres['coord']['lon']
        WeatherData['Max Temperature (F)'] = jres['main']['temp_max']
        WeatherData['Humidity']        = jres['main']['humidity']
        WeatherData['Cloudiness']      = jres['clouds']['all']
        WeatherData['Wind Speed']      = jres['wind']['speed']
        print(f"===Record {counter}: {city.capitalize()} | <Status: {res.status_code}>===")
        with open('outputs/requestslog.txt' ,'a') as file:
            file.write(f"-Record {counter}: {city.capitalize()} | <Status: {res.status_code}>-\n")
        return WeatherData

    except:
        print(f'===<Status: {s.get(u).status_code}> | Request for city "{city.capitalize()}" was unsuccessful. Skipping...===')
        with open('outputs/requestslog.txt' ,'a') as file:
            file.write(f'<Status: {s.get.status_code}> | Request for city "{city.capitalize()}" was unsuccessful. Skipping...\n')
        return {city.capitalize(): 'Request Unsuccessful'}

In [30]:
def all_calls(urls):
    sesh = Session()
    counter = 1
    CityWeather = []
    print('*******************************')
    print('Commence Data Retrievals')
    print('*******************************')
    for u in urls:
        res = call(sesh,u,counter)
        if [*res.values()][0] == 'Request Unsuccessful':
            pass
        else:
            CityWeather.append(res)
            counter += 1
    print('*******************************')
    print('Incredibly Long List Finally Complete')
    print('*******************************')
    return CityWeather

def tocsv(d):
    df = pd.DataFrame(d)
    path='outputs/worldweather.csv'
    df.to_csv(path)
    print('Data exported to csv file.')
    return                                                           

# To request every response:
## Choose how many requests to make (n)
## Call 'city_writer(n)' to generate n random cities and plot their coordinates with a map of the world underneith
## Call 'url_builder(cities,key) with 'ak' as key and the cities returned by 'city_writer'
## Call 'all_calls(urls)' using the generator returned by 'url_builder'
## Finally, call 'tocsv(d)' using the dictionary returned by 'all_calls(urls)'
## You should be left with a print log of all the calls and a csv contaning all of the response data

In [17]:
@RL(max_calls=60,period=60)
def finale(n):
    try:
        builder = city_builder
        cities = builder(n)
        plot = city_plotter
        plot(cities)
        urls = url_builder
        calls = all_calls
        tocsv(calls(urls(cities,ak)))
    except:
        if os.path.exists("outputs/requestslog.txt"):
            os.remove("outputs/requestslog.txt")
        else:
            pass
        if os.path.exists("outputs/worldmap.png"):
            os.remove("outputs/worldmap.png")
        else:
            pass
    return

In [18]:
finale(1500)

*******************************
Commence Data Retrievals
*******************************
===Record 1: Loa janan | <Status 200>===
===Record 2: Sakakah | <Status 200>===
===Record 3: Usakos | <Status 200>===
===Record 4: Rundu | <Status 200>===
===Record 5: Kjopsvik | <Status 200>===
===<Status: 404> | Request for city "Galiwinku" was unsuccessful. Skipping...===
===Record 6: Airai | <Status 200>===
===Record 7: Astoria | <Status 200>===
===Record 8: Mashhad | <Status 200>===
===Record 9: Puerto ayora | <Status 200>===
===Record 10: Narsaq | <Status 200>===
===Record 11: Itarema | <Status 200>===
===Record 12: Sembakung | <Status 200>===
===Record 13: Suez | <Status 200>===
===Record 14: Garissa | <Status 200>===
===Record 15: Saint-jovite | <Status 200>===
===Record 16: Carora | <Status 200>===
===<Status: 404> | Request for city "Azimur" was unsuccessful. Skipping...===
===Record 17: Kyren | <Status 200>===
===Record 18: Isiolo | <Status 200>===
===Record 19: Vila velha | <Status 200>

# Function Graveyard
### In my attempt to minimize the number of city requests that responded with a 404, I decided to write an analog to citipy's 'nearest_city' function using OpenWeatherMap's own list of cities.
### Their list, a ~40 mB .json file with over 4 million entries (citipy's list has around 47,000), proved messy in content and difficult for my computer to process. 
### There were thousands of cities with the same name and country, just with slightly shifted coordinates, likely to give felxibility in searching by coordinate.
### After an attempt to clean, the set still had around a million cities.
### I made several attempts at a work-around (converting the .json to a DataFrame, then a .csv, taking the .json in chunks, streaming the .json, using different .json-serialization libraries, attempting to write asynchronously, avoiding list appends and loops as much as possible, etc...) but none of my code was fast enough to bare generating 100 cities, let alone over 1000 cities. This was depsite my code being almost identical to the inner workings of citipy's function.
### So, I swallowed my pride and returned to using citipy, which of course, turned out to work fine, with only about 100  cities returning a 404 status code when requested.

In [1]:
# # #finds the nearest city to any coordinate using OpenWeather's city list
# def coordcity(n):
#     return {t: NC(*t).city_name for t in random_coords(n)}

# def haversine(lat0,lng0,lat1,lng1):
#     latr0, latr1, lngr0, lngr1 = radians(lat0), radians(lat1), radians(lng0), radians(lng1)
#     dlat, dlng = (latr1-latr0), (lngr1-lngr0)
#     a = sin(dlat/2)**2 + cos(latr0)*cos(latr1)*sin(dlng/2)**2
#     c = 2 * atan2(sqrt(a), sqrt(1-a))
#     d = 6371 * c
#     return d

# def latlngfilter(lat,lng,n):
#     cities = [*data().index.values]
#     latlow, lathigh = lat - n, lat + n
#     lnglow, lnghigh = lng - n, lng + n
#     lam = lambda x: x[0] >= latlow and x[0] <= lathigh and x[1] >= lnglow and x[1] <= lnghigh
#     filtered = filter(lam,cities)
#     return filtered
    
# def bign(lat,lng):
#     n = 5
#     fil = lambda x: [*latlngfilter(lat,lng,x)]
#     while len(fil(n)) < 1:
#         n += 5
#     return n + 1

# def plantree(lat,lng):
#     n = bign(lat,lng)    
#     coordtree = spatial.cKDTree([x for x in latlngfilter(lat,lng,n)])
#     return coordtree

# def nc(lat,lng):
#     tree = plantree(lat,lng)
#     n = bign(lat,lng)
#     tree_query = tree.query_ball_point((lat,lng),n)
#     while len(tree_query) == 0:
#         n = n + 10
#         if n == 1000 and len(tree_query) == 0:
#             print('No city found | Default: Chicago')
#             return 'chicago'
#     havpairs = [(x,haversine(lat,lng,*x)) for x in tree.data[tree_query]]
#     if len(havpairs) == 0:
#         print('No city found | Default: Chicago')
#         return 'chicago'

# # Identify nearest city for each lat, lng combination
# def populatecities(n):
#     cities = []
#     while len(cities) < n:
#         noeditcities = [nc(x[0],x[1]) for x in random_coords(n)]
#         cities = list(set(noeditcities)) 
#     return cities

#     havmin = min(havpairs,key=lambda y: y[1])
#     return OWcities()[tuple(havmin[0])]

# # Identify nearest city for each lat, lng combination
# def populatecities2(n):
#     counter = 0
#     citydict = {}
#     allcities = []
#     while len(allcities) < n:
#         coords = [*random_coords(n)]
#         cities = (nc(x[0],x[1]) for x in coords if nc(x)!='antarctica')
#         citydict.update({counter: list(set(cities))})
#         counter += 1
#         allcities = [y for x in citydict.values() for y in x]
#         allcities = list(set(allcities))
#     return allcities

# Let's move on to the analysis!

In [35]:
def csvtoDF():
    weatherdf = pd.read_csv('outputs/worldweather.csv')
    return weatherdf

def latitudes():
    df = csvtoDF
    lats = [*df()['Latitude'].values]
    return lats

def northern():
    df = csvtoDF
    north = df()[df()['Latitude'] >= 0]
    return north

def southern():
    df = csvtoDF
    south = df()[df()['Latitude'] < 0]
    return south

def initialplots(linreg=False):
    north = northern
    south = southern
    xaxis = ['Max Temperature (F)','Humidity','Cloudiness', 'Wind Speed']
    for x in xaxis:
        yn = [*north()['Latitude'].values]
        xn = [*north()[x].values]
        ys = [*south()['Latitude'].values]
        xs = [*south()[x].values]
        if linreg != False:
            for z in [(xn,yn,'Northern','cool_r'),(xs,ys,'Southern','cool')]:
                fig, ax = plt.subplots(1,1,figsize=(10,10))
                m, b, r2, p, err = linregress(z[0],z[1])
                merr = linregress(z[0],z[1]).stderr
                berr = linregress(z[0],z[1]).intercept_stderr               
                ax.scatter(z[0],z[1],c=z[1],cmap=z[3])
                xlims = ax.get_xlim()
                xvals = np.linspace(xlims[0],xlims[1],1500)
                yvals = [m*n + b for n in xvals]
                ax.plot(xvals,yvals,color='k',label=f'r2 = {round(r2,4)}')
                ax.set_xlabel(x)
                ax.set_ylabel('Latitude')
                ax.set_title(f'{x} vs. {z[2]} Latitudes with Regression')
                ax.legend()
                regpath = f'outputs/{x}vs{z[2]}LatitudeRegression.png'
                plt.savefig(regpath)
                plt.close('all')
                print(f'***** Regression Results for {x} in the {z[2]} Hemisphere *****')
                print(f"In the {z[2]} hemisphere, on average, a city's {x} (x) and its latitude (L) have relation [L = {round(m,2)}*x + {round(b,2)}].")
                print(f"This relationship fits the data with a coefficient of determination of {round(r2,4)} and the regression has a p-value of {p}. The estimates for slope and intercept have error {round(merr,4)} and {round(berr,4)}, respectively.")
        else:
            fig, ax = plt.subplots(1,1,figsize=(10,10))
            ax.scatter(xn,yn,c=yn,cmap='cool_r')
            ax.scatter(xs,ys,c=ys,cmap='cool')
            ax.set_xlabel(x)
            ax.set_ylabel('Latitude')
            ax.set_title(f'{x} vs. Latitude')
            path = f'outputs/{x}vsLatitude.png'
            plt.savefig(path)
            plt.close('all')


In [27]:
initialplots(linreg=False)

# Initial Response

## From the look of these plots, the only weather attribute that is strongly related to Latitude is Max Temperature. As intuition would suggest, the maximum temperature increases as you get closer to the equator.

## The three other plots don't seem to show any correlation whatsoever between Latitude and the respecitve weather attribute.


In [36]:
initialplots(linreg=True)

***** Regression Results for Max Temperature (F) in the Northern Hemisphere *****
In the Northern hemisphere, on average, a city's Max Temperature (F) (x) and its latitude (L) have relation [L = -0.78*x + 85.81].
This relationship fits the data with a coefficient of determination of -0.8129 and the regression has a p-value of 1.2325007937636762e-235. The estimates for slope and intercept have error 0.0177 and 1.1724, respectively.
***** Regression Results for Max Temperature (F) in the Southern Hemisphere *****
In the Southern hemisphere, on average, a city's Max Temperature (F) (x) and its latitude (L) have relation [L = 0.72*x + -70.91].
This relationship fits the data with a coefficient of determination of 0.723 and the regression has a p-value of 1.1591465211477142e-63. The estimates for slope and intercept have error 0.0351 and 2.5854, respectively.
***** Regression Results for Humidity in the Northern Hemisphere *****
In the Northern hemisphere, on average, a city's Humidity (x) 

# Regression Analysis

## Like I said above, temperature and latitude certainly have a linear relationship, with temperature increasing as you get closer to the equator. The linear regressions have both relatively high absolute coefficients of determination and almost-zero p-values.

## Every other regression shows small coefficients of determination and larger p-values, meaning we can essentially reject the hypotheisis that the estimated line fits the data better than a line with slope zero.