In [None]:
%matplotlib inline

In [None]:
import sys

## Basemap

Basemap was installed with the following commands:

`wget https://github.com/matplotlib/basemap/archive/v1.0.7rel.tar.gz`

`pip install --user v1.0.7rel.tar.gz`

Basemap can probably also be installed with `pip3` for Python 3, but that has not been tested yet; this notebook uses the Python 2.7 install above.

In [None]:
# Put the user-local Basemap install location at the front of the module search path.
# NOTE(review): this path is specific to one user account and to Python 2.7 —
# confirm/adjust before running on another machine.
sys.path[0:0] = ['/home/mehrman2/.local/lib/python2.7/site-packages/mpl_toolkits']

In [None]:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
import numpy as np

Minimal test plot:

In [None]:
# Smoke test: build a Basemap with the 'lcc' projection, draw the etopo
# background image on it, and show the figure to confirm the install works.
m = Basemap(
    projection='lcc',
    width=12000000,
    height=9000000,
    lat_0=50,
    lon_0=-107.,
    lat_1=45.,
    lat_2=55,
    resolution=None,
)
m.etopo()
plt.show()

## Mapping Geocodes

First, the usual functions...

In [None]:
import json, requests
import pandas as pd

In [None]:
# Load the six unigram JSON shards (0.json .. 5.json) into one flat list.
# NOTE(review): the shard directory is an absolute, machine-specific path.
unigrams = []
for i in range(6):
    with open("/home/cline/NYT_SPEED/Unigrams/{0}.json".format(i)) as f:
        unis = json.load(f)
    unigrams.extend(unis)

# Read the NYT API key: the last non-blank line of NYT.key, with surrounding
# whitespace (including the trailing newline) removed so the key can be used
# in URLs/headers as-is. NYT_KEY stays None when the file has no key line.
NYT_KEY = None
with open("NYT.key") as key:
    for line in key:
        stripped = line.strip()
        if stripped:
            NYT_KEY = stripped
    
def get_solr_data(uni_id, timeout=30):
    """Query the local Solr ``derived`` core for one unigram's article.

    Parameters
    ----------
    uni_id : int
        Index into the module-level ``unigrams`` list; the entry's ``'aid'``
        value is used as the Solr query term.
    timeout : float, optional
        Seconds to wait for Solr before ``requests`` raises a timeout error
        (without a timeout the call could block indefinitely).

    Returns
    -------
    requests.Response
        The unparsed response of the ``select`` request (``wt=json``).
    """
    aid = unigrams[uni_id]['aid']

    # Hand the query parameters to requests so they are URL-encoded for us
    # (the ':' in "aid:<value>" becomes %3A, and the aid itself is escaped).
    # A list of pairs keeps the parameter order stable.
    params = [
        ('q', 'aid:{0}'.format(aid)),
        ('wt', 'json'),
        ('indent', 'true'),
    ]
    r = requests.get("http://localhost:8983/solr/derived/select",
                     params=params, timeout=timeout)

    return r

def get_geo_df(uni_id):
    """Build a DataFrame of geolocation candidates for one unigram's article.

    Parameters
    ----------
    uni_id : int
        Index into ``unigrams``; passed through to ``get_solr_data``.

    Returns
    -------
    pandas.DataFrame
        Columns ``geolocation``, ``geolocation_locations`` and
        ``geolocation_probabilities``, built by transposing the three
        corresponding fields of the first Solr document. The frame is empty
        (same columns, zero rows) when Solr returns no document or the
        document lacks any of the three fields.
    """
    keys = ['geolocation', 'geolocation_locations', 'geolocation_probabilities']

    docs = get_solr_data(uni_id).json()['response']['docs']
    if not docs:
        # No matching article in Solr: report "no geolocations" the same way
        # as a document without geolocation fields, instead of an IndexError.
        return pd.DataFrame(columns=keys)

    js = docs[0]
    try:
        # Each field is turned into one row; transposing gives one column per field.
        df = pd.DataFrame([js[key] for key in keys]).T
        df.columns = keys
    except KeyError:
        df = pd.DataFrame(columns=keys)

    return df


#### Plot One Point

Takes a unigram ID (the NYT API key loaded above is not needed by this function) and plots the Solr geolocation with the highest probability.

In [None]:
def plot_one(uni_id):
    """Plot the highest-probability geolocation of one article on a map.

    Parameters
    ----------
    uni_id : int
        Index into ``unigrams``; passed through to ``get_geo_df``.

    Returns
    -------
    None
        The map is shown with ``plt.show()``.

    Raises
    ------
    IndexError
        If the article has no geolocations.
    """
    df = get_geo_df(uni_id)

    if df.shape[0] < 1:
        raise IndexError('Geolocations == 0. Try a different article')

    best = df.sort_values('geolocation_probabilities', ascending=False).iloc[0]

    # The geolocation string is split on ',' and read as (lat, lon).
    # Use a list, not map(): on Python 3 map() returns an iterator that
    # cannot be indexed.
    coords = [float(part) for part in best['geolocation'].split(",")]
    lat, lon = coords[0], coords[1]

    plt.figure(figsize=(12, 9), dpi=80)

    # Map window: 15 degrees of longitude and 10 degrees of latitude on
    # either side of the point.
    m = Basemap(projection='gall',
                llcrnrlon=lon - 15,               # lower-left corner longitude
                llcrnrlat=lat - 10,               # lower-left corner latitude
                urcrnrlon=lon + 15,               # upper-right corner longitude
                urcrnrlat=lat + 10,               # upper-right corner latitude
                resolution='l',
                area_thresh=100000.0)

    m.drawcoastlines()
    m.drawcountries()
    m.drawstates()
    m.fillcontinents(color='gainsboro')
    m.drawmapboundary(fill_color='steelblue')

    # Basemap instances are called with (lon, lat), not (lat, lon).
    x, y = m(lon, lat)

    m.plot(x, y, 'ro', markersize=18, alpha=0.7)

    plt.show()

    return None

In [None]:
# Example: map the top geolocation for the unigram at index 0.
plot_one(0)

#### Plot Many Points

The size of the points is proportional to the confidence of the prediction.

In [None]:
def plot_many(uni_id, num_to_plot):
    """Plot the most probable geolocations of one article on a map.

    Marker area is proportional to each geolocation's probability.

    Parameters
    ----------
    uni_id : int
        Index into ``unigrams``; passed through to ``get_geo_df``.
    num_to_plot : int
        How many of the highest-probability geolocations to draw (>= 1).

    Returns
    -------
    None
        The map is shown with ``plt.show()``.

    Raises
    ------
    ValueError
        If ``num_to_plot`` is less than 1.
    IndexError
        If the article has fewer geolocations than ``num_to_plot``.
    """
    if num_to_plot < 1:
        raise ValueError('num_to_plot must be at least 1')

    df = get_geo_df(uni_id)

    if df.shape[0] < num_to_plot:
        raise IndexError('Geolocations < number to plot. Try a lower value or a different article')

    top = df.sort_values('geolocation_probabilities', ascending=False).iloc[0:num_to_plot]

    # Split each "lat,lon" string into two columns, then attach the probabilities.
    points = top['geolocation'].str.split(',', expand=True)
    points.columns = ['lat', 'lon']
    points['probs'] = top['geolocation_probabilities']

    # Convert every column to numbers; unparseable values become NaN.
    points = points.apply(lambda col: pd.to_numeric(col, errors='coerce'))

    plt.figure(figsize=(16, 12), dpi=80)

    # Map window: bounding box of the points plus a 5 degree margin.
    # Series.min()/max() skip NaN, unlike the builtin min()/max(), whose
    # result depends on element order when a NaN is present.
    m = Basemap(projection='gall',
                llcrnrlon=points['lon'].min() - 5,   # lower-left corner longitude
                llcrnrlat=points['lat'].min() - 5,   # lower-left corner latitude
                urcrnrlon=points['lon'].max() + 5,   # upper-right corner longitude
                urcrnrlat=points['lat'].max() + 5,   # upper-right corner latitude
                resolution='l',
                area_thresh=100000.0)

    m.drawcoastlines()
    m.drawcountries()
    m.drawstates()
    # zorder=0 is passed so the filled continents sit at the lowest z-order.
    m.fillcontinents(color='gainsboro', zorder=0)
    m.drawmapboundary(fill_color='steelblue')

    # Basemap instances are called with (lon, lat), not (lat, lon).
    x, y = m(points['lon'].values, points['lat'].values)

    m.scatter(x, y, marker='o', color='r',
              s=(points['probs'] * 200).values, alpha=0.8)

    plt.show()

    return None

In [None]:
# Example: map the 6 most probable geolocations for the unigram at index 5.
plot_many(5, 6)