In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
"""
Geocoding & reverse geocoding the 2017 regular season games of the LA Chargers; 
I also decided to plot each game location on GoogleMaps just for added fun.

###############################################################################

Additional imports to handle geocoding, map options, etc.
"""

from pygeocoder import Geocoder

from bokeh.io import output_file, output_notebook, show
from bokeh.models import (
  GMapPlot, GMapOptions, ColumnDataSource, Circle, LogColorMapper, BasicTicker, ColorBar,
    DataRange1d, PanTool, WheelZoomTool, BoxSelectTool
)
from bokeh.models.mappers import ColorMapper, LinearColorMapper
from bokeh.palettes import Viridis5

In [3]:
"""
Create a dictionary of raw data.
"""

# One "lat,lng" string per 2017 regular season game, in schedule order.
# 'At ...' marks an away game, 'Vs. ...' a home game.
#
# Every key must be unique: a repeated key in a dict literal silently
# replaces the earlier entry, which would drop a game from the schedule.
regsc_dta = {'At Broncos': '39.7439392,-105.020105',
             'Vs. Dolphins': '33.8643777,-118.2611426',
             'Vs. Chiefs': '33.8643777,-118.2611426',
             'Vs. Eagles': '33.8643777,-118.2611426',
             'At Giants': '40.8128397,-74.0742091',
             'At Raiders': '37.7515946,-122.2005458',
             'Vs. Broncos': '33.8643777,-118.2611426',
             'At Patriots': '42.0909458,-71.2643465',
             'At Jaguars': '30.3239248,-81.6373228',
             'Vs. Bills': '33.8643777,-118.2611426',
             'At Cowboys': '32.7472844,-97.0944939',
             'Vs. Browns': '33.8643777,-118.2611426',
             'Vs. Redskins': '33.8643777,-118.2611426',
             'At Chiefs': '39.0489391,-94.4839157',
             'At Jets': '40.8128397,-74.0742091',
             # The season finale is the home game against the Raiders; it
             # needs its own key so it cannot collide with the earlier
             # 'At Raiders' entry.
             'Vs. Raiders': '33.8643777,-118.2611426'}

# Chargers regular season game locations - 2017 (same order as the dict).
#
# SAF: 1701 Bryant St, Denver, CO 80204
# SHC: 18400 S Avalon Blvd, Carson, CA 90746
# SHC: 18400 S Avalon Blvd, Carson, CA 90746
# SHC: 18400 S Avalon Blvd, Carson, CA 90746
# MLS: 1 MetLife Stadium Dr, East Rutherford, NJ 07073
# O.co: 7000 Coliseum Way, Oakland, CA 94621
# SHC: 18400 S Avalon Blvd, Carson, CA 90746
# Gil: 1 Patriot Pl, Foxborough, MA 02035
#                  Bye Week                          #
# EvF: 1 Everbank Field Dr, Jacksonville, FL 32202
# SHC: 18400 S Avalon Blvd, Carson, CA 90746
# ATT: 1 AT&T Way, Arlington, TX 76011
# SHC: 18400 S Avalon Blvd, Carson, CA 90746
# SHC: 18400 S Avalon Blvd, Carson, CA 90746
# Arh: 1 Arrowhead Dr, Kansas City, MO 64129
# MLS: 1 MetLife Stadium Dr, East Rutherford, NJ 07073
# SHC: 18400 S Avalon Blvd, Carson, CA 90746

In [4]:
"""
Create DataFrame from dictionary.
"""

# One row per game: the dictionary keys become the index labels and the
# "lat,lng" strings become the single data column (named 0).
game_labels = list(regsc_dta.keys())
coord_strings = list(regsc_dta.values())
regsc_df = pd.DataFrame(coord_strings, index=game_labels)
regsc_df

Unnamed: 0,0
At Broncos,"39.7439392,-105.020105"
Vs. Dolphins,"33.8643777,-118.2611426"
Vs. Chiefs,"33.8643777,-118.2611426"
Vs. Eagles,"33.8643777,-118.2611426"
At Giants,"40.8128397,-74.0742091"
At Raiders,"37.7515946,-122.2005458"
Vs. Broncos,"33.8643777,-118.2611426"
At Patriots,"42.0909458,-71.2643465"
At Jaguars,"30.3239248,-81.6373228"
Vs. Bills,"33.8643777,-118.2611426"


In [5]:
"""
Separate the strings into lat. and long. and convert to floats.
Create two lists; one for latd, the other for longd.
"""

latd = []
longd = []

for row in regsc_df[0]:
    try:
        # Parse BOTH halves before appending anything. Appending the
        # latitude first would leave a stray value behind whenever the
        # longitude fails to parse, putting the two lists out of step
        # with each other and with the DataFrame rows.
        parts = row.split(',')
        lat, lng = float(parts[0]), float(parts[1])
    except (AttributeError, IndexError, ValueError):
        # AttributeError: the cell is not a string (e.g. a missing value).
        # IndexError:     there is no comma, so no longitude part.
        # ValueError:     the text is not a valid number.
        # Any other exception is unexpected and is allowed to surface.
        lat, lng = np.nan, np.nan

    latd.append(lat)
    longd.append(lng)

# Create new columns from latd and longd (one entry per row, in row order).
regsc_df['Latitude'] = latd
regsc_df['Longitude'] = longd

In [6]:
"""
Get improved DataFrame.
"""

# Show the frame again: it now carries the numeric Latitude and Longitude
# columns next to the original "lat,lng" string column (0).
regsc_df

Unnamed: 0,0,Latitude,Longitude
At Broncos,"39.7439392,-105.020105",39.743939,-105.020105
Vs. Dolphins,"33.8643777,-118.2611426",33.864378,-118.261143
Vs. Chiefs,"33.8643777,-118.2611426",33.864378,-118.261143
Vs. Eagles,"33.8643777,-118.2611426",33.864378,-118.261143
At Giants,"40.8128397,-74.0742091",40.81284,-74.074209
At Raiders,"37.7515946,-122.2005458",37.751595,-122.200546
Vs. Broncos,"33.8643777,-118.2611426",33.864378,-118.261143
At Patriots,"42.0909458,-71.2643465",42.090946,-71.264347
At Jaguars,"30.3239248,-81.6373228",30.323925,-81.637323
Vs. Bills,"33.8643777,-118.2611426",33.864378,-118.261143


In [7]:
"""
Apply reverse geocoding by feeding a specific latitude, longitude pair
(the Raiders in this case) into pygeocoder's reverse_geocode() function.
"""

# Look the game up by its index label rather than by `[5]`. The index holds
# strings, so an integer key only works through pandas' positional fallback
# in Series.__getitem__, which is deprecated. A label lookup also keeps
# working if the row order ever changes.
results = Geocoder.reverse_geocode(regsc_df.loc['At Raiders', 'Latitude'],
                                   regsc_df.loc['At Raiders', 'Longitude'])

In [8]:
"""
Check Raiders coordinates for accuracy.
"""

# Coordinates reported by the reverse-geocoded result. Going by the output
# shown below, they are close to, but not identical to, the queried pair.
results.coordinates

(37.7523526, -122.1993104)

In [9]:
"""
Confirm accuracy of the city.
"""

# City component of the reverse-geocoded result for the Raiders game.
results.city

'Oakland'

In [10]:
"""
We know where the game is, but what's the stadium's address?

NOTE: Google won't give us the address. Why?
"""

# No output is shown for this cell, i.e. the attribute came back empty.
# NOTE(review): presumably the reverse-geocoded result is not a street-level
# match, so no street address is available -- confirm against the
# pygeocoder documentation.
results.street_address

In [11]:
"""
Use administrative_area_level_1 to answer the question:
What state is the game being played in?
"""

# First-level administrative area of the result; the output below shows the
# state name ('California') for this game.
results.administrative_area_level_1

'California'

In [12]:
"""
Do we have the right address?
Input the address and use geocode function to verify.

NOTE: Google seems a bit picky; sometimes the correct address returns 'False' and vice versa. Why?
"""

# The street number is 7000, matching the venue list earlier in this
# notebook ("7000 Coliseum Way, Oakland, CA 94621"); "700" was a typo, so
# the check was validating a different address from the one intended.
Geocoder.geocode("7000 Coliseum Way, Oakland, CA 94621").valid_address

True

In [13]:
"""
GeoMapping the Team Schedule

Create csv with data, add a 'home-game' column then load.
"""

# Load the prepared schedule CSV and preview its first rows.
# Note: this rebinds `regsc_df`, replacing the frame built from the
# dictionary earlier in the notebook.
schedule_csv = '../../../data/NFL/GeoMapping_Chargers.csv'
regsc_df = pd.read_csv(schedule_csv)
regsc_df.head()

Unnamed: 0,competitions,latitude,longitude,venue,all_games,venue_capacity,venue_capacity_raw
0,Chargers at Broncos,39.743939,-105.020105,Sports Authority Field at Mile High,60,76125,76125
1,Chargers vs. Dolphins,33.864378,-118.261143,StubHub Center,10,27167,27167
2,Chargers vs. Chiefs,33.864378,-118.261143,StubHub Center,20,27167,27167
3,Chargers vs. Eagles,33.864378,-118.261143,StubHub Center,30,27167,27167
4,Chargers at Giants,40.81284,-74.074209,MetLife Stadium,60,82500,82500


In [14]:
# Keep only the rows whose `all_games` value is at most 1, then compare the
# element counts (rows x columns) of the full and the filtered frame.
at_most_one = regsc_df['all_games'].le(1)
regsc_df_i = regsc_df.loc[at_most_one]
print(regsc_df.size)
print(regsc_df_i.size)

112
0


In [15]:
"""
Map game locations with Google Maps.
"""

# Sanity check: a column converted with tolist() is a plain Python list,
# which is the form handed to the plot's data source further down.
type(regsc_df['latitude'].tolist())

list

In [16]:
# Initial view of the Google map: a road map centred at (39.5, -98.35),
# zoom level 4.
map_options = GMapOptions(lat=39.5, lng=-98.35, map_type="roadmap", zoom=4)

plot = GMapPlot(
    x_range=DataRange1d(), y_range=DataRange1d(), map_options=map_options
)
plot.title.text = "Plot of LA Chargers 2017 Regular Season Games"

# Read the Google Maps API key from the GOOGLE_API_KEY environment variable
# so a real key never has to be pasted into (and saved with) the notebook.
# Falls back to the placeholder when the variable is not set.
plot.api_key = os.environ.get("GOOGLE_API_KEY", "ADD_YOUR_API_KEY")

# One record per game: position, marker size (`all_games`) and the value
# that drives the marker colour (`venue_capacity`).
source = ColumnDataSource(
    data=dict(
        lat=regsc_df.latitude.tolist(),
        lon=regsc_df.longitude.tolist(),
        size=regsc_df.all_games.tolist(),
        color=regsc_df.venue_capacity.tolist()
    )
)

# Smallest / largest capacity in the data, used as explicit bounds for the
# colour scale so the colour bar always spans exactly the plotted range.
max_venue_capacity_raw = regsc_df['venue_capacity'].max()
min_venue_capacity_raw = regsc_df['venue_capacity'].min()

color_mapper = LinearColorMapper(
    palette=Viridis5,
    low=float(min_venue_capacity_raw),
    high=float(max_venue_capacity_raw),
)
circle = Circle(
    x="lon",
    y="lat",
    size="size",
    fill_color={'field': 'color', 'transform': color_mapper},
    fill_alpha=0.4,  # translucent, so stacked games at one venue stay visible
    line_color=None,
)
plot.add_glyph(source, circle)

color_bar = ColorBar(color_mapper=color_mapper, ticker=BasicTicker(),
                     label_standoff=12, border_line_color=None, location=(0,0))
plot.add_layout(color_bar, 'right')
plot.add_tools(PanTool(), WheelZoomTool(), BoxSelectTool())

# Generate the HTML page output with the map, and also render it inline in
# the notebook.
output_file("chargers_2017_schedule_plot.htm")
output_notebook()

show(plot)

W-1005 (SNAPPED_TOOLBAR_ANNOTATIONS): Snapped toolbars and annotations on the same side MAY overlap visually: GMapPlot(id='49d500b0-e1c3-4851-ad45-d77a00436a69', ...)


W-1005 (SNAPPED_TOOLBAR_ANNOTATIONS): Snapped toolbars and annotations on the same side MAY overlap visually: GMapPlot(id='49d500b0-e1c3-4851-ad45-d77a00436a69', ...)


In [None]:
"""
Geocoding & reverse geocoding the 2017 regular season games of the LA Chargers; 
I also decided to plot each game location on GoogleMaps just for added fun. 

##########################################################################################################

NOTE:

    1. I'm pretty certain that there is a better way to plot each location on the map.
    2. I had to provide numerical variations to each game in order for them to show up 'correctly';
       boolean values (my initial attempt) did not work as intended.
    3. Because of home games, or games played at the same stadium, some plotted points overlap;
       I used different values in order to highlight each game played in the same location.
    4. There were a few issues, notably, those that deal with getting the correct address to be returned
       by Google, etc., but perhaps they are minor in nature and easily fixed.

##########################################################################################################

CONCLUSIONS:

Working with GoogleMaps in jupyter is quite fun and provides some interesting possibilities; think, 
bioinformatics, species plotting (my next adventure), ecosphere plotting, business locations/branches, etc.
There are lots of possibilities that go well beyond this simple demonstrative example.

##########################################################################################################

SOURCE:

I got the idea of GeoMapping the games from Big Endian Data.
"""