<a id="map"></a>
## Plot Tweet locations on a Map 

In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', -1)
import numpy as np
import matplotlib.pyplot as plt
import geopandas
from geopy.geocoders import Nominatim

In [2]:
airlines_keyworded = pd.read_csv('./data/airlines_keyworded.csv')

airlines_keyworded.drop(['Unnamed: 0','Unnamed: 0.1'],axis=1,inplace=True)

In [3]:
def get_coord_lat(place):
    geolocator = Nominatim(user_agent='myapplication',timeout=None)
    location = geolocator.geocode(place)
    if location != None:
        return location.raw['lat']

def get_coord_lon(place):
    geolocator = Nominatim(user_agent='myapplication',timeout=None)
    location = geolocator.geocode(place)
    if location != None:
        return location.raw['lon']

In [4]:
get_coord_lat ('merlion park singapore')

'1.28572965'

In [None]:
get_coord_lon('merlion park singapore')

'103.854207036822'

In [None]:
# Create two lists for the loop results to be placed
lat = []
lon = []

# For each row in a varible,
for row in airlines_keyworded['tweet_coord']:
    # Try to,
    try:
        # Split the row by comma and append
        # everything before the comma to lat
        lat.append(row.split(',')[0])
        # Split the row by comma and append
        # everything after the comma to lon
        lon.append(row.split(',')[1])
    # But if you get an error
    except:
        # append a missing value to lat
        lat.append(np.NaN)
        # append a missing value to lon
        lon.append(np.NaN)

airlines_keyworded.loc[:,'latitude']= lat
airlines_keyworded.loc[:,'longitude']= lon

airlines_keyworded.loc[:,'latitude'] = airlines_keyworded.loc[:,'latitude'].str.strip('[')
airlines_keyworded.loc[:,'longitude'] = airlines_keyworded.loc[:,'longitude'].str.strip(']')

airlines_keyworded.loc[:,'latitude'] = airlines_keyworded.loc[:,'latitude'].str.strip(' [')
airlines_keyworded.loc[:,'longitude'] = airlines_keyworded.loc[:,'longitude'].str.strip(']}')

airlines_keyworded.loc[:,'latitude'] = airlines_keyworded.loc[:,'latitude'].astype(float).fillna(0.0)
airlines_keyworded.loc[:,'longitude'] = airlines_keyworded.loc[:,'longitude'].astype(float).fillna(0.0)

In [None]:
airlines_keyworded[airlines_keyworded.loc[:,'latitude']==0.0]['latitude'] = airlines_keyworded.loc[:,'tweet_location'].apply(get_coord_lat)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [None]:
airlines_keyworded[airlines_keyworded.loc[:,'longitude']==0.0]['longitude'] = airlines_keyworded.loc[:,'tweet_location'].apply(get_coord_lon)

In [None]:
airlines_keyworded[airlines_keyworded.loc[:,'longitude']==0.0]

In [None]:
airlines_keyworded.to_csv('./data/airlines_coordinates.csv')

airlines = pd.read_csv('./data/airlines_coordinates.csv')

In [None]:
usa_only = airlines[(airlines.loc[:,'latitude'] >= 20)&(airlines.loc[:,'latitude'] <= 50)&(airlines.loc[:,'longitude'] <= -65)&(airlines.loc[:,'longitude'] >= -130)]

staff = usa_only[usa_only['keywords'].str.contains('staff',regex=False)]
delay = usa_only[usa_only['keywords'].str.contains('delay',regex=False)]
ticketing = usa_only[usa_only['keywords'].str.contains('ticketing_billing',regex=False)]

staff_coord = geopandas.GeoDataFrame(staff, geometry=geopandas.points_from_xy(staff.longitude, staff.latitude))
delay_coord = geopandas.GeoDataFrame(delay, geometry=geopandas.points_from_xy(delay.longitude, delay.latitude))
ticketing_coord = geopandas.GeoDataFrame(ticketing, geometry=geopandas.points_from_xy(ticketing.longitude, ticketing.latitude))

In [None]:
#for continental usa only
usa_map = geopandas.read_file('./states_21basic/states.shp')
usa_map = usa_map.loc[1:49,:]

ax = usa_map.plot(color='grey', edgecolor='white',figsize=(30, 20))

# We can now plot our GeoDataFrame.
staff_coord.plot(ax=ax, color='red',label='Staff')
delay_coord.plot(ax=ax, color='blue',label='Delays')
ticketing_coord.plot(ax=ax, color='gold',label='Ticketing & Billing')
plt.legend(prop={'size':15})

plt.show()