In [10]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt 
from sklearn.linear_model import LinearRegression

In [15]:
# Load the cleaned dataset (compressed CSV, path relative to the notebook)
# and preview its first five rows.
data = pd.read_csv(filepath_or_buffer="../data/DataWithLocationCleaned.csv.gz")
data.head(n=5)

Unnamed: 0,Port Name,State,Port Code,Border,Date,Measure,Value,Year,Month,position
0,Warroad,Minnesota,3423,US-Canada Border,2019-06-01,Truck Containers Full,133,2019,6,"(48.905266, -95.314404)"
1,Warroad,Minnesota,3423,US-Canada Border,2019-06-01,Truck Containers Empty,298,2019,6,"(48.905266, -95.314404)"
2,Warroad,Minnesota,3423,US-Canada Border,2019-06-01,Personal Vehicles,10383,2019,6,"(48.905266, -95.314404)"
3,Warroad,Minnesota,3423,US-Canada Border,2019-06-01,Personal Vehicle Passengers,19459,2019,6,"(48.905266, -95.314404)"
4,Warroad,Minnesota,3423,US-Canada Border,2019-06-01,Pedestrians,2,2019,6,"(48.905266, -95.314404)"


In [16]:
# Keep only the rows whose Measure is one of the four person-count measures,
# then renumber the rows from zero without keeping the old index as a column.
persons = (
    data.loc[
        data['Measure'].isin([
            'Bus Passengers',
            'Pedestrians',
            'Personal Vehicle Passengers',
            'Train Passengers',
        ])
    ]
    .reset_index(drop=True)
)
persons.head()

Unnamed: 0,Port Name,State,Port Code,Border,Date,Measure,Value,Year,Month,position
0,Warroad,Minnesota,3423,US-Canada Border,2019-06-01,Personal Vehicle Passengers,19459,2019,6,"(48.905266, -95.314404)"
1,Warroad,Minnesota,3423,US-Canada Border,2019-06-01,Pedestrians,2,2019,6,"(48.905266, -95.314404)"
2,Warroad,Minnesota,3423,US-Canada Border,2019-06-01,Bus Passengers,63,2019,6,"(48.905266, -95.314404)"
3,Roseau,Minnesota,3426,US-Canada Border,2019-06-01,Personal Vehicle Passengers,7385,2019,6,"(48.7710371, -95.7697882)"
4,Roseau,Minnesota,3426,US-Canada Border,2019-06-01,Bus Passengers,118,2019,6,"(48.7710371, -95.7697882)"


In [17]:
# Total the person counts per distinct position string; `as_index=False`
# keeps 'position' as an ordinary column next to the summed 'Value'.
persons_location = persons.groupby('position', as_index=False)['Value'].sum()
persons_location.head()

Unnamed: 0,position,Value
0,"('44.911752', '-66.985965')",4179922
1,"('48.999904', '-116.180196')",5224679
2,"(25.9140256, -97.4890856)",369145766
3,"(26.0922991, -97.9572242)",85353154
4,"(26.3530256, -98.216445)",406092835


In [18]:
### Plotly for interactive plots
import plotly.express as px
import plotly.graph_objects as go

def getlat(x):
    """Extract the latitude component from a textual ``(lat, long)`` position.

    Both layouts that occur in the data are supported: bare numbers such as
    ``"(48.905266, -95.314404)"`` and quoted numbers such as
    ``"('44.911752', '-66.985965')"``.

    Parameters
    ----------
    x : str
        Position string whose latitude precedes the first comma.

    Returns
    -------
    str
        The latitude text with surrounding parentheses, quotes and whitespace
        removed. An empty string is returned when nothing precedes the comma.
    """
    # Everything before the first comma is the latitude part, e.g. "('44.911752'".
    lat_part = x.partition(',')[0]
    # Strip by character class instead of slicing at fixed offsets, so an empty
    # part or unexpected spacing/quoting cannot raise or leave stray characters.
    return lat_part.strip(" \t\n()'\"")

def getlong(x):
    """Extract the longitude component from a textual ``(lat, long)`` position.

    Both layouts that occur in the data are supported: bare numbers such as
    ``"(48.905266, -95.314404)"`` and quoted numbers such as
    ``"('44.911752', '-66.985965')"``.

    Parameters
    ----------
    x : str
        Position string whose longitude follows the first comma.

    Returns
    -------
    str
        The longitude text with surrounding parentheses, quotes and whitespace
        removed. An empty string is returned when nothing follows the comma.
    """
    # Everything after the first comma is the longitude part, e.g. " '-66.985965')".
    lon_part = x.partition(',')[2]
    # Strip by character class instead of slicing at fixed offsets: the old
    # slices assumed exactly one space after the comma and otherwise cut off
    # the first real character (typically the minus sign).
    return lon_part.strip(" \t\n()'\"")

# Split each textual position into separate latitude / longitude columns.
persons_location['latitude'] = persons_location['position'].map(getlat)
persons_location['longitude'] = persons_location['position'].map(getlong)

# Lookup table with one row per distinct (port name, position) pair, indexed by position.
ps = data[['Port Name', 'position']].drop_duplicates().set_index('position')

# A position may match more than one port name, so every match is joined
# into a single comma-separated label before being mapped back.
persons_location['Ports'] = persons_location['position'].map(
    ps.groupby(level=0)['Port Name'].agg(', '.join)
)

# Hover text: the port label followed by the summed value expressed in millions.
persons_location['text'] = (
    persons_location['Ports']
    + '<br>Crossings: '
    + (persons_location['Value'] / 1e6).astype(str)
    + ' million'
)

# Marker fill colour, and the divisor that converts the summed 'Value'
# into a marker size.
color = "crimson"
scale = 500000

# Bubble map: one marker per position, placed by longitude/latitude and
# carrying the prepared hover text.
fig = go.Figure()
fig.add_trace(go.Scattergeo(
    locationmode = 'USA-states',
    lon = persons_location['longitude'],
    lat = persons_location['latitude'],
    text = persons_location['text'],
    marker = dict(
        # With sizemode='area' the marker *area* (not its diameter) tracks the value.
        size = persons_location['Value']/scale,
        color = color,
        line_color='rgb(23,203,40)',
        line_width=0.5,
        sizemode = 'area')))

fig.update_layout(
        # The previous title told readers to click the legend, but the legend
        # is disabled below and the figure has a single trace, so that hint
        # was dropped.
        title_text = 'US Borders, total inbound persons since 1996',
        showlegend = False,
        geo = dict(
            scope = 'usa',
            landcolor = 'rgb(217, 217, 217)',
        )
    )

fig.show()