In [1]:
import pandas as pd
import geopandas as gpd
import plotly.express as px
import numpy as np
import random
import urllib.request
import json

In [2]:
# Simulation parameters: number of synthetic employees and the inclusive range
# of years for which each employee gets one row.
num_employees = 100
start_year = 2013
end_year = 2022

# Work states an employee can be assigned to. Every code listed here must also
# have an entry in `state_coordinates`, which is indexed by these codes later.
WORK_STATES = ['NY', 'CA', 'TX', 'FL', 'IL', 'MA', 'WA', 'GA']

# Private, seeded generator: Restart & Run All reproduces exactly the same
# synthetic data, and the global `random` state is left untouched.
RANDOM_SEED = 42
state_rng = random.Random(RANDOM_SEED)

years = range(start_year, end_year + 1)

# Column-oriented records; converted to a DataFrame once, after the loop.
data = {
    'EmployeeId': [],
    'Year': [],
    'current_year_workstate': [],
    'previous_year_workstate': []
}

for employee_id in range(1, num_employees + 1):
    # One independent draw per year for this employee.
    yearly_states = [state_rng.choice(WORK_STATES) for _ in years]
    # The first year has no history, so its "previous" state equals its current
    # state; every later year points at the state of the year before.
    prior_states = yearly_states[:1] + yearly_states[:-1]

    data['EmployeeId'].extend([employee_id] * len(years))
    data['Year'].extend(years)
    data['current_year_workstate'].extend(yearly_states)
    data['previous_year_workstate'].extend(prior_states)

# One row per (employee, year).
employee_data = pd.DataFrame(data)

In [4]:
# Read the US state shapes with GeoPandas.
# NOTE(review): absolute path (looks like a Colab working directory) - confirm
# the shapefile and its sidecar files are present there before running.
shapefile_path = '/content/States_shapefile.shp'
usa_map = gpd.read_file(shapefile_path)

In [5]:
# Lookup of state code -> (latitude, longitude) used to place markers.
# Extend it whenever a new state code can appear in the employee data.
# NOTE(review): the NY and WA pairs look like city coordinates (New York City,
# Seattle) rather than state centres - confirm this is intended.
state_coordinates = dict(
    NY=(40.7128, -74.0060),
    CA=(36.7783, -119.4179),
    TX=(31.9686, -99.9018),
    FL=(27.994402, -81.760254),
    IL=(40.633125, -89.398528),
    MA=(42.4072, -71.3824),
    WA=(47.6062, -122.3321),
    GA=(32.1656, -82.9001),
)

In [6]:
# Attach latitude/longitude columns for both the current and the previous work
# state. Each entry is (state column, index into the coordinate pair, new column).
# A state code missing from `state_coordinates` raises KeyError.
coordinate_columns = [
    ('current_year_workstate', 0, 'current_year_latitude'),
    ('current_year_workstate', 1, 'current_year_longitude'),
    ('previous_year_workstate', 0, 'previous_year_latitude'),
    ('previous_year_workstate', 1, 'previous_year_longitude'),
]
for state_column, axis, target_column in coordinate_columns:
    state_codes = employee_data[state_column]
    employee_data[target_column] = state_codes.map(lambda code: state_coordinates[code][axis])

In [7]:
# Join every employee-year row to the shape record of its current work state.
# The right join keeps all employee rows, including any whose state code has no
# matching `State_Code` in the shapefile.
merged_data = usa_map.merge(
    employee_data,
    how='right',
    left_on='State_Code',
    right_on='current_year_workstate',
)
merged_data

Unnamed: 0,FID,Program,State_Code,State_Name,Flowing_St,FID_1,geometry,EmployeeId,Year,current_year_workstate,previous_year_workstate,current_year_latitude,current_year_longitude,previous_year_latitude,previous_year_longitude
0,5,,CA,CALIFORNIA,N,923,"MULTIPOLYGON (((-121.66522 38.16929, -121.7823...",1,2013,CA,CA,36.7783,-119.4179,36.7783,-119.4179
1,48,WA-FSIS,WA,WASHINGTON,F,966,"MULTIPOLYGON (((-122.40202 48.22522, -122.4628...",1,2014,WA,CA,47.6062,-122.3321,36.7783,-119.4179
2,33,FIS,NY,NEW YORK,F,951,"MULTIPOLYGON (((-79.76323 42.26733, -79.44402 ...",1,2015,NY,WA,40.7128,-74.0060,47.6062,-122.3321
3,44,TX-TCEQ ACR,TX,TEXAS,F,962,"MULTIPOLYGON (((-105.99889 31.39394, -106.2132...",1,2016,TX,NY,31.9686,-99.9018,40.7128,-74.0060
4,5,,CA,CALIFORNIA,N,923,"MULTIPOLYGON (((-121.66522 38.16929, -121.7823...",1,2017,CA,TX,36.7783,-119.4179,31.9686,-99.9018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,33,FIS,NY,NEW YORK,F,951,"MULTIPOLYGON (((-79.76323 42.26733, -79.44402 ...",100,2018,NY,WA,40.7128,-74.0060,47.6062,-122.3321
996,33,FIS,NY,NEW YORK,F,951,"MULTIPOLYGON (((-79.76323 42.26733, -79.44402 ...",100,2019,NY,NY,40.7128,-74.0060,40.7128,-74.0060
997,22,MA-EPICS,MA,MASSACHUSETTS,F,940,"MULTIPOLYGON (((-71.31933 41.77220, -71.33980 ...",100,2020,MA,NY,42.4072,-71.3824,40.7128,-74.0060
998,44,TX-TCEQ ACR,TX,TEXAS,F,962,"MULTIPOLYGON (((-105.99889 31.39394, -106.2132...",100,2021,TX,MA,31.9686,-99.9018,42.4072,-71.3824


In [8]:
# Spread the markers out: rows with the same state code share identical
# coordinates, so without a random offset they would be drawn on top of each other.
jitter_amount = 0.5  # Maximum offset per axis, in degrees; adjust for desired spacing

# Seeded generator so the jitter, and therefore the rendered figure, is
# identical on every Restart & Run All.
JITTER_SEED = 42
jitter_rng = np.random.default_rng(JITTER_SEED)

# New jittered column -> source coordinate column it is derived from.
jitter_columns = {
    'jittered_latitude': 'current_year_latitude',
    'jittered_longitude': 'current_year_longitude',
    'jittered_prev_latitude': 'previous_year_latitude',
    'jittered_prev_longitude': 'previous_year_longitude',
}
for jittered_column, source_column in jitter_columns.items():
    # Independent uniform offset in [-jitter_amount, jitter_amount) for every row.
    offsets = jitter_rng.uniform(-jitter_amount, jitter_amount, len(merged_data))
    merged_data[jittered_column] = merged_data[source_column] + offsets


In [9]:
# Animated scatter map built with Plotly Express: one marker per employee per
# year, placed at the jittered coordinates of the employee's current work state
# and coloured by the state the employee worked in the year before.
fig = px.scatter_geo(
    merged_data,
    lon='jittered_longitude',
    lat='jittered_latitude',
    color='previous_year_workstate',
    animation_frame='Year',
    animation_group='EmployeeId',
    title='USA Employees Migration',
    projection='albers usa',
    basemap_visible=True,
    # Hide the jittered coordinates and the year in the tooltip; show the employee id.
    hover_data={
        'jittered_longitude': False,
        'jittered_latitude': False,
        'Year': False,
        'EmployeeId': True,
    },
    width=1200,
    height=800,
)

# Base-map styling: finer outline resolution, visible land and coloured coastlines.
fig.update_geos(
    resolution=50,
    showcoastlines=True,
    coastlinecolor='RebeccaPurple',
    showland=True,
)

# Slightly transparent markers with a thin outline so overlapping points stay
# distinguishable.
fig.update_traces(marker=dict(opacity=0.8, line=dict(width=1)))

# Slow the animation down to 2 seconds per frame. Plotly Express adds the
# animation controls as the first update menu, with Play as its first button.
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 2000

# Layout tweaks: centred title, tight margins, 500 ms layout transitions.
# No mapbox style is set here: this is a geo figure, so a mapbox setting would
# have no effect on it.
fig.update_layout(
    title_x=0.5,
    margin={"r": 0, "t": 40, "l": 0, "b": 0},
    transition={"duration": 500},
)

# Render the animated scatter map.
fig.show()