# DS-260 Final #

### Emmalee Torson and Jacob Yanga ###

In [1]:
import pandas as pd

In [2]:
df = pd.read_spss("tastdb-exp-2020.sav")

In [3]:
# HELPER FUNCTIONS
#   Call these functions to help understand the data

def display_columns():
    ''' Print the name of every column in df, one per line '''

    for column_name in df.columns:
        print(column_name)

def display_top_importers():
    ''' Display the top importers sorted by total number of slaves imported

    Prints how many destination ports (MJSELIMP groups) exist in voyage_dict,
    followed by every port with its summed SLAMIMP, largest total first.
    Returns nothing.
    '''

    # Sum only the numeric column; the other columns of voyage_dict hold port
    # labels and have no meaningful total.
    totals = voyage_dict.groupby("MJSELIMP")["SLAMIMP"].sum()
    totals = totals.sort_values(ascending=False)

    print(str(len(totals)) + " destination ports")
    # Print explicitly: a bare expression inside a function shows nothing,
    # even in a notebook.
    print(totals.to_string())

def display_num_nans(max_nans=15000):
    ''' Display the number of NaNs in each column that has fewer than max_nans

    max_nans: columns with this many NaNs or more are skipped (default 15000).
              Pass None to list every column.
    '''

    print("Attribute\t\tNaNs")
    print("-"*40)
    for col in df.columns:
        # Count once per column and reuse it for both the filter and the output
        nan_count = df[col].isna().sum()
        if max_nans is None or nan_count < max_nans:
            print(str(col) + ":\t\t" + str(nan_count))
            
def ports_map():
    ''' Build two scatter-geo figures from the cached port coordinates

    Returns (fig1, fig2): fig1 places the depart_coords entries in red,
    fig2 places the arrive_coords entries in green. Both use the same map
    centre and latitude/longitude ranges.
    '''

    def scatter_ports(coords, colour):
        # One figure per coordinate table; only the table and colour differ.
        figure = px.scatter_geo(voyage_dict, size=voyage_dict["SLAMIMP"])
        figure.update_traces(
            lat=[pair[0] for pair in coords.values()],
            lon=[pair[1] for pair in coords.values()],
            marker=dict(color=colour, size=voyage_dict["SLAMIMP"]),
        )
        figure.update_geos(
            center=dict(lat=15, lon=-32),
            lataxis_range=[-50, 50],
            lonaxis_range=[-110, 60],
        )
        return figure

    fig2 = scatter_ports(arrive_coords, "green")
    fig1 = scatter_ports(depart_coords, "red")
    return fig1, fig2

In [4]:
# Voyages that have a value in all three columns (rows with any NaN are dropped)
voyage_columns = ["MJBYPTIMP", "MJSELIMP", "SLAMIMP"]
voyage_dict = df[voyage_columns].dropna()

# Per-MJSELIMP groups, plus a dict of them keyed by the left-stripped label
voyage_groups = voyage_dict.groupby("MJSELIMP")
voyage_groups_dict = {label.lstrip(): frame for label, frame in voyage_groups}

# Column totals per MJSELIMP, each column then sorted in descending order
voyage_by_port = (
    voyage_dict
    .sort_values(by="SLAMIMP", ascending=False)
    .groupby("MJSELIMP")
    .sum()
)
voyage_by_port = voyage_by_port.apply(
    lambda column: column.sort_values(ascending=False)
)
print(len(voyage_by_port))

72


In [5]:
import plotly.graph_objects as go
import plotly.express as px
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="MyApp")
import json


def save_coords():
    ''' Geocode every distinct port in voyage_dict and cache the results as JSON

    Writes depart_coords.txt (ports from MJBYPTIMP) and arrive_coords.txt
    (ports from MJSELIMP). Each file maps the left-stripped port name to
    whatever get_coordinates returned for it. A progress line is printed
    after each file.
    '''

    jobs = (
        ("MJBYPTIMP", "depart_coords.txt", "Step 1 complete!"),
        ("MJSELIMP", "arrive_coords.txt", "Step 2 complete!"),
    )
    for column, filename, done_message in jobs:
        coords = {}
        for city in voyage_dict[column].unique():
            coords[city.lstrip()] = get_coordinates(city)
        with open(filename, 'w') as file:
            file.write(json.dumps(coords))
        print(done_message)
    
    
    
def read_coords_from_file():
    ''' Load the cached port coordinates from disk

    Returns (depart_coords, arrive_coords): the decoded JSON contents of
    depart_coords.txt and arrive_coords.txt, in that order.
    '''

    loaded = []
    for filename in ("depart_coords.txt", "arrive_coords.txt"):
        with open(filename, 'r') as file:
            loaded.append(json.load(file))

    depart_coords, arrive_coords = loaded
    return depart_coords, arrive_coords
    
    

def get_coordinates(city):
    ''' Geocode city and return (latitude, longitude), or "NOT FOUND"

    The string "NOT FOUND" is returned both when the geocoder yields no
    match and when the lookup itself fails, so a bulk run over many ports
    is not aborted by a single bad entry.
    '''
    try:
        location = geolocator.geocode(city)
    except Exception:
        # Best-effort lookup: any geocoder failure becomes the sentinel.
        # Exception (not a bare except) lets Ctrl-C / SystemExit through.
        return "NOT FOUND"

    # No match comes back as None rather than as an exception.
    if location is None:
        return "NOT FOUND"

    return (location.latitude, location.longitude)
    
# Load the port coordinates cached in depart_coords.txt / arrive_coords.txt.
# save_coords() is not called here, so both files must already exist on disk.
depart_coords, arrive_coords = read_coords_from_file()

In [None]:
# Region names offered in the dropdown: every MJSELIMP group label with its
# leading whitespace stripped, minus the labels listed in `miscellaneous`.
groups = [label.lstrip() for label, _ in voyage_groups]
miscellaneous = ['Codes without labels', 'Bight of Benin', 'British Americas', 'Gold Coast', 'Netherlands', 'Other Brazil', 'Other Africa', 'Peru', 'England', 'France']
# Filter instead of calling list.remove(): remove() raises ValueError as soon
# as one of these labels is absent from the data.
groups = [label for label in groups if label not in miscellaneous]

# Voyages that have a year, both ports and a SLAMIMP value
time_series = df[["YEARAM", "MJBYPTIMP", "MJSELIMP", "SLAMIMP"]].dropna()

def display_graph(group_name):
    ''' Build a bar chart of SLAMIMP against YEARAM for one region

    group_name: region label without the leading space; rows are selected
                where MJSELIMP equals " " + group_name.
    Returns the plotly figure.
    '''

    selected = time_series["MJSELIMP"] == " " + group_name
    country_series = time_series[selected]

    figure = px.bar(
        x=country_series["YEARAM"],
        y=country_series["SLAMIMP"],
        title="Transportation of Enslaved Persons",
        labels={"x": "", "y": ""},
        template="ggplot2",
        opacity=1,
    )
    figure.update_layout(xaxis=dict(showticklabels=True))
    return figure


def _port_coords(table, port):
    ''' Return the [lat, lon] pair stored for port in table, or None if unusable '''
    # The lookups in this file use space-prefixed names while save_coords()
    # writes left-stripped keys, so try the name as given first and then
    # both spellings.
    candidates = [port]
    if isinstance(port, str):
        bare = port.lstrip()
        candidates += [bare, " " + bare]

    for key in candidates:
        coords = table.get(key)
        # Failed geocodes are stored as the string "NOT FOUND"; indexing that
        # would yield characters, so accept only a pair of numbers.
        if (isinstance(coords, (list, tuple)) and len(coords) == 2
                and all(isinstance(value, (int, float)) for value in coords)):
            return coords
    return None


def plot_voyage_group(group_name):
    ''' Build a map with one red line per voyage into the given region

    group_name: region label without leading whitespace (a key of
                voyage_groups_dict).
    Returns a plotly figure. Voyages whose MJBYPTIMP port has no usable
    coordinates are skipped; if the region itself has none, no lines are
    drawn. Raises KeyError when group_name is not in voyage_groups_dict.
    '''
    fig = px.scatter_geo(voyage_dict, size=voyage_dict["SLAMIMP"])

    fig.update_layout(
        title = {
            'text': "Dispersion of Enslaved Persons"})

    group = voyage_groups_dict[group_name]

    # The destination is the same for every voyage in the group: resolve it once.
    destination = _port_coords(arrive_coords, " " + group_name)
    if destination is not None:
        for voyage in group["MJBYPTIMP"]:
            origin = _port_coords(depart_coords, voyage)
            if origin is None:
                continue

            fig.add_trace(
                go.Scattergeo(
                    lon = [origin[1], destination[1]],
                    lat = [origin[0], destination[0]],
                    mode = 'lines',
                    line = dict(width = 1,color = 'red'),
                    opacity = 0.5,
                    hoverinfo = "name"
                )
            )

    fig.update_geos(
        center = dict(lat=15, lon=-32),
        lataxis_range=[-50,50], lonaxis_range=[-110, 60]
        )
    fig.update_traces(showlegend=False)
    return fig


def find_top_imports(group_name):
    ''' Return the ten MJBYPTIMP ports with the largest summed SLAMIMP for a region

    group_name: region label without leading whitespace (a key of
                voyage_groups_dict).
    Returns a DataFrame indexed by MJBYPTIMP with a single "SLAMIMP" column,
    sorted in descending order and limited to the first 10 rows.
    '''
    group = voyage_groups_dict[group_name]
    # Sum only SLAMIMP: the group's other remaining column (MJSELIMP) holds
    # port labels, which cannot be meaningfully added up.
    totals = group.groupby("MJBYPTIMP")[["SLAMIMP"]].sum()
    return totals.sort_values(by="SLAMIMP", ascending=False).head(10)


import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output

app = dash.Dash(__name__)

# Page layout: heading and intro text, a region dropdown, two graphs side by
# side (graph2, graph3) and one graph underneath (graph1).
# Dash style dictionaries take camelCased CSS property names
# (textAlign, fontFamily, ...) rather than the hyphenated CSS spelling.
app.layout = html.Div([

    html.H1("Trans-Atlantic Slave Trade", style = {'textAlign':'center', 'fontFamily': 'Arial', 'fontSize': '3em'}),
    html.H3("The figures below depict the number of enslaved persons transported, where they came from, and where they were taken between 1514 and 1866.",
            style = {'textAlign':'center', 'fontFamily':'Arial', 'fontSize': '1em'}),
    html.Br(),
    html.H4("Choose a region", style={'textAlign': 'center', 'fontFamily':'Arial', 'fontSize': '1.5em'}),
    dcc.Dropdown(groups, 'Bahia', id='dropdown-selection', searchable=False, clearable=False,
             style = {
                 'display': 'block',
                 'marginLeft': 'auto',
                 'marginRight': 'auto',
                 'width': '50%',
                 'height': '5%',
                 'fontFamily': 'Arial',
                 'fontWeight': 'bold',
                 'fontSize': '1.5em',
                 'textAlign': 'center'
             }),

    html.Div([
        dcc.Graph(id='graph2', style={'display': 'inline-block'}),
        dcc.Graph(id='graph3', style={'display': 'inline-block'})
    ]),

    dcc.Graph(id='graph1')

    ])

@app.callback(Output('graph2', 'figure'), Input('dropdown-selection', 'value'))
def update_graph(value):
    ''' Redraw graph2 with the voyage map of the region chosen in the dropdown '''
    return plot_voyage_group(value)

@app.callback(Output('graph1', 'figure'), Input('dropdown-selection', 'value'))
def update_graph(value):
    ''' Redraw graph1 with the bar chart of the region chosen in the dropdown '''
    figure = display_graph(value)
    return figure

@app.callback(Output('graph3', 'figure'), Input('dropdown-selection', 'value'))
def update_graph(value):
    ''' Redraw graph3: horizontal bars of the region's top ports by SLAMIMP '''
    top_ports = find_top_imports(value)

    figure = px.bar(
        y=top_ports.index,
        x=top_ports["SLAMIMP"],
        template="ggplot2",
        labels={"x": "", "y": ""},
        opacity=1,
        orientation='h',
    )
    # Reverse the y axis so the first (largest) row is drawn at the top
    figure.update_layout(
        yaxis=dict(autorange="reversed"),
        title={'text': "Top Imports of Enslaved Persons", 'x': 0.95},
    )
    return figure
   

# Start the Dash server only when this file is executed directly.
# host='0.0.0.0' listens on every network interface, port 8080; debug mode is
# switched on and the auto-reloader is switched off.
# NOTE(review): debug mode on an interface reachable from other machines is
# presumably meant for local/classroom use only - confirm before deploying.
if __name__ == '__main__':
    app.run_server(host='0.0.0.0', port=8080, debug=True, use_reloader=False)

Dash is running on http://0.0.0.0:8080/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on
