# <center>Class 4<br>Part 2: Other Graphics Tools</center>

## Objectives
In this class we will learn:
<ul>
    <li>How to create a choropleth map</li>
    <li>How to change the color scales</li>
    
</ul>

In [2]:
import pandas as pd
import plotly.graph_objects as go
import plotly

# Read the two yearly state tables and the state-name lookup table.
data_2010 = pd.read_csv('states/2010.csv')
data_2021 = pd.read_csv('states/2021.csv')
state_codes = pd.read_csv('states/state_codes.csv')

# Preview the first rows of each table to check what was loaded.
for preview in (data_2010, data_2021, state_codes):
    print(preview.head())

        GEO_ID       State            Total
0  0400000US01     Alabama  4779736(r38235)
1  0400000US02      Alaska   710231(r38823)
2  0400000US04     Arizona          6392017
3  0400000US05    Arkansas  2915918(r39193)
4  0400000US06  California         37253956
        GEO_ID       State     Total
0  0400000US01     Alabama   5024279
1  0400000US02      Alaska    733391
2  0400000US04     Arizona   7151502
3  0400000US05    Arkansas   3011524
4  0400000US06  California  39538223
        State  Abbrev Code
0     Alabama    Ala.   AL
1      Alaska  Alaska   AK
2     Arizona   Ariz.   AZ
3    Arkansas    Ark.   AR
4  California  Calif.   CA


In [3]:
# The 2010 'Total' column is read as text because some values carry a
# revision marker, e.g. "4779736(r38235)". Strip the "(r###)" suffix and
# convert the column to integers.
# The pattern is a raw string so that "\(" reaches the regex engine as-is
# instead of being flagged by Python as an invalid string escape.
data_2010['Total'] = (
    data_2010['Total']
    .str.replace(r'\(r[0-9]+?\)', '', regex=True)
    .astype(int)
)

# Attach the two-letter state code ('Code') to each yearly table.
# merge() defaults to an inner join, so any state whose name has no match in
# state_codes is dropped from the result.
data_2010 = data_2010.merge(state_codes[['State', 'Code']], on='State')
data_2021 = data_2021.merge(state_codes[['State', 'Code']], on='State')

print(data_2010.head())
print(data_2021.head())

        GEO_ID       State     Total Code
0  0400000US01     Alabama   4779736   AL
1  0400000US02      Alaska    710231   AK
2  0400000US04     Arizona   6392017   AZ
3  0400000US05    Arkansas   2915918   AR
4  0400000US06  California  37253956   CA
        GEO_ID       State     Total Code
0  0400000US01     Alabama   5024279   AL
1  0400000US02      Alaska    733391   AK
2  0400000US04     Arizona   7151502   AZ
3  0400000US05    Arkansas   3011524   AR
4  0400000US06  California  39538223   CA


## 1. Create a basic map
Here I will create a function that returns the figure object for a choropleth map.<br>
This will be a generic function which, for now, takes only a DataFrame and the year to show in the title.

In [4]:
def get_map(df, year):
    """Build a single-layer choropleth map of the US states.

    Parameters
    ----------
    df : DataFrame with a 'Code' column (used as the map locations) and a
        'Total' column (the values that are color-coded).
    year : value inserted into the figure title.

    Returns
    -------
    The plotly figure holding the choropleth trace.
    """
    # The color range runs from 0 up to the largest value in this frame.
    state_layer = go.Choropleth(
        locations=df['Code'],
        z=df['Total'],
        locationmode='USA-states',   # `locations` holds USA state codes
        visible=True,
        zmin=0,
        zmax=df['Total'].max(),
        hovertemplate="%{location}<br>Total Population: %{z}",
    )

    fig = go.Figure()
    fig.add_trace(state_layer)
    fig.update_layout(
        title=f'US Total Hispanic Population in {year}',
        geo=dict(scope='usa'),
        height=400,
    )
    return fig
# Render one basic map per year.
for frame, year in ((data_2010, 2010), (data_2021, 2021)):
    get_map(frame, year).show()

## 2. Let's add a time layer to this

In [5]:
# I find it easier to do this when the data is a panel (one row per state and year)
data_2010['Year'] = 2010
data_2021['Year'] = 2021
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two frames
# the same way (each frame keeps its own row index).
df_whole = pd.concat([data_2010, data_2021])

# Quick check of a single year:
#get_map(df_whole.loc[df_whole['Year']==2010], 2010).show()

In order to create a time-layered choropleth map we need to:
1. Add the labels for each time layer we plot; these will be rendered on top of the map. All layers must start hidden. This step is optional.
2. Add the actual data to be plotted in each time layer. All layers must start hidden.
3. Define which layer to show initially. Note that you could have tens of months/years to plot. Here we have only 2, so this could have been done manually.
4. Add a slider to be able to switch between layers.

In [6]:
# Create figure
fig = go.Figure()

# One time layer per year present in the data. groupby('Year') below walks the
# years in ascending order, so sorting here keeps `years` aligned with the
# order in which the traces are added.
years = sorted(df_whole['Year'].unique().tolist())
num_traces = len(years)

################################
# This takes care of step 1
# NOTE: this column is not read by the traces below; the labels drawn on the
# map are the raw 'Total' values.
df_whole['text'] = df_whole['Total'].apply(lambda x: 'Population: ' + str(x))

# Add map labels, one hidden trace per year (trace indices 0 .. num_traces-1)
for _, year_rows in df_whole.groupby('Year'):
    fig.add_trace(
        go.Scattergeo(
            visible=False,
            locations=year_rows['Code'],
            locationmode='USA-states',
            text=year_rows['Total'],
            mode='text',
            textposition='top left',
        )
    )
##############################


##############################
# Now step 2
# Add the Choropleth maps, one hidden trace per year
# (trace indices num_traces .. 2*num_traces-1).
# The color range is shared by all layers so colors are comparable across years.
total_min = df_whole['Total'].min()
total_max = df_whole['Total'].max()
for year, year_rows in df_whole.groupby('Year'):
    fig.add_trace(
        go.Choropleth(
            visible=False,
            locations=year_rows['Code'],          # the states
            z=year_rows['Total'],                 # data to be color-coded
            zmin=total_min,
            zmax=total_max,
            locationmode='USA-states',            # `locations` holds USA state codes
            name=str(year),
            hovertemplate="%{location}<br>Total Population: %{z}",
        )
    )
##############################

# Make both the last Scattergeo (text) trace and the last choropleth trace visible; lists are 0 bound
# Keep in mind that both the labels and the data layers are in the same data list.
fig.data[num_traces - 1].visible = True
fig.data[num_traces * 2 - 1].visible = True

##  Create the steps for the slider
# 'update' is used (rather than 'restyle') because each step changes both trace
# attributes (visible) and a layout attribute (title); with 'restyle' the
# second argument is not treated as a layout change, so the title never updates.
steps = []
for pos, year in enumerate(years):
    visible = [False] * (num_traces * 2)
    visible[pos] = True                 # label (Scattergeo) trace for this year
    visible[pos + num_traces] = True    # choropleth trace for this year
    steps.append({
        'method': 'update',
        'args': [
            {"visible": visible},
            {"title": "Total Hispanic Population on {}".format(year)},  # layout attribute
        ],
        'label': '{}'.format(year),
    })

# This makes the actual slider; it starts on the last step, matching the
# traces made visible above.
sliders = [{
    'active': num_traces - 1,
    'currentvalue': {"prefix": "Year: "},
    'pad': {"t": 0},
    'steps': steps
}]

#Just the final touches
fig.update_layout(
    title='Multi-layer Choropleth of US Hispanic Population',
    geo={
        'scope': 'usa',
        'projection': {'type': 'albers usa'}
    },
    sliders=sliders,
    height=400
)

#Figure is ready
fig.show()

### Let's try other colors
Let's make the same graph as above, but wrap it in a function that also takes a color scale as an argument.

In [7]:
def get_timed_map(color, df=None):
    """Build a time-layered choropleth with a year slider and a given color scale.

    The figure holds two traces per year: a text-label trace (Scattergeo) and
    a choropleth trace. Only the traces of one year are visible at a time; the
    slider switches between years.

    Parameters
    ----------
    color : color scale passed to the choropleth traces; it is also written
        into the figure title.
    df : optional DataFrame with 'Year', 'Code' and 'Total' columns. When
        omitted, the module-level `df_whole` is used.

    Returns
    -------
    The plotly figure, with the last year shown initially.
    """
    if df is None:
        df = df_whole

    # One time layer per year present in the data. groupby('Year') walks the
    # years in ascending order, so sorting keeps `years` aligned with the
    # order in which the traces are added.
    years = sorted(df['Year'].unique().tolist())
    num_traces = len(years)

    fig = go.Figure()

    ################################
    # Step 1: map labels, one hidden trace per year
    # (trace indices 0 .. num_traces-1).
    for _, year_rows in df.groupby('Year'):
        # Labels are in millions, matching the 'M' suffix.
        labels = (year_rows['Total'] / 1_000_000).map('{:.1f}M'.format)
        fig.add_trace(
            go.Scattergeo(
                visible=False,
                locations=year_rows['Code'],
                locationmode='USA-states',
                text=labels,
                mode='text',
                textposition='top left',
            )
        )
    ##############################

    ##############################
    # Step 2: choropleth maps, one hidden trace per year
    # (trace indices num_traces .. 2*num_traces-1).
    # The color range is shared by all layers so colors are comparable across years.
    total_min = df['Total'].min()
    total_max = df['Total'].max()
    for year, year_rows in df.groupby('Year'):
        fig.add_trace(
            go.Choropleth(
                visible=False,
                locations=year_rows['Code'],      # the states
                z=year_rows['Total'],             # data to be color-coded
                zmin=total_min,
                zmax=total_max,
                locationmode='USA-states',        # `locations` holds USA state codes
                colorscale=color,
                name=str(year),
                hovertemplate="%{location}<br>Total Population: %{z}",
            )
        )
    ##############################

    # Show the last label trace and the last choropleth trace initially.
    # Both kinds of trace live in the same fig.data list.
    fig.data[num_traces - 1].visible = True
    fig.data[num_traces * 2 - 1].visible = True

    # Slider steps. 'update' is used (rather than 'restyle') because each step
    # changes both trace attributes (visible) and a layout attribute (title).
    steps = []
    for pos, year in enumerate(years):
        visible = [False] * (num_traces * 2)
        visible[pos] = True                 # label (Scattergeo) trace for this year
        visible[pos + num_traces] = True    # choropleth trace for this year
        steps.append({
            'method': 'update',
            'args': [
                {"visible": visible},
                {"title": "Total Hispanic Population on {}".format(year)},  # layout attribute
            ],
            'label': '{}'.format(year),
        })

    # The slider starts on the last step, matching the traces made visible above.
    sliders = [{
        'active': num_traces - 1,
        'currentvalue': {"prefix": "Year: "},
        'pad': {"t": 0},
        'steps': steps
    }]

    #Just the final touches
    fig.update_layout(
        title='Multi-layer Choropleth of US Hispanic Population<br>{}'.format(color),
        geo={
            'scope': 'usa',
            'projection': {'type': 'albers usa'}
        },
        sliders=sliders,
        height=1000
    )

    #Figure is ready
    return fig

Plotly has a well-defined library of color scales that can be accessed via px.colors (for example px.colors.sequential).<br>
Some of these color scales are either too light or too dark. I selected the ones I like the most and I also edited the boundaries of some of the color scales. Here are the results.

In [8]:
import plotly.express as px

# Trimmed palettes: slicing drops colors from the start of each built-in list.
inferno = px.colors.sequential.Inferno[3:]
thermal = px.colors.sequential.thermal[5:]
# RdYlBu is reversed first and then loses its first two colors.
# NOTE(review): `rdylbu` is not part of `colorscales` below, which uses the
# named scale 'rdylbu_r' instead - confirm which one was intended.
reversed_rdylbu = px.colors.diverging.RdYlBu[::-1]
rdylbu = reversed_rdylbu[2:]
icefire = px.colors.cyclical.IceFire[3:]

# Mix of named plotly color scales (strings) and the trimmed lists from above.
colorscales = [
    'orrd',
    'temps',
    'hot_r',
    'rdylbu_r',
    'ylorrd',
    'gnbu_r',
    icefire,
    inferno,
    'balance',
    thermal,
    'agsunset',
]

# Draw the time-layered map once per color scale.
for color in colorscales:
    fig = get_timed_map(color)
    fig.show()