In [1]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np

In [281]:
df  = pd.read_csv('../data_files/Employment_by_County.csv', index_col=0)
df = df.sort_values(by="unemployed_pct", ascending=False)
df.head(5)

Unnamed: 0_level_0,county,labor_force,employed,unemployed,unemployed_pct,population,race_white,race_black,race_native,race_asian,race_islander,race_other,race_two_or_more
geoid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
6025,"Imperial County, California",69602,53949,15653,22.5,179702,47537,4362,4266,3049,165,70528,49795
2230,"Skagway Municipality, Alaska",606,476,130,21.5,1240,1070,10,32,39,0,9,80
2158,"Kusilvak Census Area, Alaska",2299,1854,445,19.4,8368,173,16,7946,23,0,18,192
28063,"Jefferson County, Mississippi",1989,1623,366,18.4,7260,894,6191,12,15,5,9,134
15009,"Maui County, Hawaii",82820,68056,14764,17.8,164754,54203,1065,778,44328,20008,4204,40168


In [295]:
y_data = df['race_white'][0:30]
y_unemployed_pct = df['unemployed_pct'][0:30]
x = df['county'][0:30]

In [301]:
# Creating two subplots
fig = make_subplots(rows=1, cols=2, specs=[[{}, {}]], shared_xaxes=True,
                    shared_yaxes=False, vertical_spacing=0.001)

fig.append_trace(go.Bar(
    x=y_data,
    y=x,
    marker=dict(
        color='rgba(50, 171, 96, 0.6)',
        line=dict(
            color='rgba(50, 171, 96, 1.0)',
            width=1),
    ),
    name='Population of Race White by County',
    orientation='h',
), 1, 1)

fig.append_trace(go.Scatter(
    x=y_unemployed_pct, y=x,
    mode='lines+markers',
    line_color='rgb(128, 0, 0)',
    name='Overall Unemployment Percentage in that County',
), 1, 2)

fig.update_layout(
    title='Race Population in relation to location Unemployment Percent',
    yaxis=dict(
        showgrid=False,
        showline=False,
        showticklabels=True,
        domain=[0, .9],
    ),
    yaxis2=dict(
        showgrid=False,
        showline=True,
        showticklabels=False,
        linecolor='rgba(102, 102, 102, 0.8)',
        linewidth=2,
        domain=[0, .9],
    ),
    xaxis=dict(
        zeroline=False,
        showline=False,
        showticklabels=True,
        showgrid=True,
        domain=[0, 1],
    ),
    xaxis2=dict(
        zeroline=False,
        showline=False,
        showticklabels=True,
        showgrid=True,
        domain=[0, 1],
        side='top',
        dtick=10000,
    ),
    legend=dict(x=0.029, y=1.038, font_size=10),
    margin=dict(l=100, r=20, t=70, b=70),
    paper_bgcolor='rgba(255,255,255, 0)',
    plot_bgcolor='rgba(255,255,255, 0)',
    height=800
)

annotations = []

y_s = np.round(y_data, decimals=2)
y_nw = np.rint(y_unemployed_pct)

# Adding labels
for ydn, yd, xd in zip(y_nw, y_s, x):
    # labeling the scatter pct
    annotations.append(dict(xref='x2', yref='y2',
                            y=xd, x=20,
                            text='{:,}'.format(ydn) + '%',
                            font=dict(family='Arial', size=12,
                                      color='rgba(102, 102, 102, 0.8)'),
                            showarrow=False))
    # labeling the bar net worth
    annotations.append(dict(xref='x1', yref='y1',
                            y=xd, x=yd,
                            text=str(yd),
                            font=dict(family='Arial', size=12,
                                      color='rgb(50, 171, 96)'),
                            showarrow=False))
# Source
annotations.append(dict(xref='paper', yref='paper',
                        x=-0.2, y=-0.109,
                        font=dict(family='Arial', size=10, color='rgb(150,150,150)'),
                        showarrow=False))

fig.update_layout(annotations=annotations)

fig.show()

In [297]:
y_data_black = df['race_black'][0:30]

In [299]:
# Creating two subplots
fig_black = make_subplots(rows=1, cols=2, specs=[[{}, {}]], shared_xaxes=True,
                    shared_yaxes=False, vertical_spacing=0.001)

fig_black.append_trace(go.Bar(
    x=y_data_black,
    y=x,
    marker=dict(
        color='rgba(50, 171, 96, 0.6)',
        line=dict(
            color='rgba(50, 171, 96, 1.0)',
            width=1),
    ),
    name='Population of Race Black by County',
    orientation='h',
), 1, 1)

fig_black.append_trace(go.Scatter(
    x=y_unemployed_pct, y=x,
    mode='lines+markers',
    line_color='rgb(128, 0, 0)',
    name='Overall Unemployment Percentage in that County',
), 1, 2)

fig_black.update_layout(
    title='Race Population in relation to location Unemployment Percent',
    yaxis=dict(
        showgrid=False,
        showline=False,
        showticklabels=True,
        domain=[0, .9],
    ),
    yaxis2=dict(
        showgrid=False,
        showline=True,
        showticklabels=False,
        linecolor='rgba(102, 102, 102, 0.8)',
        linewidth=2,
        domain=[0, .9],
    ),
    xaxis=dict(
        zeroline=False,
        showline=False,
        showticklabels=True,
        showgrid=True,
        domain=[0, 1],
    ),
    xaxis2=dict(
        zeroline=False,
        showline=False,
        showticklabels=True,
        showgrid=True,
        domain=[0, 1],
        side='top',
        dtick=10000,
    ),
    legend=dict(x=0.029, y=1.038, font_size=10),
    margin=dict(l=100, r=20, t=70, b=70),
    paper_bgcolor='rgba(255,255,255, 0)',
    plot_bgcolor='rgba(255,255,255, 0)',
    height=800
)

annotations = []

y_s = np.round(y_data_black, decimals=2)
y_nw = np.rint(y_unemployed_pct)

# Adding labels
for ydn, yd, xd in zip(y_nw, y_s, x):
    # labeling the scatter pct
    annotations.append(dict(xref='x2', yref='y2',
                            y=xd, x=20,
                            text='{:,}'.format(ydn) + '%',
                            font=dict(family='Arial', size=12,
                                      color='rgba(102, 102, 102, 0.8)'),
                            showarrow=False))
    # labeling the bar net worth
    annotations.append(dict(xref='x1', yref='y1',
                            y=xd, x=yd,
                            text=str(yd),
                            font=dict(family='Arial', size=12,
                                      color='rgb(50, 171, 96)'),
                            showarrow=False))
# Source
annotations.append(dict(xref='paper', yref='paper',
                        x=-0.2, y=-0.109,
                        font=dict(family='Arial', size=10, color='rgb(150,150,150)'),
                        showarrow=False))

fig_black.update_layout(annotations=annotations)

fig_black.show()

In [289]:
import plotly.express as px


In [278]:
top_labels = ['White', 'Black', 'Asian', 'Other']


colors = ['aquamarine', 'blue', 'green', 'red', 'orange','aquamarine', 'blue', 'green', 'red', 'orange','aquamarine', 'blue', 'green', 'red', 'orange','aquamarine', 'blue', 'green', 'red', 'orange','aquamarine', 'blue', 'green', 'red', 'orange','aquamarine', 'blue', 'green', 'red', 'orange']

y_data = ['The course was effectively<br>organized',
          'The course developed my<br>abilities and skills ' +
          'for<br>the subject', 'The course developed ' +
          'my<br>ability to think critically about<br>the subject',
          'I would recommend this<br>course to a friend']

#x_data = df[['race_white', 'race_black', 'race_asian', 'race_other']][0:30]
x_datax = df.copy()[['race_white', 'race_black', 'race_asian', 'race_other']][0:30]
y_data = df['county'][0:30].tolist()

#x_data = np.array([x_datax['race_white'], x_datax['race_black'], x_datax['race_asian'], x_datax['race_other']])
#x_data
x_data = x_datax.values.tolist()

sum_of_rows = x_datax.sum(axis=1)
normalized_array = x_data / sum_of_rows[:, np.newaxis]
x_data = normalized_array


Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.



In [279]:
fig = go.Figure()

In [280]:
for i in range(0, len(x_data[0])):
    for xd, yd in zip(x_data, y_data):
        fig.add_trace(go.Bar(
            x=[xd[i]], y=[yd],
            orientation='h',
            marker=dict(
                color=colors[i],
                line=dict(color='rgb(248, 248, 249)', width=1)
            )
        ))

fig.update_layout(
    xaxis=dict(
        showgrid=False,
        showline=False,
        showticklabels=False,
        zeroline=False,
        domain=[0.15, 1]
    ),
    yaxis=dict(
        showgrid=False,
        showline=False,
        showticklabels=False,
        zeroline=False,
    ),
    barmode='stack',
    paper_bgcolor='rgb(255, 255, 255)',
    plot_bgcolor='rgb(255, 255, 255)',
    margin=dict(l=120, r=10, t=140, b=80),
    showlegend=False,
    height=800
)

annotations = []

for yd, xd in zip(y_data, x_data):
    # labeling the y-axis
    annotations.append(dict(xref='paper', yref='y',
                            x=0.14, y=yd,
                            xanchor='right',
                            text=str(yd),
                            font=dict(family='Arial', size=14,
                                      color='rgb(67, 67, 67)'),
                            showarrow=False, align='right'))
    # labeling the first percentage of each bar (x_axis)
    annotations.append(dict(xref='x', yref='y',
                            x=xd[0] / 2, y=yd,
                            text=str(xd[0]) + '%',
                            font=dict(family='Arial', size=14,
                                      color='rgb(20,20,20)'),
                            showarrow=False))
    # labeling the first Likert scale (on the top)
    if yd == y_data[-1]:
        annotations.append(dict(xref='x', yref='paper',
                                x=xd[0] / 2, y=1.1,
                                text=top_labels[0],
                                font=dict(family='Arial', size=14,
                                          color='rgb(67, 67, 67)'),
                                showarrow=False))
    space = xd[0]
    for i in range(1, len(xd)):
            # labeling the rest of percentages for each bar (x_axis)
            annotations.append(dict(xref='x', yref='y',
                                    x=space + (xd[i]/2), y=yd,
                                    text=str(xd[i]) ,
                                    font=dict(family='Arial', size=14,
                                              color='rgb(248, 248, 255)'),
                                    showarrow=False))
            # labeling the Likert scale
            if yd == y_data[-1]:
                annotations.append(dict(xref='x', yref='paper',
                                        x=space + (xd[i]/2), y=1.1,
                                        text=top_labels[i],
                                        font=dict(family='Arial', size=14,
                                                  color='rgb(67, 67, 67)'),
                                        showarrow=False))
            space += xd[i]

fig.update_layout(annotations=annotations)

fig.show()

In [215]:
print(x_data)

[[3259427, 794364, 1499984, 2784180], [2645512, 260469, 203696, 598605], [2345983, 1205824, 413271, 705153], [1720356, 907063, 349268, 895151], [1633129, 155813, 410752, 520994], [1383257, 53842, 706813, 548539], [1273054, 151468, 452475, 117830], [1112445, 84670, 210462, 85566], [1044549, 366727, 129437, 250491], [1028774, 772050, 373680, 302186], [1022846, 114294, 65779, 166174], [1016368, 286684, 237663, 348344], [995627, 156477, 171243, 637243], [946321, 163678, 58541, 14066], [924283, 574183, 183124, 477660], [919729, 161307, 68675, 308330], [893318, 170753, 105974, 22479], [882484, 674782, 64947, 52972], [855682, 262422, 44140, 111406], [855122, 171511, 97864, 54040], [847970, 23474, 51030, 110890], [847604, 229361, 221995, 208086], [817196, 153274, 164201, 137138], [802685, 299771, 74071, 49083], [796893, 400002, 44124, 319419], [782691, 184558, 182287, 621140], [776174, 531910, 75922, 167236], [767348, 237434, 71080, 129490], [736116, 370895, 44071, 35716], [715722, 152795, 281

In [219]:
sum_of_rows = x_datax.sum(axis=1)
normalized_array = x_data / sum_of_rows[:, np.newaxis]

print(normalized_array)

[[0.39091444 0.09527084 0.17989831 0.33391641]
 [0.71340637 0.0702398  0.05493002 0.16142381]
 [0.50232697 0.25819365 0.08849048 0.15098889]
 [0.44432541 0.23427194 0.09020729 0.23119537]
 [0.60026324 0.05726971 0.15097358 0.19149348]
 [0.51375383 0.01999739 0.26251657 0.20373221]
 [0.63817765 0.07593039 0.22682418 0.05906778]
 [0.74503581 0.05670589 0.14095234 0.05730596]
 [0.58315468 0.20473771 0.07226257 0.13984504]
 [0.41538263 0.31172654 0.15087879 0.12201204]
 [0.74709753 0.08348155 0.04804568 0.12137525]
 [0.53802872 0.15176022 0.12581026 0.1844008 ]
 [0.50782009 0.07981118 0.08734259 0.32502614]
 [0.80019973 0.13840451 0.04950169 0.01189407]
 [0.42805743 0.2659178  0.08480908 0.2212157 ]
 [0.63079776 0.11063269 0.04710087 0.21146868]
 [0.74909855 0.14318622 0.0888653  0.01884994]
 [0.52679794 0.40281044 0.03877005 0.03162158]
 [0.67183449 0.20603934 0.0346563  0.08746987]
 [0.72557926 0.14552874 0.08303855 0.04585346]
 [0.82059178 0.0227161  0.04938241 0.10730972]
 [0.56242742 


Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.



In [212]:
df2 = df.copy()[['race_white', 'race_black', 'race_asian', 'race_other']][0:30]
df_norm = (df2-df2.min())/(df2.max()-df2.min())
df_norm

Unnamed: 0_level_0,race_white,race_black,race_asian,race_other
geoid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
6037,1.0,0.651998,1.0,1.0
4013,0.758653,0.200444,0.109639,0.211016
17031,0.6409,1.0,0.253587,0.24948
48201,0.394949,0.747316,0.209626,0.318068
6073,0.360658,0.111929,0.251856,0.182999
6059,0.262426,0.025684,0.455207,0.192943
53033,0.219102,0.108254,0.280514,0.037458
25017,0.155963,0.051758,0.114286,0.025811
48439,0.129271,0.290314,0.058634,0.085348
36047,0.123069,0.633126,0.226393,0.10401
