### Book Banning in United States - Kate O'Laughlin

#### Table of Contents:
* [Library Plot](#LibraryPlot)
* [Book Ban Plot](#BookBanPlot)
* [Funding Per Student Plot](#fundingPlot)
* [Education Scores Plot](#EduScoresPlot)
* [Demographic Data Plot](#demographicPlot)
* [Book Ban Plot](#BookBanPlot)

In [1]:
# use necessary libraries
#!pip install dash-daq

import pandas as pd
import numpy as np
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output, State
import dash_daq as daq
import dash_bootstrap_components as dbc
import plotly.express as px
from plotly.subplots import make_subplots
import json

In [2]:
#Import all needed datasets
bannedBooks = pd.read_excel('data/banned book data/bannedBooks.xlsx', header=[2], \
                           usecols=['Author', 'Title', 'Type of Ban', 'State', \
                                    'District', 'Date of Challenge/Removal'])
library = pd.read_csv('data/public library data/libraries.csv')
stateLibrary = pd.read_csv('data/public library data/states.csv')
stateEdu = pd.read_csv('data/stateEducation/states_all.csv')
stateCode = pd.read_csv('data/stateCode/state_abbrev.csv')
stateScores = pd.read_csv('data/stateEducation/naep_states_summary.csv')
stateDemo = pd.read_csv('data/stateEducation/NCES_Enroll_G_9_12.csv', sep=',', header=3)
statesJson = open('data/us-states.json')
states = json.load(statesJson)

  library = pd.read_csv('data/public library data/libraries.csv')


-----------------------------------------------

#### Create Library Plot <span id='LibraryPlot'>

In [3]:
libraryCustomData = library[['Library Name', 'Street Address', 'City', \
                            'Zip Code', 'Young Adult Programs']].to_numpy()

In [4]:
#overlay library locations on Chloropleth Map
def addLibraries(fig):
    fig.add_trace(go.Scattermapbox(
        lat=library.Latitude,
        lon=library.Longitude,
        mode='markers',
        marker=go.scattermapbox.Marker(
            size=12,
            color='rgb(127, 134, 230)',
            opacity=0.7
        ),
        text=library['Library Name'],
        customdata=libraryCustomData,
        hovertemplate=
        "<b>%{customdata[0]}</b><br>" +
        "Address: %{customdata[1]}, %{customdata[2]}, %{customdata[3]}<br>" +        
        "Number of Young Adult Programs: %{customdata[4]}<br>",
        marker_size=4,
        ))
    
    return fig

------------------------------------

#### Set Up Banned Book Plot <span id='BookBanPlot'>

In [5]:
# per State, list out the Author, Book, and Number of Bans on each book

stateBooks = bannedBooks.groupby('State').count()
# merge in stateCode df for easy joining between future table merges
stateBooks = stateBooks.merge(stateCode, left_on='State', right_on='State', how='left')
stateBooks = stateBooks[['State', 'Author', 'Code']]

totalBooks = stateBooks.rename(columns={'Author': '# of Book Bans'})

print(totalBooks.head(5))

      State  # of Book Bans Code
0    Alaska               1   AK
1  Arkansas               1   AR
2   Florida             566   FL
3   Georgia              23   GA
4     Idaho              26   ID


In [6]:
#merged state codes in which banned books df
bannedBooks = bannedBooks.merge(stateCode, on='State', how='left')

In [7]:
#added top 5 titles to the book ban aggrated by state df
booksPerState = []

for idx, row in totalBooks.iterrows():
    state = row['Code']
    bookNames = pd.value_counts(np.array(bannedBooks[bannedBooks['Code'] == state]\
                                         ['Title']))[0:5].index   
    booksPerState.append([state, '<br>'.join([str(x) for x in bookNames])])
    
booksPerStateDf = pd.DataFrame(booksPerState, columns=['Code', 'Titles'])

totalBooks = totalBooks.merge(booksPerStateDf, on='Code', how='left')  

print(pd.DataFrame(totalBooks.head(5)))

      State  # of Book Bans Code  \
0    Alaska               1   AK   
1  Arkansas               1   AR   
2   Florida             566   FL   
3   Georgia              23   GA   
4     Idaho              26   ID   

                                              Titles  
0                             Gender Queer: A Memoir  
1        Beyond Magenta: Transgender Teens Speak Out  
2  The Hate U Give<br>Thirteen Reasons Why<br>Nin...  
3  The Absolutely True Diary of a Part-Time India...  
4  Leah on the Offbeat (Simonverse Series)<br>The...  


-------------------------------------------

#### Set up School Funding Plot <span id='fundingPlot'>

In [8]:
# select and format funding dataframe to be used in the plots
funding = stateEdu[stateEdu['YEAR'] == 2016]

#format state names
funding['STATE'] = funding['STATE'].str.title()
funding = funding.merge(stateCode, left_on='STATE', right_on='State', how='left')

#generate proper funding values
funding['Funding Per Student'] = funding['TOTAL_REVENUE']/funding['ENROLL']
funding['Funding Per Student Dollar'] = (funding['Funding Per Student']).apply(lambda x: '${:,.2f}'.format(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  funding['STATE'] = funding['STATE'].str.title()


In [9]:
#create funding plots
def revenueMap():
    fundingDf = funding[['STATE', 'YEAR', 'ENROLL', 
                           'TOTAL_REVENUE', 'Code',
                           'Funding Per Student', 'Funding Per Student Dollar']]
    fundingDf = fundingDf.sort_values(by='Funding Per Student', ascending = False)
    fundingDf['STATE'] = fundingDf['STATE'].str.replace('_', ' ')
    
    # merge in total books to show values in tooltip
    mergedDf = fundingDf.merge(totalBooks, on='Code', how='left')
    
    fig = px.scatter(mergedDf, color='STATE', x='# of Book Bans', y='Funding Per Student', 
                 hover_data=['STATE', 'Funding Per Student Dollar'])
    fig.update_layout(xaxis_tickangle=-45)

    return fig

def singleRevenueMap(states):
    fundingDf = funding[['STATE', 'YEAR', 'ENROLL', 
                           'TOTAL_REVENUE', 'Code',
                           'Funding Per Student', 'Funding Per Student Dollar']] 
    if states != None:
        statefunding = fundingDf[fundingDf['Code'].isin(states)]
    else:
        statefunding = fundingDf
    
    statefunding = statefunding.merge(totalBooks, on='Code', how='left')
    statefunding['# of Book Bans'] = statefunding['# of Book Bans'].fillna(0)
    
    statefunding = statefunding.sort_values(by='Funding Per Student', ascending = False)
    statefunding['STATE'] = statefunding['STATE'].str.replace('_', ' ')    
    enrollfig = px.scatter(statefunding,
                       x='# of Book Bans', y='Funding Per Student', color='STATE', title='Funding Per Student')
    enrollfig.update_layout(yaxis_title='Funding Per Student', yaxis_range=[0, 30])
    
    
    return enrollfig

---------------------------------------

#### Set up Education Scores Plot <span id='EduScoresPlot'>

In [10]:
# get education scores and do cleanup
stateScores = stateScores[stateScores['YEAR'] == 2019][['STATE', 'YEAR', 'AVG_MATH_8_SCORE', 'AVG_READING_8_SCORE']]
stateScores['STATE'] = stateScores['STATE'].str.replace('_', ' ')
stateScores['STATE'] = stateScores['STATE'].str.title()
stateScores = stateScores.merge(stateCode, left_on='STATE', right_on='State', how='left')

In [11]:
# get national scores 
# generate state specific math/reading columns that is the score - national average
# due to low differentiation in scores, this gives users an easier vis to compare states
national = stateScores[stateScores['STATE'] == 'National']
natReading = national['AVG_READING_8_SCORE'].iloc[0]
natMath = national['AVG_MATH_8_SCORE'].iloc[0]

stateScores['Math'] = stateScores['AVG_MATH_8_SCORE'] - natMath
stateScores['Reading'] = stateScores['AVG_READING_8_SCORE'] - natReading

stateScores = stateScores.sort_values(by=['Math', 'Reading'], ascending=False)

In [12]:
#create academic score plots
def createEduScoreFig():
    fig = px.histogram(stateScores, x="Code", y=["Math", "Reading"],
                 barmode='group', 
                 title='Math and Reading Scores Compared to National Average')
    fig.update_layout(
        xaxis_title_text='State', # xaxis label
        yaxis_title_text='Score', # yaxis label
        xaxis_tickangle=-45,
        legend_title = 'Score Subject',
        bargap=0.2, # gap between bars of adjacent location coordinates
        bargroupgap=0.1 # gap between bars of the same location coordinates
    )
    
    return fig


def createSingleEduScoreFig(states):
     
    if states != None:
        scoresToShow = stateScores[stateScores['Code'].isin(states)]
    else:
        scoresToShow = stateScores    
    
    fig = px.histogram(scoresToShow, x='Code', y=["Math", "Reading"], 
                 barmode='group', 
                 title='Math and Reading Scores Compared to National Average')
    fig.update_layout(
        xaxis_title_text='State', # xaxis label
        yaxis_title_text='Score', # yaxis label        
        xaxis_tickangle=-45,
        yaxis_range=[-15,15],
        legend_title = 'Score Subject',
        bargap=0.2, # gap between bars of adjacent location coordinates
        bargroupgap=0.1 # gap between bars of the same location coordinates
    )
    
    
    return fig

--------------------------------

#### Set up Demographic Data Plot <span id='demographicPlot'>

In [13]:
#extract only columns that are in the most recent year in the dataset
#merge in state codes
columnsToKeepMetric = stateDemo.columns[stateDemo.columns.str.endswith('2016-17')].tolist()
columnsToKeep = ["State Name"]
columnsToKeep.extend(columnsToKeepMetric)
stateDemo = stateDemo[columnsToKeep]
stateDemo = stateDemo.rename(columns={'State Name':'State'})

In [14]:
#clean state demographic data
stateDemo = stateDemo.replace('†', 0)
stateDemo = stateDemo.replace('–', 0)
stateDemo = stateDemo.replace('="0"', 0)
stateDemo = stateDemo.fillna(0)

stateDemo[columnsToKeepMetric] = stateDemo[columnsToKeepMetric].astype(float)

In [15]:
listOfStates = stateDemo['State'][:51].str.lower().unique().tolist()

In [16]:
#aggreate columns on demographics
#demographics were seperated by class year and we want an aggregate
whiteColumns = [col for col in stateDemo.columns if 'White' in col]
indianColumns = [col for col in stateDemo.columns if 'American Indian/Alaska Native' in col]
asianColumns = [col for col in stateDemo.columns if 'Asian' in col]
hispanicColumns = [col for col in stateDemo.columns if 'Hispanic' in col]
blackColumns = [col for col in stateDemo.columns if 'Black' in col]
hawaiianColumns = [col for col in stateDemo.columns if 'Hawaiian' in col]
twoRaceColumns = [col for col in stateDemo.columns if 'Two or More' in col]
colGroups = {'White': whiteColumns, 
             'AmericanIndian': indianColumns,
             'Asian': asianColumns,
             'Hispanic': hispanicColumns, 
             'Black': blackColumns,
             'Hawaiian': hawaiianColumns,
             'Two Or More': twoRaceColumns}

In [17]:
demographicData = pd.DataFrame(listOfStates, columns = ['State'])

In [18]:
#aggregate demographic data
for key in colGroups:
    group = colGroups[key]    
    group.append('State')
    groupInfo = stateDemo[group].groupby(stateDemo['State'].str.lower()).sum().sum(axis=1)
    groupDf = groupInfo.to_frame().reset_index()
    groupDf = groupDf.rename(columns={0: key})
    demographicData = demographicData.merge(groupDf, on='State', how='left')

In [19]:
demographicData['Total'] = demographicData.sum(axis=1)
demographicData['State'] = demographicData['State'].str.title()

  demographicData['Total'] = demographicData.sum(axis=1)


In [20]:
columns = ['White', 'AmericanIndian', 'Asian', 
    'Hispanic', 'Black','Hawaiian', 'Two Or More']

for col in columns:
    demographicData[col + ' %'] = (demographicData[col]/demographicData['Total']) *100

In [21]:
demographicData = demographicData.sort_values(by='State')

In [22]:
#plot demographic data
def createDemographicData():
    fig = go.Figure()
    fig.add_trace(go.Bar(
        y=demographicData["White %"],
        x=demographicData.State,
        name="White",
        marker=dict(
            color='RGB (0, 149, 168)',
            line=dict(color='RGB (0, 149, 168)', width=0.05)
        )
    ))
    fig.add_trace(go.Bar(
        y=demographicData["AmericanIndian %"],
        x=demographicData.State,
        name="American Indian/Alaska Native",
        marker=dict(
            color='RGB (17, 46, 81)',
            line=dict(color='RGB (17, 46, 81)', width=0.05)
        )
    ))
    fig.add_trace(go.Bar(
        y=demographicData["Asian %"],
        x=demographicData.State,
        name="Asian",
        marker=dict(
            color='RGB (255, 112, 67)',
            line=dict(color='RGB (255, 112, 67)', width=0.05)
        )
    ))

    fig.add_trace(go.Bar(
        y=demographicData["Hispanic %"],
        x=demographicData.State,
        name="Hispanic",
        marker=dict(
            color='RGB (120, 144, 156)',
            line=dict(color='RGB (120, 144, 156)', width=0.05)
        )
    ))

    fig.add_trace(go.Bar(
        y=demographicData["Black %"],
        x=demographicData.State,
        name="Black",
        marker=dict(
            color='RGB (46, 120, 210)',
            line=dict(color='RGB (46, 120, 210)', width=0.05)
        )
    ))

    fig.add_trace(go.Bar(
        y=demographicData["Hawaiian %"],
        x=demographicData.State,
        name="Hawaiian",
        marker=dict(
            color='RGB (0, 108, 122)',
            line=dict(color='RGB (0, 108, 122)', width=0.05)
        )
    ))

    fig.add_trace(go.Bar(
        y=demographicData["Two Or More %"],
        x=demographicData.State,
        name="Two Or More Races",
        marker=dict(
            color='RGB (255, 151, 118)',
            line=dict(color='RGB (255, 151, 118)', width=0.05)
        )
    ))
    fig.update_layout(
            yaxis=dict(
            title_text="Demographic %",
            #ticktext=["0%", "20%", "40%", "60%","80%","100%"]
            #tickvals=[0, 20, 40, 60, 80, 100],
            tickmode="array",        
            titlefont=dict(size=15),
        ),
        autosize=False,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        title={
            'text': "State Demographics",        
            'yanchor': 'top'},
        barmode='stack')

    fig.update_xaxes(tickangle=-45)

    return fig

def createSingleDemographicData(states):    
    
    fig = make_subplots(rows=3, cols=3,
                subplot_titles=("White","American Indian/Alaskan Native",
                                "Asian", "Hispanic", "Black", "Hawaiian", 
                                "Two Or More Races"),
                       vertical_spacing = 0.3)
    
    demoStateData = demographicData.merge(stateCode, on='State', how='left')

    if states != None:
        demoStateData = demoStateData[demoStateData['Code'].isin(states)]   
   
    fig.append_trace(go.Bar(
        y=demoStateData["White %"],
        x=demoStateData.Code,
        name="White",
        marker=dict(
            color='RGB (0, 149, 168)',
            line=dict(color='RGB (0, 149, 168)', width=0.05)
        )
    ), row=1, col=1)
    fig.append_trace(go.Bar(
        y=demoStateData["AmericanIndian %"],
        x=demoStateData.Code,
        name="American Indian/Alaska Native",
        marker=dict(
            color='RGB (17, 46, 81)',
            line=dict(color='RGB (17, 46, 81)', width=0.05)
        )
    ), row=1, col=2)
    fig.append_trace(go.Bar(
        y=demoStateData["Asian %"],
        x=demoStateData.Code,
        name="Asian",
        marker=dict(
            color='RGB (255, 112, 67)',
            line=dict(color='RGB (255, 112, 67)', width=0.05)
        )
    ), row=1, col=3)

    fig.append_trace(go.Bar(
        y=demoStateData["Hispanic %"],
        x=demoStateData.Code,
        name="Hispanic",
        marker=dict(
            color='RGB (120, 144, 156)',
            line=dict(color='RGB (120, 144, 156)', width=0.05)
        )
    ), row=2, col=1)

    fig.append_trace(go.Bar(
        y=demoStateData["Black %"],
        x=demoStateData.Code,
        name="Black",
        marker=dict(
            color='RGB (46, 120, 210)',
            line=dict(color='RGB (46, 120, 210)', width=0.05)
        )
    ), row=2, col=2)

    fig.append_trace(go.Bar(
        y=demoStateData["Hawaiian %"],
        x=demoStateData.Code,
        name="Hawaiian",
        marker=dict(
            color='RGB (0, 108, 122)',
            line=dict(color='RGB (0, 108, 122)', width=0.05)
        )
    ), row=2, col=3)

    fig.append_trace(go.Bar(
        y=demoStateData["Two Or More %"],
        x=demoStateData.Code,
        name="Two Or More Races",
        marker=dict(
            color='RGB (255, 151, 118)',
            line=dict(color='RGB (255, 151, 118)', width=0.05)
        )
    ), row=3, col=1)

    fig.update_layout(
            legend=dict(
                orientation="h",
                yanchor="top",
                y=1.15,
                xanchor="right",
                x=1
            ),
            yaxis=dict(range=[0, 100]), 
            yaxis2=dict(range=[0, 100]), 
            yaxis3=dict(range=[0, 100]), 
            yaxis4=dict(range=[0, 100]), 
            yaxis5=dict(range=[0, 100]), 
            yaxis6=dict(range=[0, 100]), 
            yaxis7=dict(range=[0, 100]),             
           
        autosize=False,
        width=1200,
        height=600,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        title={
            'text': "State Demographics",        
            'yanchor': 'top'},
        barmode='stack')     

    fig.update_xaxes(tickangle=45)    
    
    return fig

In [23]:
#build map of book bans
def chloroplethMap(switch):
    df = totalBooks
    locationCol = 'Code'
    colorCol = '# of Book Bans'
    label = '# of Book Bans'
    
    fig = px.choropleth_mapbox(df, geojson=states, 
                           locations=locationCol, 
                           color=colorCol,
                           color_continuous_scale="brwnyl",
                           range_color=(0, df[colorCol].max()),
                           mapbox_style="carto-positron",
                           labels = df[label],
                           zoom=2.75, 
                           center = {"lat": 37.0902, "lon": -95.7129},
                           opacity=0.5,
                           hover_data = ['Code', '# of Book Bans', 'Titles'] 
                          )
    
    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
    
    if switch: 
        addLibraries(fig)
    
    return fig

In [24]:
stateOptions = []
for idx, state in stateCode.iterrows():
    stateOptions.append({'label':state['State'], 'value':state['Code']})

In [None]:
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

tab1_content = html.Div(
        [
            dbc.Row([
                dbc.Col(html.H3("Banned Books Across America"), 
                        className='ps-4',
                        xl=6, align="center"),
                dbc.Col(
                    html.Div([
                        html.Label('Select Data'),
                        dcc.Dropdown(
                        options = [
                            {'label': 'Total Ban Counts', 'value': 0},                            
                            {'label': 'Funding per Student', 'value': 1},
                            {'label': 'Education Scores', 'value': 2},
                            {'label': 'Demographic Data', 'value': 3},
                        ],value=0, id='map-dropdown'),
                    ]), xl=3),
                dbc.Col(dbc.Checklist(
                    id='library-switch',
                     options=[
                        {"label": "Show Libraries", "value": True},
                    ],
                    value=False,                    
                    switch=True
                ), xl=3, align="center")
            ], className= 'pb-3'),
            dcc.Graph(id="mainMap")
        ], className="mt-3")

tab2_content = html.Div([
        dbc.Row([            
            dbc.Col(
                html.Div([
                    html.Label('Select States'),
                    dcc.Dropdown(
                    options = stateOptions, id='state-dropdown',
                    multi=True, searchable=True),
                ]),
                className='ms-3', xl=5, lg=6),
            dbc.Col(html.Div([
                html.H5('Book Bans:'),
                html.Div(id='bookBanInfo', children=[]),
            ]), xl=6, lg=6, className='alert alert-info me-3'),
        ], className= 'pb-3 pt-3'),        
        dbc.Row([
            dbc.Col(dcc.Graph(id='compare-rev'), lg=6),
            dbc.Col(dcc.Graph(id='compare-edu'), lg=6),
        ]),       
        dcc.Graph(id='compare-demo'),
])

app.layout = dbc.Tabs(
    [
        dbc.Tab(tab1_content, label="All States"),
        dbc.Tab(tab2_content, label="Compare States"),       
    ]
)


@app.callback(
    Output('mainMap', 'figure'),
    Input('map-dropdown', 'value'),
    Input('library-switch', 'value')
)
def update_figure(dropdown, switch):
    if dropdown == 0: #total ban
        return chloroplethMap(switch)    
    elif dropdown == 1: #funding
        return revenueMap()
    elif dropdown == 2: #education scores
        return createEduScoreFig()
    elif dropdown == 3: #demographics
        return createDemographicData()
    else: 
        return changeMap(totalBooks, 'Code', 'Number of <br>Book Bans', switch)

@app.callback(
    Output('compare-rev', 'figure'),
    Output('compare-edu', 'figure'),
    Output('compare-demo', 'figure'),
    Output('bookBanInfo', 'children'),
    Input('state-dropdown', 'value')
)
def update_compare(states):   
    
    subplot1 = singleRevenueMap(states)        
    subplot2 = createSingleEduScoreFig(states)        
    subplot3 = createSingleDemographicData(states) 
    
    bookBans = []
    if states != None:        
        for state in states:  
            stateFull = stateCode[stateCode['Code'] == state]['State'].iloc[0]
            info = totalBooks[totalBooks['Code'] == state]   
            if len(info) == 1: 
                bookNumber = info['# of Book Bans'].iloc[0]
            else:
                bookNumber = '0'

            bookBans.append(stateFull + ' : ' + str(bookNumber))
        
        bookBans = ',   '.join(bookBans)
        
    else:
        bookBans = [html.Span('No States Chosen')]
    
    return subplot1, subplot2, subplot3, bookBans

if __name__ == '__main__':
    app.run_server(debug=True, use_reloader=False, port=8050)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
