# Final Project: Counties Display

- **Vintage**:  2020
- **Geography Level**: County    
- **Variables**:  https://api.census.gov/data/2020/acs/acs5/profile/variables.html 
- **Supported Geographies**: https://api.census.gov/data/2020/acs/acs5/profile/geography.html

### ***Question***:  What is the estimation and percent of population who speak Spanish at home for each county in California?  

In [6]:
import pandas as pd
import plotly.express as px

## 1. Read csv file

In [7]:
df = pd.read_csv('Data/Counties_Data.csv', dtype={'FIPS_State' : str, 
                                                  'FIPS_County' : str})
print(df.shape)
df.head()

(58, 8)


Unnamed: 0,County_Name,FIPS_State,FIPS_County,Rural_Status,Language spoken at home (Spanish) (DP02_0116E),Language spoken at home (Spanish) - Percent (DP02_0116PE),State_Name,Abbreviation
0,Alameda County,6,1,Urban,250597,16.0,California,CA
1,Alpine County,6,3,Rural,110,10.1,California,CA
2,Butte County,6,7,Urban,21187,10.0,California,CA
3,Colusa County,6,11,Rural,9998,50.2,California,CA
4,Contra Costa County,6,13,Urban,195737,18.1,California,CA


In [8]:
print("Data types: ")
df.dtypes

Data types: 


County_Name                                                   object
FIPS_State                                                    object
FIPS_County                                                   object
Rural_Status                                                  object
Language spoken at home (Spanish) (DP02_0116E)                 int64
Language spoken at home (Spanish) - Percent (DP02_0116PE)    float64
State_Name                                                    object
Abbreviation                                                  object
dtype: object

## 2. Plot Bar Chart Top 5 for:

### 2.1. Language spoken at home (Spanish) (DP02_0116E)

#### - Sort values

In [9]:
df_estimate = df.sort_values(by="Language spoken at home (Spanish) (DP02_0116E)", ascending=False)
df_estimate.head()

Unnamed: 0,County_Name,FIPS_State,FIPS_County,Rural_Status,Language spoken at home (Spanish) (DP02_0116E),Language spoken at home (Spanish) - Percent (DP02_0116PE),State_Name,Abbreviation
51,Los Angeles County,6,37,Urban,3657846,38.7,California,CA
24,Riverside County,6,65,Urban,779766,34.2,California,CA
28,San Diego County,6,73,Urban,753804,24.2,California,CA
21,Orange County,6,59,Urban,732305,24.5,California,CA
27,San Bernardino County,6,71,Urban,689338,34.3,California,CA


#### - Get Top 10

In [10]:
df_estimate = df_estimate.iloc[ : 5]
df_estimate

Unnamed: 0,County_Name,FIPS_State,FIPS_County,Rural_Status,Language spoken at home (Spanish) (DP02_0116E),Language spoken at home (Spanish) - Percent (DP02_0116PE),State_Name,Abbreviation
51,Los Angeles County,6,37,Urban,3657846,38.7,California,CA
24,Riverside County,6,65,Urban,779766,34.2,California,CA
28,San Diego County,6,73,Urban,753804,24.2,California,CA
21,Orange County,6,59,Urban,732305,24.5,California,CA
27,San Bernardino County,6,71,Urban,689338,34.3,California,CA


#### - Plot bar chart

In [11]:
df_estimate.sort_values(by="Language spoken at home (Spanish) (DP02_0116E)", ascending=True, inplace=True)

In [20]:
fig = px.bar(df_estimate,              
             x='County_Name', 
             y='Language spoken at home (Spanish) (DP02_0116E)',
             text='Language spoken at home (Spanish) (DP02_0116E)',
             orientation='v',   
             template='gridon',
             title='Top 5 Counties with Spanish as first language at home (in California) (DP02_0116E)')

# Formatting bar labels
fig.update_traces(textposition='auto', 
                  texttemplate='%{text:,.2s}'
                 )

fig.show()

### 2.2. Language spoken at home (Spanish) - Percent (DP02_0116PE)

#### - Sort values

In [13]:
df_percent = df.sort_values(by="Language spoken at home (Spanish) - Percent (DP02_0116PE)", ascending=False)
df_percent.head()

Unnamed: 0,County_Name,FIPS_State,FIPS_County,Rural_Status,Language spoken at home (Spanish) (DP02_0116E),Language spoken at home (Spanish) - Percent (DP02_0116PE),State_Name,Abbreviation
8,Imperial County,6,25,Urban,122197,73.6,California,CA
3,Colusa County,6,11,Rural,9998,50.2,California,CA
18,Monterey County,6,53,Urban,194372,48.3,California,CA
40,Tulare County,6,107,Urban,201625,47.2,California,CA
16,Merced County,6,47,Urban,110652,43.8,California,CA


#### - Get Top 10

In [14]:
df_percent = df_percent.iloc[ : 5]
df_percent

Unnamed: 0,County_Name,FIPS_State,FIPS_County,Rural_Status,Language spoken at home (Spanish) (DP02_0116E),Language spoken at home (Spanish) - Percent (DP02_0116PE),State_Name,Abbreviation
8,Imperial County,6,25,Urban,122197,73.6,California,CA
3,Colusa County,6,11,Rural,9998,50.2,California,CA
18,Monterey County,6,53,Urban,194372,48.3,California,CA
40,Tulare County,6,107,Urban,201625,47.2,California,CA
16,Merced County,6,47,Urban,110652,43.8,California,CA


#### - Get value in percentage format

In [15]:
df_percent['Language spoken at home (Spanish) - Percent (DP02_0116PE)'] = df_percent['Language spoken at home (Spanish) - Percent (DP02_0116PE)'] / 100

#### - Plot bar chart

In [16]:
df_percent.sort_values(by="Language spoken at home (Spanish) - Percent (DP02_0116PE)", ascending=True, inplace=True)

In [21]:
fig = px.bar(df_percent,              
             x='County_Name', 
             y='Language spoken at home (Spanish) - Percent (DP02_0116PE)',
             text='Language spoken at home (Spanish) - Percent (DP02_0116PE)',
             orientation='v',   
             template='gridon',
             title='Top 5 Counties with Spanish as first language at home (in California) - Percent (DP02_0116PE)')

# Formatting bar labels
fig.update_traces(textposition='auto', 
                  texttemplate='%{text:.1%}'
                 )

fig.show()

## 3. Plot US State Map for:

### 3.1. Language spoken at home (Spanish) (DP02_0116E)

In [18]:
fig = px.choropleth(df, 
                    scope="usa",    
                    locationmode='USA-states',          # Plot states of USA
                    locations='State_Abbreviation',     # Column containing State Abbeviations                   
                    
                    color='Language spoken at home (Spanish) (DP02_0116E)',             # Column determining map color for each State
                    hover_name='State_Name',            # Sets top label of Tooltip
                    color_continuous_scale='OrRd', 
                    title="What is the estimation of population for each US state who speak Spanish at home? "
                   )

fig.update_layout(margin={"r":0,"t":50,"l":0,"b":0})

fig.show()

ValueError: Value of 'locations' is not the name of a column in 'data_frame'. Expected one of ['County_Name', 'FIPS_State', 'FIPS_County', 'Rural_Status', 'Language spoken at home (Spanish) (DP02_0116E)', 'Language spoken at home (Spanish) - Percent (DP02_0116PE)', 'State_Name', 'Abbreviation'] but received: State_Abbreviation

### 3.2. Language spoken at home (Spanish) - Percent (DP02_0116PE)

In [None]:
fig = px.choropleth(df, 
                    scope="usa",    
                    locationmode='USA-states',          # Plot states of USA
                    locations='State_Abbreviation',     # Column containing State Abbeviations                   
                    
                    color='Language spoken at home (Spanish) - Percent (DP02_0116PE)',      # Column determining map color for each State
                    hover_name='State_Name',            # Sets top label of Tooltip
                    color_continuous_scale='OrRd', 
                    title="What is the percentage of population for each US state who speak Spanish at home? "
                   )

fig.update_layout(margin={"r":0,"t":50,"l":0,"b":0})

fig.show()