# US Populations
References:
- https://www.indexmundi.com/facts/united-states/quick-facts/all-states/population#map

Data Sources:
- https://www.census.gov/data/datasets/time-series/demo/popest/2010s-state-total.html

This notebook demonstrates the use of the following Python techniques:
- Web scraping using requests and Beautiful Soup
- Data processing using Pandas (data cleansing and merging)
- Interactive data visualization with Plotly 
- Choropleth Mapping with Plotly

Note:

A choropleth map is a type of thematic map in which areas are shaded or patterned in proportion to a statistical variable that represents an aggregate summary of a geographic characteristic within each area, such as population density or per-capita income. 

From Greek, khōra place + plēthos multitude.

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.io as pio

In [None]:
# Inspect Plotly's renderer configuration object (the notebook echoes the
# value of a cell's last expression).
pio.renderers

In [None]:
# Plotly takes its default renderer from `pio.renderers.default`; assigning to
# `pio.renderer` only creates an unused attribute on the module.
pio.renderers.default = "jupyterlab"
# Keep the old attribute so the later `pio.renderer` inspection cell still resolves.
pio.renderer = pio.renderers.default

In [None]:
# Page that is downloaded and scraped for the state population table below.
DATA_URL = "https://simple.wikipedia.org/wiki/List_of_U.S._states_by_population"

In [None]:
# Download the page. The timeout stops the cell from hanging on a stalled
# connection, and raise_for_status() turns an HTTP error status into an
# exception instead of silently parsing an error page.
page_response = requests.get(DATA_URL, timeout=30)
page_response.raise_for_status()
html = page_response.text

# Parse the markup and print a slice of it to locate the table of interest.
soup = BeautifulSoup(html, "lxml")
print(soup.prettify()[10000:20000])

In [None]:
# Locate the first table whose class attribute is "wikitable sortable" and
# preview the beginning of its markup.
my_table = soup.find("table", class_="wikitable sortable")
table_markup = my_table.prettify()
print(table_markup[:3000])

In [None]:
# Collect every table row. find_all() is the current Beautiful Soup name for
# the deprecated findAll() alias.
my_trs = my_table.find_all("tr")
my_trs[:5]

In [None]:
# Build a list of [state, population] pairs (both still strings) from the
# third and fourth <td> cell of each table row.
state_pop_list = []

for tr in my_trs[1:]:                          # skip the header row
    my_tds = tr.find_all("td")
    if len(my_tds) < 4:
        # Rows without enough <td> cells (e.g. additional header rows built
        # from <th>) carry no state data; skip them instead of raising
        # IndexError.
        continue
    state = my_tds[2].text.strip()             # drop surrounding whitespace/newlines
    pop = my_tds[3].text.strip()
    state_pop_list.append([state, pop])

state_pop_list[:4]
    

In [None]:
# Tabulate the scraped pairs; both columns hold the raw strings from the page.
column_names = ["State", "Population"]
df = pd.DataFrame(data=state_pop_list, columns=column_names)
df.head()

In [None]:
# Summarise the frame (pandas prints the column dtypes and non-null counts).
df.info()

In [None]:
# Display the whole frame; the next cell drops its last five rows.
df

In [None]:
# Drop the last five rows. An explicit copy is taken so that assigning to a
# column of df2 later does not operate on a slice of df (which triggers
# pandas' SettingWithCopyWarning).
df2 = df.iloc[:-5].copy()
df2

In [None]:
# Convert "1,234,567"-style strings to integers with vectorised string ops.
# astype(str) first makes the cell safe to re-run after the column is already
# numeric, and assign() returns a new frame rather than writing into a slice.
df2 = df2.assign(
    Population=(
        df2["Population"]
        .astype(str)
        .str.replace(",", "", regex=False)
        .astype("int64")
    )
)
df2

In [None]:
# Re-check the dtypes after the Population conversion in the previous cell.
df2.info()

In [None]:
# Order the rows from the largest to the smallest population.
df3 = df2.sort_values("Population", ascending=False)
df3

In [None]:
# Horizontal bar chart with one bar per state.
fig = px.bar(df3, x="Population", y="State", orientation="h", height=800)

# Treat the y axis as categorical; tickmode "linear" is presumably there so
# that every state gets its own tick label.
state_axis = {
    "tickangle": 0,
    "showticklabels": True,
    "type": "category",
    "tickmode": "linear",
}
fig.update_layout(title="US Population by States", yaxis=state_axis)
fig

In [None]:
# Show the renderer Plotly will actually use; the setting lives on
# `pio.renderers.default`, not on a `pio.renderer` attribute.
pio.renderers.default

In [None]:

# Load the tab-separated lookup file. It is read without a header row, so the
# columns get integer labels until they are named in the next cell.
fips_path = "states.txt"
state_fips = pd.read_csv(fips_path, delimiter="\t", header=None)
state_fips

In [None]:
# Name the three columns so that "State" can serve as the merge key.
fips_columns = ["State", "FIPS", "ST"]
state_fips.columns = fips_columns
state_fips.head()

In [None]:
# Attach the FIPS / ST columns to the population table. The inner join keeps
# only rows whose "State" value appears in both frames.
df4 = df3.merge(state_fips, on="State", how="inner")
df4

In [None]:
from urllib.request import urlopen
import json
# Download the US states GeoJSON. The timeout keeps the cell from hanging
# indefinitely on a stalled connection.
GEOJSON_URL = 'https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json'
with urlopen(GEOJSON_URL, timeout=30) as response:
    states = json.load(response)

# Peek at the first feature to see how the file is structured.
states["features"][0]

In [None]:
# Shade each state by its population. Locations are matched on the "ST"
# column (presumably two-letter state abbreviations, which is what the
# 'USA-states' location mode works with -- confirm against states.txt).
choropleth_options = dict(
    locations='ST',
    locationmode='USA-states',
    scope="usa",
    color='Population',
    color_continuous_scale="Viridis",
    hover_name="State",
    labels={'ST': 'State'},
)
fig = px.choropleth(df4, **choropleth_options)

# Optional: fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0}) removes the margins.
fig.show()