#### Import Libraries

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Switch on seaborn's default theme so the matplotlib figures below pick up its styling.
sns.set()

#### Import Data

In [None]:
# Load the census-tract CSV (2015 ACS data, judging by the file name) into a DataFrame.
# The path is relative, so it resolves against the notebook's working directory.
usa = pd.read_csv('../../data/census_2015/acs2015_census_tract_data.csv')

In [None]:
# Keep only the rows whose State column is California.
ca = usa.loc[usa['State'] == 'California']

In [None]:
# Show every column available in the California subset, one name per line.
for column_name in ca.columns:
    print(column_name)

In [None]:
# Count how many rows each county contributes, ordered by county name rather than by count.
ca['County'].value_counts().sort_index()

### Plot Swarm of a single column

In [None]:
# Swarm plot of Income for every county in the California subset.
_ = sns.swarmplot(data=ca, x="County", y="Income")
plt.show()

#### Create `county_df`, a subset of the `ca` dataframe that contains only a few select counties.

In [None]:
# Restrict the analysis to a handful of counties.
counties_to_examine = ["San Joaquin", "Los Angeles", "San Diego", "Contra Costa"]
county_df = ca[ca["County"].isin(counties_to_examine)]

# Sanity-check the filter: which counties remain, and how large the subset is.
print(county_df["County"].unique())
# Report the shape of the filtered frame (the original referenced an
# undefined name here, which raised NameError).
print("\nRows x Columns:", county_df.shape)

#### Plot Swarm of a single column again, this time using only the selected counties in `county_df`.

In [None]:
# Swarm plot of Income again, now restricted to the counties kept in county_df.
_ = sns.swarmplot(data=county_df, x="County", y="Income")
plt.show()

In [None]:
# Violin plot of the same Income-by-County data, for comparison with the swarm plot.
_ = sns.violinplot(data=county_df, x="County", y="Income")
plt.show()

### Plot ECDF of a single column.

In [None]:
def prep_ecdf(data):
    """Return the x and y coordinates of the empirical CDF (ECDF) of *data*.

    NaN entries in floating-point input are dropped before anything is
    counted, so the curve of the remaining observations ends at exactly 1.0.

    Args:
        data: One-dimensional collection of observations, e.g. a list,
            a NumPy array or a pandas Series.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays of equal length. ``x`` holds the
        observations sorted in ascending order and ``y[i]`` equals
        ``(i + 1) / n``, the cumulative proportion reached at the i-th sorted
        observation, where ``n`` is the number of non-NaN observations.
        Both arrays are empty when there are no observations.
    """
    values = np.asarray(data)

    # NaNs sort to the end of the array and produce no visible marker when
    # plotted, yet they would still be counted in n; the visible part of the
    # curve would then stop short of 1.0.
    if values.dtype.kind == "f":
        values = values[~np.isnan(values)]

    n = len(values)
    x = np.sort(values)
    y = np.arange(1, n + 1) / n

    return x, y

#### Prepare and Plot Income data for San Diego

In [None]:
# Prepare Income Data
is_sandiego = county_df["County"] == "San Diego"
county = county_df[is_sandiego]
county_income = county["Income"]

x,y = prep_ecdf(county_income)

In [None]:
#Inspect
# print(x)
# print("-"*100)
# print(y)

In [None]:
# Plot
_ = plt.plot(x, y, marker = ".", linestyle = "none")
_ = plt.title("ECDF of San Diego Incomes")
_ = plt.xlabel("County Income")
_ = plt.ylabel("ECDF")

#### Prepare and Plot Income data for All counties in `county_df`

In [None]:
# Overlay one ECDF per county so the income distributions can be compared directly.
labels = list(county_df["County"].unique())
for county in labels:
    in_county = county_df["County"] == county
    county_income = county_df.loc[in_county, "Income"]
    x, y = prep_ecdf(county_income)
    _ = plt.plot(x, y, marker=".", linestyle="none")

# The legend entries are given in the same order the lines were plotted.
_ = plt.legend(labels, loc="lower right")
_ = plt.xlabel("Incomes")
_ = plt.ylabel("ECDF")
plt.show()

In [None]:
# What can we say about the incomes of these 4 counties? Take a minute to ponder this.