<a href="https://colab.research.google.com/github/katherinetien/revisiting-baltimore-sf-teenbirth/blob/main/baltimore_sf_teenbirth_kathytien.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Comparing Baltimore and San Francisco Teen Birth Rates 

*   How do teen birth rates differ between races within the same income level and neighborhood?
*   How do teen birth rates differ between the two cities?

# import libraries

In [1]:
pip install plotly



In [2]:
# import libraries 

#for data analysis 
import pandas as pd
import numpy as np

#for visualization 
import plotly.express as px

# import data

In [3]:
# load data

# Baltimore: teen birth rates for the low-income Black population
df_balt_teenbirth_b = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/katherinetien/comparing-baltimore-san-francisco-teenbirth/master/balt-teenbirth-b.csv"
)

In [4]:
# show the first five rows of the Baltimore Black-population frame

df_balt_teenbirth_b.head(n=5)

Unnamed: 0,tract,Neighborhood,City,State,Teenage Birth Rate,Race
0,24510200701,Allendale,Baltimore,MD,0.4713,Black
1,24510270101,Arcadia,Baltimore,MD,0.582,Black
2,24510271801,Arlington,Baltimore,MD,0.5628,Black
3,24510260404,Baltimore Highlands,Baltimore,MD,0.539,Black
4,24510260800,Baltimore Highlands,Baltimore,MD,0.3809,Black


In [5]:
# load the remaining datasets

# Baltimore: teen birth rates for the low-income White population
df_balt_teenbirth_w = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/katherinetien/comparing-baltimore-san-francisco-teenbirth/master/balt-teenbirth-w1.csv"
)

In [6]:
# show the first five rows of the Baltimore White-population frame
df_balt_teenbirth_w.head(n=5)

Unnamed: 0,tract,Neighborhood,City,State,Teenage Birth Rate,Race
0,24510270101,Arcadia,Baltimore,MD,0.0494,White
1,24510260401,Armistead Gardens,Baltimore,MD,0.2887,White
2,24510260404,Baltimore Highlands,Baltimore,MD,0.4296,White
3,24510260800,Baltimore Highlands,Baltimore,MD,0.3596,White
4,24510250101,Beechfield,Baltimore,MD,0.4052,White


In [7]:
# San Francisco: teen birth rates for the low-income Black population
df_sf_teenbirth_b = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/katherinetien/comparing-baltimore-san-francisco-teenbirth/master/sf-teenbirth-b.csv"
)

In [8]:
# San Francisco: teen birth rates for the low-income White population
df_sf_teenbirth_w = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/katherinetien/comparing-baltimore-san-francisco-teenbirth/master/sf-teenbirth-w.csv"
)

In [9]:
# show the last five rows of the San Francisco White-population frame

df_sf_teenbirth_w.tail(n=5)

Unnamed: 0,tract,Neighborhood,City,State,Teenage Birth Rate,Race
101,6075031100,Outer Mission,San Francisco,CA,0.0,White
102,6075022704,Potrero Hill,San Francisco,CA,0.0,White
103,6075060400,Lakeshore,San Francisco,CA,0.0,White
104,6075032902,Sunset District,San Francisco,CA,0.0,White
105,6075015400,Laurel Heights,San Francisco,CA,0.0,White


# filter data for baltimore city only

In [10]:
# list the column labels of the Baltimore Black-population frame
df_balt_teenbirth_b.columns

Index(['tract', 'Neighborhood', 'City', 'State', 'Teenage Birth Rate', 'Race'], dtype='object')

In [11]:
# summary statistics for the numeric columns (quartiles spelled out explicitly;
# these are the same percentiles describe() reports by default)
df_balt_teenbirth_b.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,tract,Teenage Birth Rate
count,168.0,168.0
mean,24480130000.0,0.511733
std,119829900.0,0.092474
min,24003750000.0,0.1682
25%,24510110000.0,0.463975
50%,24510180000.0,0.5226
75%,24510260000.0,0.5736
max,24510280000.0,0.7126


In [12]:
# look at the first five rows again before filtering on City
df_balt_teenbirth_b.head(n=5)

Unnamed: 0,tract,Neighborhood,City,State,Teenage Birth Rate,Race
0,24510200701,Allendale,Baltimore,MD,0.4713,Black
1,24510270101,Arcadia,Baltimore,MD,0.582,Black
2,24510271801,Arlington,Baltimore,MD,0.5628,Black
3,24510260404,Baltimore Highlands,Baltimore,MD,0.539,Black
4,24510260800,Baltimore Highlands,Baltimore,MD,0.3809,Black


In [13]:
# keep only the rows whose City value is exactly "Baltimore"
df_balt_teenbirth_b_onlybalt = df_balt_teenbirth_b.loc[
    df_balt_teenbirth_b["City"].eq("Baltimore")
]

# merge data

In [14]:
# combine the Baltimore Black- and White-population rates into one frame

# Left join: every row of the Black-population frame is kept, and the
# White-population rate is attached wherever the Neighborhood matches
# (NaN where there is no match). Overlapping rate columns get the default
# _x (left) / _y (right) suffixes.
# Neighborhood is not unique per row (several tracts can share one), so each
# left row is paired with every right row of the same neighborhood.
# NOTE(review): this joins the unfiltered df_balt_teenbirth_b, not the
# Baltimore-only df_balt_teenbirth_b_onlybalt -- confirm which is intended.
df_balt_teenbirth_b_w_merge = df_balt_teenbirth_b.loc[
    :, ["tract", "Neighborhood", "City", "State", "Teenage Birth Rate"]
].merge(
    df_balt_teenbirth_w.loc[:, ["Neighborhood", "Teenage Birth Rate"]],
    on="Neighborhood",
    how="left",
)

In [15]:
# inspect the first 15 merged Baltimore rows
df_balt_teenbirth_b_w_merge.head(n=15)

Unnamed: 0,tract,Neighborhood,City,State,Teenage Birth Rate_x,Teenage Birth Rate_y
0,24510200701,Allendale,Baltimore,MD,0.4713,
1,24510270101,Arcadia,Baltimore,MD,0.582,0.0494
2,24510271801,Arlington,Baltimore,MD,0.5628,
3,24510260404,Baltimore Highlands,Baltimore,MD,0.539,0.4296
4,24510260404,Baltimore Highlands,Baltimore,MD,0.539,0.3596
5,24510260800,Baltimore Highlands,Baltimore,MD,0.3809,0.4296
6,24510260800,Baltimore Highlands,Baltimore,MD,0.3809,0.3596
7,24510120400,Barclay,Baltimore,MD,0.6161,
8,24510250101,Beechfield,Baltimore,MD,0.5453,0.4052
9,24510080102,Belair - Edison,Baltimore,MD,0.5786,0.1923


In [16]:
# replace the merge suffixes with race labels:
# _x came from the Black-population frame, _y from the White-population frame
df_balt_teenbirth_b_w_merge = df_balt_teenbirth_b_w_merge.rename(
    {
        "Teenage Birth Rate_x": "Teenage Birth Rate_Black",
        "Teenage Birth Rate_y": "Teenage Birth Rate_White",
    },
    axis="columns",
)

In [17]:
# confirm the renamed columns on the first five rows
df_balt_teenbirth_b_w_merge.head(n=5)

Unnamed: 0,tract,Neighborhood,City,State,Teenage Birth Rate_Black,Teenage Birth Rate_White
0,24510200701,Allendale,Baltimore,MD,0.4713,
1,24510270101,Arcadia,Baltimore,MD,0.582,0.0494
2,24510271801,Arlington,Baltimore,MD,0.5628,
3,24510260404,Baltimore Highlands,Baltimore,MD,0.539,0.4296
4,24510260404,Baltimore Highlands,Baltimore,MD,0.539,0.3596


In [18]:
# combine the San Francisco Black- and White-population rates into one frame
# Left join on Neighborhood: every row of the Black-population frame is kept
# and the White-population rate is attached where the neighborhood matches
# (NaN otherwise). Overlapping rate columns get the default _x / _y suffixes.
# Neighborhood is not unique per row, so each left row is paired with every
# right row of the same neighborhood.
df_sf_teenbirth_b_w_merge = df_sf_teenbirth_b.loc[
    :, ["tract", "Neighborhood", "City", "State", "Teenage Birth Rate"]
].merge(
    df_sf_teenbirth_w.loc[:, ["Neighborhood", "Teenage Birth Rate"]],
    on="Neighborhood",
    how="left",
)

In [19]:
# inspect the first 15 merged San Francisco rows
df_sf_teenbirth_b_w_merge.head(n=15)

Unnamed: 0,tract,Neighborhood,City,State,Teenage Birth Rate_x,Teenage Birth Rate_y
0,6075026401,Visitacion Valley,San Francisco,CA,0.6899,
1,6075015400,Laurel Heights,San Francisco,CA,0.5801,0.0
2,6075023300,Bayview,San Francisco,CA,0.5746,
3,6075023103,Bayview,San Francisco,CA,0.5402,
4,6075023003,Bayview,San Francisco,CA,0.4741,
5,6075061400,Potrero Hill,San Francisco,CA,0.4642,0.0783
6,6075061400,Potrero Hill,San Francisco,CA,0.4642,0.033
7,6075061400,Potrero Hill,San Francisco,CA,0.4642,0.0
8,6075060502,Sunnydale,San Francisco,CA,0.4484,
9,6075023102,Bayview,San Francisco,CA,0.4481,


In [20]:
# replace the merge suffixes with race labels:
# _x came from the Black-population frame, _y from the White-population frame
df_sf_teenbirth_b_w_merge = df_sf_teenbirth_b_w_merge.rename(
    {
        "Teenage Birth Rate_x": "Teenage Birth Rate_Black",
        "Teenage Birth Rate_y": "Teenage Birth Rate_White",
    },
    axis="columns",
)

In [21]:
# confirm the renamed columns on the first 15 rows
df_sf_teenbirth_b_w_merge.head(n=15)

Unnamed: 0,tract,Neighborhood,City,State,Teenage Birth Rate_Black,Teenage Birth Rate_White
0,6075026401,Visitacion Valley,San Francisco,CA,0.6899,
1,6075015400,Laurel Heights,San Francisco,CA,0.5801,0.0
2,6075023300,Bayview,San Francisco,CA,0.5746,
3,6075023103,Bayview,San Francisco,CA,0.5402,
4,6075023003,Bayview,San Francisco,CA,0.4741,
5,6075061400,Potrero Hill,San Francisco,CA,0.4642,0.0783
6,6075061400,Potrero Hill,San Francisco,CA,0.4642,0.033
7,6075061400,Potrero Hill,San Francisco,CA,0.4642,0.0
8,6075060502,Sunnydale,San Francisco,CA,0.4484,
9,6075023102,Bayview,San Francisco,CA,0.4481,


# bar graph of teen birth rates by neighborhood in both cities

In [23]:
# create baltimore data bar graph

# The merged frame holds several rows per neighborhood, and px.bar stacks rows
# that share an x value, so plotting it directly would show the sum of the
# repeated rates. Reduce to one average rate per neighborhood first.
balt_black_rate_by_neighborhood = (
    df_balt_teenbirth_b_w_merge
    .groupby("Neighborhood", as_index=False)["Teenage Birth Rate_Black"]
    .mean()
)

# Descriptive title and axis label so the figure stands on its own.
bar_balt_teenbirth = px.bar(
    balt_black_rate_by_neighborhood,
    x="Neighborhood",
    y="Teenage Birth Rate_Black",
    title="Average Teen Birth Rate by Neighborhood in Baltimore (Low-Income Black Population)",
    labels={"Teenage Birth Rate_Black": "Teenage Birth Rate"},
)

In [24]:
# display the Baltimore bar chart (the cell's last expression is rendered as its output)
bar_balt_teenbirth

# average data

In [25]:
# summary statistics for the merged San Francisco frame; the `mean` row is the
# average over all merged rows -- nothing is grouped by neighborhood here

df_sf_teenbirth_b_w_merge.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,tract,Teenage Birth Rate_Black,Teenage Birth Rate_White
count,57.0,57.0,35.0
mean,6075044000.0,0.344119,0.101114
std,126940.3,0.118691,0.168966
min,6075015000.0,0.1068,0.0
25%,6075018000.0,0.2777,0.00715
50%,6075025000.0,0.3327,0.0336
75%,6075026000.0,0.4127,0.101
max,6075981000.0,0.6899,0.6662


# bar graph of average teen birth rates by race in baltimore and sf