### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [18]:
# Dependencies and Setup
import pandas as pd

In [98]:
## Import Rent Data
# Original data source: https://www.kaggle.com/datasets/austinreese/usa-housing-listings
# Last updated Jan 2020
# Filtered down for 12 largest metros according to: https://worldpopulationreview.com/us-cities

# Read file (the path gets its own name so `rent` only ever refers to the DataFrame)
rent_path = "Resources/housing_majors.csv"
rent = pd.read_csv(rent_path)

# Standardize city names
# The short codes are positional: code i is given to the i-th distinct region in order
# of first appearance in the file, so this list has to track the file's row order.
region_codes = ['SF/SJ', 'SD', 'JAX', 'CHI', 'NY', 'PHL', 'ATX', 'DFW', 'HOU', 'SA', 'PHX', 'LA']
cities_list = rent.drop_duplicates(subset=['region'])
region_names = cities_list['region'].tolist()

# Fail with a readable message if the file no longer has exactly one region per code
if len(region_names) != len(region_codes):
    raise ValueError(
        f"Expected {len(region_codes)} distinct regions but found {len(region_names)}: {region_names}"
    )

# Rewrite the 'region' column only; calling replace on the whole DataFrame would also
# rewrite any cell in another column that happens to equal a region name.
rent['region'] = rent['region'].replace(region_names, region_codes)

# Display data 
rent

Unnamed: 0,id,url,region,region_url,price,type,sqfeet,beds,baths,cats_allowed,...,wheelchair_access,electric_vehicle_charge,comes_furnished,laundry_options,parking_options,image_url,description,lat,long,state
0,7048692414,https://sfbay.craigslist.org/scz/apa/d/capitol...,SF/SJ,https://sfbay.craigslist.org,1650,apartment,325,1,1.0,0,...,0,0,0,no laundry on site,off-street parking,https://images.craigslist.org/00Y0Y_1S9nzXZ8NF...,Lovely Studio With Small patio for your seren...,36.9771,-121.953,ca
1,7049506190,https://sandiego.craigslist.org/csd/apa/d/san-...,SD,https://sandiego.craigslist.org,1450,apartment,700,1,1.0,0,...,0,0,0,laundry on site,off-street parking,https://images.craigslist.org/00G0G_cG4yFLfaAa...,Apartment Details: -1 bedroom 1 bath - Proper...,32.7423,-117.095,ca
2,7043935679,https://sandiego.craigslist.org/csd/apa/d/san-...,SD,https://sandiego.craigslist.org,1700,apartment,700,1,1.0,0,...,0,0,0,laundry on site,,https://images.craigslist.org/00g0g_fjiyS1iI2o...,1 Bedroom with a Great View!!! Minutes to free...,32.7277,-117.165,ca
3,7035359947,https://sandiego.craigslist.org/csd/apa/d/san-...,SD,https://sandiego.craigslist.org,2685,townhouse,1127,2,2.0,1,...,0,0,0,w/d in unit,attached garage,https://images.craigslist.org/00g0g_dA6FtxqdV9...,The life youâve always imagined is at Levant...,32.7958,-117.071,ca
4,7049978012,https://sandiego.craigslist.org/csd/apa/d/san-...,SD,https://sandiego.craigslist.org,1390,apartment,263,0,1.0,1,...,0,0,0,laundry on site,street parking,https://images.craigslist.org/00v0v_akWNyI1F4d...,THE BARCELONA The perfect pairing of location...,32.7294,-117.162,ca
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26088,7035391481,https://sandiego.craigslist.org/csd/apa/d/san-...,SD,https://sandiego.craigslist.org,2525,apartment,1127,2,2.0,1,...,0,0,0,w/d in unit,attached garage,https://images.craigslist.org/00E0E_bs0T3dPEIH...,"Located near Mission Valley, the townhomes at ...",32.7958,-117.071,ca
26089,7049783450,https://sandiego.craigslist.org/csd/apa/d/san-...,SD,https://sandiego.craigslist.org,2593,apartment,1094,2,2.0,1,...,0,0,0,w/d in unit,,https://images.craigslist.org/01414_kiWespdEZL...,To schedule a tour We now book our tour appoin...,32.8274,-117.138,ca
26090,7049426671,https://sandiego.craigslist.org/csd/apa/d/san-...,SD,https://sandiego.craigslist.org,1300,apartment,490,1,1.0,1,...,1,0,1,laundry on site,off-street parking,https://images.craigslist.org/00B0B_bKWtLiBFGY...,Beautiful Turn-Key 1 Bed/1 Bath is located in ...,32.7560,-117.121,ca
26091,7049461332,https://sandiego.craigslist.org/nsd/apa/d/esco...,SD,https://sandiego.craigslist.org,1646,apartment,812,2,1.0,1,...,0,0,0,,,https://images.craigslist.org/00i0i_lgzbYDbzfP...,Bedrooms: 2 Bathrooms: 1 Square Feet: 812 Int...,33.1435,-117.097,ca


In [99]:
## Import income data
# Source: https://www.bea.gov/data/income-saving/personal-income-county-metro-and-other-areas ('Metropolitan Area Table')
income_path = "Resources/income_data.xlsx"
income_raw = pd.read_excel(income_path)

# Select relevant rows and columns (positional: rows 7-390, columns 0 and 2), then
# rename them. rename returns a new frame, so the slice is never modified in place.
income = income_raw.iloc[7:391, [0, 2]].rename(
    columns={'Table 2. Per Capita Personal Income, by Metropolitan Area, 2019–2021': "region",
             'Unnamed: 2': "Per_Capita_Income_2020"}
)

# Explicit metro -> short code mapping, so the result does not depend on the order
# in which the metros appear in the spreadsheet.
metro_codes = {
    'Austin-Round Rock-Georgetown, TX': 'ATX',
    'Chicago-Naperville-Elgin, IL-IN-WI': 'CHI',
    'Dallas-Fort Worth-Arlington, TX': 'DFW',
    'Houston-The Woodlands-Sugar Land, TX': 'HOU',
    'Jacksonville, FL': 'JAX',
    'Los Angeles-Long Beach-Anaheim, CA': 'LA',
    'New York-Newark-Jersey City, NY-NJ-PA': 'NY',
    'Philadelphia-Camden-Wilmington, PA-NJ-DE-MD': 'PHL',
    'Phoenix-Mesa-Chandler, AZ': 'PHX',
    'San Antonio-New Braunfels, TX': 'SA',
    'San Diego-Chula Vista-Carlsbad, CA': 'SD',
    'San Francisco-Oakland-Berkeley, CA': 'SF/SJ',
    'San Jose-Sunnyvale-Santa Clara, CA': 'SF/SJ',
}

# Filter for major cities
city_list = list(metro_codes)
income_filter = income.loc[income['region'].isin(city_list)]

# Every expected metro must be present, otherwise a city would silently lose its income
missing_metros = sorted(set(city_list) - set(income_filter['region']))
if missing_metros:
    raise ValueError(f"Metros not found in income data: {missing_metros}")

# Standardize city names (region column only) and make sure the income column is numeric;
# it is sliced out of a sheet that also holds header text, so it may arrive as object dtype.
income = income_filter.assign(
    region=income_filter['region'].map(metro_codes),
    Per_Capita_Income_2020=pd.to_numeric(income_filter['Per_Capita_Income_2020']),
)

# Group by cities in order to combine 'San Francisco & San Jose'
# NOTE: this is a simple (unweighted) mean of the two metro figures, not a population-weighted one.
income = income.groupby('region').mean()

# Display Data
income

Unnamed: 0_level_0,Per_Capita_Income_2020
region,Unnamed: 1_level_1
ATX,64916.0
CHI,66474.0
DFW,61824.0
HOU,61133.0
JAX,55158.0
LA,70280.0
NY,78727.0
PHL,68200.0
PHX,54907.0
SA,50214.0


In [100]:
## Merge Rent and Income data
# Left join: every listing is kept and picks up the income figure of its region
data = rent.merge(income, on="region", how="left")
data

Unnamed: 0,id,url,region,region_url,price,type,sqfeet,beds,baths,cats_allowed,...,electric_vehicle_charge,comes_furnished,laundry_options,parking_options,image_url,description,lat,long,state,Per_Capita_Income_2020
0,7048692414,https://sfbay.craigslist.org/scz/apa/d/capitol...,SF/SJ,https://sfbay.craigslist.org,1650,apartment,325,1,1.0,0,...,0,0,no laundry on site,off-street parking,https://images.craigslist.org/00Y0Y_1S9nzXZ8NF...,Lovely Studio With Small patio for your seren...,36.9771,-121.953,ca,115573.5
1,7049506190,https://sandiego.craigslist.org/csd/apa/d/san-...,SD,https://sandiego.craigslist.org,1450,apartment,700,1,1.0,0,...,0,0,laundry on site,off-street parking,https://images.craigslist.org/00G0G_cG4yFLfaAa...,Apartment Details: -1 bedroom 1 bath - Proper...,32.7423,-117.095,ca,67830.0
2,7043935679,https://sandiego.craigslist.org/csd/apa/d/san-...,SD,https://sandiego.craigslist.org,1700,apartment,700,1,1.0,0,...,0,0,laundry on site,,https://images.craigslist.org/00g0g_fjiyS1iI2o...,1 Bedroom with a Great View!!! Minutes to free...,32.7277,-117.165,ca,67830.0
3,7035359947,https://sandiego.craigslist.org/csd/apa/d/san-...,SD,https://sandiego.craigslist.org,2685,townhouse,1127,2,2.0,1,...,0,0,w/d in unit,attached garage,https://images.craigslist.org/00g0g_dA6FtxqdV9...,The life youâve always imagined is at Levant...,32.7958,-117.071,ca,67830.0
4,7049978012,https://sandiego.craigslist.org/csd/apa/d/san-...,SD,https://sandiego.craigslist.org,1390,apartment,263,0,1.0,1,...,0,0,laundry on site,street parking,https://images.craigslist.org/00v0v_akWNyI1F4d...,THE BARCELONA The perfect pairing of location...,32.7294,-117.162,ca,67830.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26088,7035391481,https://sandiego.craigslist.org/csd/apa/d/san-...,SD,https://sandiego.craigslist.org,2525,apartment,1127,2,2.0,1,...,0,0,w/d in unit,attached garage,https://images.craigslist.org/00E0E_bs0T3dPEIH...,"Located near Mission Valley, the townhomes at ...",32.7958,-117.071,ca,67830.0
26089,7049783450,https://sandiego.craigslist.org/csd/apa/d/san-...,SD,https://sandiego.craigslist.org,2593,apartment,1094,2,2.0,1,...,0,0,w/d in unit,,https://images.craigslist.org/01414_kiWespdEZL...,To schedule a tour We now book our tour appoin...,32.8274,-117.138,ca,67830.0
26090,7049426671,https://sandiego.craigslist.org/csd/apa/d/san-...,SD,https://sandiego.craigslist.org,1300,apartment,490,1,1.0,1,...,0,1,laundry on site,off-street parking,https://images.craigslist.org/00B0B_bKWtLiBFGY...,Beautiful Turn-Key 1 Bed/1 Bath is located in ...,32.7560,-117.121,ca,67830.0
26091,7049461332,https://sandiego.craigslist.org/nsd/apa/d/esco...,SD,https://sandiego.craigslist.org,1646,apartment,812,2,1.0,1,...,0,0,,,https://images.craigslist.org/00i0i_lgzbYDbzfP...,Bedrooms: 2 Bathrooms: 1 Square Feet: 812 Int...,33.1435,-117.097,ca,67830.0


In [118]:
## Census File to Load
# Original data source: https://worldpopulationreview.com/us-cities
# Updated for 2020 Census, includes 2023 population

# Read file
census_path = "Resources/census_data.csv"
census_raw = pd.read_csv(census_path)

# Explicit city -> short code mapping, so the result does not depend on the order
# of the rows in the census file.
city_codes = {
    'New York City': 'NY',
    'Los Angeles': 'LA',
    'Chicago': 'CHI',
    'Houston': 'HOU',
    'Phoenix': 'PHX',
    'Philadelphia': 'PHL',
    'San Antonio': 'SA',
    'San Diego': 'SD',
    'Dallas': 'DFW',
    'Fort Worth': 'DFW',
    'San Jose': 'SF/SJ',
    'San Francisco': 'SF/SJ',
    'Austin': 'ATX',
    'Jacksonville': 'JAX',
}

# Filter for major cities
city_list = list(city_codes)
census_filter = census_raw.loc[census_raw['name'].isin(city_list)]

# Each city must match exactly one row: a missing city would drop out of the totals,
# and a second city with the same name would be summed into the wrong metro.
matched_names = census_filter['name']
missing_cities = sorted(set(city_list) - set(matched_names))
repeated_cities = sorted(matched_names[matched_names.duplicated()].unique())
if missing_cities or repeated_cities:
    raise ValueError(
        f"Census city lookup failed; missing: {missing_cities}, matched more than once: {repeated_cities}"
    )

# Standardize city names (name column only)
census = census_filter.assign(name=census_filter['name'].map(city_codes))

# Group by cities in order to combine 'Dallas & Fort Worth' and 'San Francisco & San Jose'
# Only numeric columns are summed. The sum is meaningful for additive columns such as the
# population and area columns; for combined cities the summed values of the remaining
# columns (e.g. growth, rank, density, intptlat/intptlong) should not be interpreted.
census = census.groupby('name').sum(numeric_only=True)
census

Unnamed: 0_level_0,pop2023,pop2022,pop2020,pop2010,growth,geoid,ansicode,funcstat,aland,awater,aland_sqmi,awater_sqmi,intptlat,intptlong,metroId,rank,density
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
ATX,1013293,996147,961855,790390,0.01721,4805000,2409761,0,828638000,17024700,319.939,6.573,30.3012,-97.7527,12420.0,11,3167
CHI,2761625,2756546,2746388,2695598,0.00184,1714000,428803,0,588882000,17621500,227.369,6.804,41.8376,-87.6818,16980.0,3,12146
DFW,2308575,2280148,2223294,1939022,0.02666,9646000,4820819,0,1774970000,137481300,685.32,53.082,65.5748,-194.1132,38200.0,22,6746
HOU,2366119,2345606,2304580,2099451,0.00875,4835000,2410796,0,1658090000,81248100,640.194,31.37,29.7857,-95.3888,26420.0,4,3696
JAX,987960,975177,949611,821784,0.01311,1235000,2404783,0,1935930000,329458000,747.467,127.204,30.3369,-81.6616,27260.0,12,1322
LA,3930586,3919973,3898747,3792621,0.00271,644000,2410877,0,1214590000,87468500,468.956,33.772,34.0194,-118.411,31080.0,2,8382
NY,8992908,8930002,8804190,8175133,0.00704,3651000,2395220,0,777983000,434642000,300.381,167.816,40.6635,-73.9387,35620.0,1,29938
PHL,1627134,1619355,1603797,1526006,0.0048,4260000,1215531,0,347782000,21806100,134.279,8.419,40.0094,-75.1333,37980.0,6,12118
PHX,1656892,1640641,1608139,1445632,0.00991,455000,2411414,0,1340770000,3236740,517.673,1.25,33.5722,-112.09,38060.0,5,3201
SA,1466791,1456069,1434625,1327407,0.00736,4865000,2411774,0,1256440000,14515600,485.113,5.605,29.4659,-98.5247,41700.0,7,3024


In [119]:
## Merge Datasets on city
# Columns that are not needed in the final dataset
listing_unused = ['url', 'region_url', 'image_url']
census_unused = ['pop2023', 'pop2022', 'growth', 'geoid', 'ansicode', 'funcstat', 'intptlat',
                 'intptlong', 'metroId', 'rank', 'density']

# Drop the unused census columns before merging so they are never copied onto every listing
census_kept = census.drop(columns=census_unused)

# `data` is rebuilt from itself, so first strip anything a previous run of this cell
# already removed or added (errors="ignore" makes that a no-op on the first run).
# Without this, a re-run would produce duplicated *_x / *_y census columns.
# The whole step is one expression, so `data` is only rebound if every part succeeds.
data = (
    data
    .drop(columns=listing_unused + list(census_kept.columns), errors="ignore")
    .merge(census_kept, how="left", left_on="region", right_on="name",
           validate="many_to_one")  # each region code must match at most one census row
)

# Display data
data

Unnamed: 0,id,region,price,type,sqfeet,beds,baths,cats_allowed,dogs_allowed,smoking_allowed,...,lat,long,state,Per_Capita_Income_2020,pop2020,pop2010,aland,awater,aland_sqmi,awater_sqmi
0,7048692414,SF/SJ,1650,apartment,325,1,1.0,0,0,0,...,36.9771,-121.953,ca,115573.5,1887205,1751177,581999000,487244850,224.711,188.126
1,7049506190,SD,1450,apartment,700,1,1.0,0,0,0,...,32.7423,-117.095,ca,67830.0,1386932,1307402,844018000,120537000,325.877,46.540
2,7043935679,SD,1700,apartment,700,1,1.0,0,0,1,...,32.7277,-117.165,ca,67830.0,1386932,1307402,844018000,120537000,325.877,46.540
3,7035359947,SD,2685,townhouse,1127,2,2.0,1,1,1,...,32.7958,-117.071,ca,67830.0,1386932,1307402,844018000,120537000,325.877,46.540
4,7049978012,SD,1390,apartment,263,0,1.0,1,1,0,...,32.7294,-117.162,ca,67830.0,1386932,1307402,844018000,120537000,325.877,46.540
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26088,7035391481,SD,2525,apartment,1127,2,2.0,1,1,1,...,32.7958,-117.071,ca,67830.0,1386932,1307402,844018000,120537000,325.877,46.540
26089,7049783450,SD,2593,apartment,1094,2,2.0,1,1,1,...,32.8274,-117.138,ca,67830.0,1386932,1307402,844018000,120537000,325.877,46.540
26090,7049426671,SD,1300,apartment,490,1,1.0,1,1,1,...,32.7560,-117.121,ca,67830.0,1386932,1307402,844018000,120537000,325.877,46.540
26091,7049461332,SD,1646,apartment,812,2,1.0,1,1,1,...,33.1435,-117.097,ca,67830.0,1386932,1307402,844018000,120537000,325.877,46.540


In [None]:
## Add calculated columns
# Density: 2020 population divided by the `aland_sqmi` column
# Growth: relative change in population from 2010 to 2020
data = data.assign(
    Density=data['pop2020'].div(data['aland_sqmi']),
    Growth=data['pop2020'].div(data['pop2010']).sub(1),
)

In [120]:
## Write to csv
# Export the merged dataset; with the default settings the row index is written
# as the first (unnamed) column of the file.
output_path = 'Resources/final_data.csv'
data.to_csv(output_path)