# Link Instacart Service Area to Counties

Using the data stored in our MongoDB collections for Instacart service areas and counties, we will merge the two datasets with pandas and save the combined data to a new collection.

### Import dependencies

In [1]:
import os
import pandas as pd
from pandas import DataFrame
import pymongo

### Setup DB connection and establish collection for storage

In [2]:
# MongoDB connection: define the server URI, then build a client from it
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [3]:
# Select the project database and take handles to its collections:
#   collection_a / collection_b -> the two source collections read below
#   collection_c                -> collection that will store the combined data
db = client["food_desert_db"]
collection_a = db["zipcode_county_data"]
collection_b = db["instacart_cities"]
collection_c = db["instacart_counties"]

### Import collections from MongoDB and convert to DataFrames

In [4]:
# Unfiltered find() on each source collection; each call returns a cursor
# that is consumed when the DataFrames are built in the next cells.
collection_a_data = collection_a.find()
collection_b_data = collection_b.find()

In [5]:
# Build the county DataFrame from the zipcode/county cursor and preview it
county = pd.DataFrame.from_records(data=collection_a_data)
# Optional CSV snapshot of the raw frame (left disabled):
#county.to_csv(os.path.join("Data", "county.csv"))
county.head(5)

Unnamed: 0,_id,city-state,city-state/county,county
0,5e1a977963f1268ef8667c9d,"Holtsville, NY","Holtsville, NY/Suffolk County",Suffolk County
1,5e1a977963f1268ef8667c9e,"Agawam, MA","Agawam, MA/Hampden County",Hampden County
2,5e1a977963f1268ef8667c9f,"Amherst, MA","Amherst, MA/Hampshire County",Hampshire County
3,5e1a977963f1268ef8667ca0,"Amherst, MA","Amherst, MA/Hampshire County",Hampshire County
4,5e1a977963f1268ef8667ca1,"Amherst, MA","Amherst, MA/Hampshire County",Hampshire County


In [6]:
# Build the Instacart service-area DataFrame from its cursor and preview it
svc_area = pd.DataFrame.from_records(data=collection_b_data)
# Optional CSV snapshot of the raw frame (left disabled):
#svc_area.to_csv(os.path.join("Data", "svc_area.csv"))
svc_area.head(5)

Unnamed: 0,City/State,_id
0,"Adamsville, AL",5e1a883d49ffc32c34c05001
1,"Alabaster, AL",5e1a883d49ffc32c34c05002
2,"Albertville, AL",5e1a883d49ffc32c34c05003
3,"Anniston, AL",5e1a883d49ffc32c34c05004
4,"Arab, AL",5e1a883d49ffc32c34c05005


In [7]:
# Non-null value count for each column of the service-area frame
svc_area.count()

City/State    10732
_id           10732
dtype: int64

In [8]:
# Relabel 'city-state' as 'City/State' so county and svc_area share a join column
county = county.rename(columns={'city-state': 'City/State'})
county.head(5)

Unnamed: 0,_id,City/State,city-state/county,county
0,5e1a977963f1268ef8667c9d,"Holtsville, NY","Holtsville, NY/Suffolk County",Suffolk County
1,5e1a977963f1268ef8667c9e,"Agawam, MA","Agawam, MA/Hampden County",Hampden County
2,5e1a977963f1268ef8667c9f,"Amherst, MA","Amherst, MA/Hampshire County",Hampshire County
3,5e1a977963f1268ef8667ca0,"Amherst, MA","Amherst, MA/Hampshire County",Hampshire County
4,5e1a977963f1268ef8667ca1,"Amherst, MA","Amherst, MA/Hampshire County",Hampshire County


In [9]:
# Left join on City/State: every service-area row is kept, and county columns
# are filled in wherever a matching City/State exists in the county frame.
instacart_counties = svc_area.merge(county, how='left', on='City/State')
instacart_counties.head(5)

Unnamed: 0,City/State,_id_x,_id_y,city-state/county,county
0,"Adamsville, AL",5e1a883d49ffc32c34c05001,5e1a977a63f1268ef866aacf,"Adamsville, AL/Jefferson County",Jefferson County
1,"Alabaster, AL",5e1a883d49ffc32c34c05002,5e1a977a63f1268ef866aad1,"Alabaster, AL/Shelby County",Shelby County
2,"Alabaster, AL",5e1a883d49ffc32c34c05002,5e1a977a63f1268ef866aad2,"Alabaster, AL/Shelby County",Shelby County
3,"Albertville, AL",5e1a883d49ffc32c34c05003,5e1a977a63f1268ef866abeb,"Albertville, AL/Marshall County",Marshall County
4,"Anniston, AL",5e1a883d49ffc32c34c05004,5e1a977a63f1268ef866ac4c,"Anniston, AL/Calhoun County",Calhoun County


In [10]:
# Non-null value count per column of the merged frame; columns that came from
# the county side count lower wherever a City/State found no match.
instacart_counties.count()

City/State           14334
_id_x                14334
_id_y                14253
city-state/county    14253
county               14253
dtype: int64

In [11]:
# Narrow the merged frame to the two columns that will be stored
instacart_counties = instacart_counties.loc[:, ['City/State', 'county']]
instacart_counties.head(5)

Unnamed: 0,City/State,county
0,"Adamsville, AL",Jefferson County
1,"Alabaster, AL",Shelby County
2,"Alabaster, AL",Shelby County
3,"Albertville, AL",Marshall County
4,"Anniston, AL",Calhoun County


In [12]:
# Collapse repeated (City/State, county) rows. The county frame lists the same
# City/State more than once, so the left join repeated service-area rows.
# Call drop_duplicates on the frame itself rather than through the DataFrame class.
# .copy() makes the result an independent frame that later cells can modify safely.
instacart_counties_nodup = instacart_counties.drop_duplicates().copy()

# Preview only the first rows instead of rendering the whole frame
instacart_counties_nodup.head(10)

Unnamed: 0,City/State,county
0,"Adamsville, AL",Jefferson County
1,"Alabaster, AL",Shelby County
3,"Albertville, AL",Marshall County
4,"Anniston, AL",Calhoun County
10,"Arab, AL",Marshall County
11,"Ashford, AL",Houston County
12,"Athens, AL",Limestone County
13,"Attalla, AL",Etowah County
14,"Auburn University, AL",Lee County
15,"Auburn, AL",Lee County


In [13]:
# Non-null value count per column after de-duplication; a lower 'county' count
# means some City/State rows still have no county attached.
instacart_counties_nodup.count()

City/State    10988
county        10907
dtype: int64

In [14]:
# Show the rows that have a missing value in at least one column
instacart_counties_nodup[instacart_counties_nodup.isna().any(axis='columns')]

Unnamed: 0,City/State,county
116,"McCalla, AL",
172,"Smiths, AL",
203,"Elmendorf Afb, AK",
227,"Fort Richardson, AK",
246,"Anthem, AZ",
475,"Henning, AR",
555,"Ripley, AR",
614,"Alta Loma, CA",
1167,"March Air Force Base, CA",
1175,"Mcclellan Afb, CA",


Rows with a null county value are removed from the dataset. Most of these appear to be city/state combinations that are spelled or spaced differently between the two datasets (for example, names beginning with "Mc", such as McMechen). With more time we would go through and reconcile these mismatches, but because of the impending due date they are dropped for the purposes of this project.

In [15]:
# Keep only the rows with a county, then re-check the per-column non-null counts
instacart_counties_nodup = instacart_counties_nodup.dropna(subset=['county'])
instacart_counties_nodup.count()

City/State    10907
county        10907
dtype: int64

In [16]:
# Preview the first five cleaned rows before they are written to MongoDB
instacart_counties_nodup.head(5)

Unnamed: 0,City/State,county
0,"Adamsville, AL",Jefferson County
1,"Alabaster, AL",Shelby County
3,"Albertville, AL",Marshall County
4,"Anniston, AL",Calhoun County
10,"Arab, AL",Marshall County


In [17]:
# One dict per row, which is the document shape insert_many expects
county_records = instacart_counties_nodup.to_dict('records')

# Never wipe the stored collection when there is nothing to replace it with
if not county_records:
    raise ValueError("No Instacart/county records to store; instacart_counties left unchanged.")

# Clear the derived collection first so re-running this notebook replaces its
# contents instead of appending another full copy of the same records.
collection_c.delete_many({})
collection_c.insert_many(county_records)

<pymongo.results.InsertManyResult at 0x192376b4408>

In [18]:
# Spot-check what was stored: print only a small sample of documents rather than
# every document in the collection, which would flood the cell output.
PREVIEW_LIMIT = 10
listings = db.instacart_counties.find().limit(PREVIEW_LIMIT)

for listing in listings:
    print(listing)

{'_id': ObjectId('5e1a98a7b3d5f760d86d79e1'), 'City/State': 'Adamsville, AL', 'county': 'Jefferson County'}
{'_id': ObjectId('5e1a98a7b3d5f760d86d79e2'), 'City/State': 'Alabaster, AL', 'county': 'Shelby County'}
{'_id': ObjectId('5e1a98a7b3d5f760d86d79e3'), 'City/State': 'Albertville, AL', 'county': 'Marshall County'}
{'_id': ObjectId('5e1a98a7b3d5f760d86d79e4'), 'City/State': 'Anniston, AL', 'county': 'Calhoun County'}
{'_id': ObjectId('5e1a98a7b3d5f760d86d79e5'), 'City/State': 'Arab, AL', 'county': 'Marshall County'}
{'_id': ObjectId('5e1a98a7b3d5f760d86d79e6'), 'City/State': 'Ashford, AL', 'county': 'Houston County'}
{'_id': ObjectId('5e1a98a7b3d5f760d86d79e7'), 'City/State': 'Athens, AL', 'county': 'Limestone County'}
{'_id': ObjectId('5e1a98a7b3d5f760d86d79e8'), 'City/State': 'Attalla, AL', 'county': 'Etowah County'}
{'_id': ObjectId('5e1a98a7b3d5f760d86d79e9'), 'City/State': 'Auburn University, AL', 'county': 'Lee County'}
{'_id': ObjectId('5e1a98a7b3d5f760d86d79ea'), 'City/State