# Link Instacart Service Area to Counties

Accessing the data in our MongoDB collections for Instacart and Counties, we will use Pandas to merge and create a new collection to house the combined data.

### Import dependencies

In [5]:
import os
import pandas as pd
from pandas import DataFrame
import pymongo

### Setup DB connection and establish collection for storage

In [6]:
# Setup connection to MongoDB
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [7]:
# Connect to DB and Collections plus create collection for storing combined data
db = client.food_desert_db
collection_a = db.zipcode_county_data
collection_b = db.instacart_cities
collection_c = db.instacart_counties

### Import collections from MongoDB and convert to DataFrames

In [8]:
collection_a_data = db.zipcode_county_data.find()
collection_b_data = db.instacart_cities.find()

In [9]:
county = pd.DataFrame.from_records(collection_a_data)
#county.to_csv(os.path.join("Data", "county.csv"))
county.head()

Unnamed: 0,_id,city-state,city-state/county,county
0,5e1aa8bd051df6979a54fbd4,"Holtsville, NY","Holtsville, NY/Suffolk",Suffolk
1,5e1aa8bd051df6979a54fbd5,"Agawam, MA","Agawam, MA/Hampden",Hampden
2,5e1aa8bd051df6979a54fbd6,"Amherst, MA","Amherst, MA/Hampshire",Hampshire
3,5e1aa8bd051df6979a54fbd7,"Amherst, MA","Amherst, MA/Hampshire",Hampshire
4,5e1aa8bd051df6979a54fbd8,"Amherst, MA","Amherst, MA/Hampshire",Hampshire


In [10]:
svc_area = pd.DataFrame.from_records(collection_b_data)
#svc_area.to_csv(os.path.join("Data", "svc_area.csv"))
svc_area.head()

Unnamed: 0,City/State,_id
0,"Adamsville, AL",5e1a883d49ffc32c34c05001
1,"Alabaster, AL",5e1a883d49ffc32c34c05002
2,"Albertville, AL",5e1a883d49ffc32c34c05003
3,"Anniston, AL",5e1a883d49ffc32c34c05004
4,"Arab, AL",5e1a883d49ffc32c34c05005


In [11]:
svc_area.count()

City/State    10732
_id           10732
dtype: int64

In [12]:
county.rename(columns = {'city-state': 'City/State'}, inplace = True)
county.head()

Unnamed: 0,_id,City/State,city-state/county,county
0,5e1aa8bd051df6979a54fbd4,"Holtsville, NY","Holtsville, NY/Suffolk",Suffolk
1,5e1aa8bd051df6979a54fbd5,"Agawam, MA","Agawam, MA/Hampden",Hampden
2,5e1aa8bd051df6979a54fbd6,"Amherst, MA","Amherst, MA/Hampshire",Hampshire
3,5e1aa8bd051df6979a54fbd7,"Amherst, MA","Amherst, MA/Hampshire",Hampshire
4,5e1aa8bd051df6979a54fbd8,"Amherst, MA","Amherst, MA/Hampshire",Hampshire


In [13]:
instacart_counties = pd.merge(svc_area, county, on='City/State', how='left')
instacart_counties.head()

Unnamed: 0,City/State,_id_x,_id_y,city-state/county,county
0,"Adamsville, AL",5e1a883d49ffc32c34c05001,5e1aa8bd051df6979a552a06,"Adamsville, AL/Jefferson",Jefferson
1,"Alabaster, AL",5e1a883d49ffc32c34c05002,5e1aa8bd051df6979a552a08,"Alabaster, AL/Shelby",Shelby
2,"Alabaster, AL",5e1a883d49ffc32c34c05002,5e1aa8bd051df6979a552a09,"Alabaster, AL/Shelby",Shelby
3,"Albertville, AL",5e1a883d49ffc32c34c05003,5e1aa8bd051df6979a552b22,"Albertville, AL/Marshall",Marshall
4,"Anniston, AL",5e1a883d49ffc32c34c05004,5e1aa8bd051df6979a552b83,"Anniston, AL/Calhoun",Calhoun


In [14]:
instacart_counties.count()

City/State           14334
_id_x                14334
_id_y                14253
city-state/county    14253
county               14253
dtype: int64

In [15]:
instacart_counties = instacart_counties[['City/State', 'county']]
instacart_counties.head()

Unnamed: 0,City/State,county
0,"Adamsville, AL",Jefferson
1,"Alabaster, AL",Shelby
2,"Alabaster, AL",Shelby
3,"Albertville, AL",Marshall
4,"Anniston, AL",Calhoun


In [16]:
instacart_counties_nodup = DataFrame.drop_duplicates(instacart_counties).copy()
instacart_counties_nodup

Unnamed: 0,City/State,county
0,"Adamsville, AL",Jefferson
1,"Alabaster, AL",Shelby
3,"Albertville, AL",Marshall
4,"Anniston, AL",Calhoun
10,"Arab, AL",Marshall
11,"Ashford, AL",Houston
12,"Athens, AL",Limestone
13,"Attalla, AL",Etowah
14,"Auburn University, AL",Lee
15,"Auburn, AL",Lee


In [17]:
instacart_counties_nodup.count()

City/State    10988
county        10907
dtype: int64

In [18]:
instacart_counties_nodup.loc[instacart_counties_nodup.isnull().any(axis=1)]

Unnamed: 0,City/State,county
116,"McCalla, AL",
172,"Smiths, AL",
203,"Elmendorf Afb, AK",
227,"Fort Richardson, AK",
246,"Anthem, AZ",
475,"Henning, AR",
555,"Ripley, AR",
614,"Alta Loma, CA",
1167,"March Air Force Base, CA",
1175,"Mcclellan Afb, CA",


Removed null county values from dataset - mostly this looks like some of the city/state combos are impacted by how they were spelled/spaced such as Mc or McMechen between the datasets.  With more time, would go through and address, but due to impending due date - dropped for purposes of this project.

In [19]:
instacart_counties_nodup.dropna(subset=['county'], inplace=True)
instacart_counties_nodup.count()

City/State    10907
county        10907
dtype: int64

In [20]:
instacart_counties_nodup.head()

Unnamed: 0,City/State,county
0,"Adamsville, AL",Jefferson
1,"Alabaster, AL",Shelby
3,"Albertville, AL",Marshall
4,"Anniston, AL",Calhoun
10,"Arab, AL",Marshall


In [21]:
collection_c.insert_many(instacart_counties_nodup.to_dict('records'))

<pymongo.results.InsertManyResult at 0x19d04759188>

In [22]:
listings = db.instacart_counties.find()

for listing in listings:
    print(listing)

{'_id': ObjectId('5e1aa92a8f9b5a37aed168da'), 'City/State': 'Adamsville, AL', 'county': 'Jefferson'}
{'_id': ObjectId('5e1aa92a8f9b5a37aed168db'), 'City/State': 'Alabaster, AL', 'county': 'Shelby'}
{'_id': ObjectId('5e1aa92a8f9b5a37aed168dc'), 'City/State': 'Albertville, AL', 'county': 'Marshall'}
{'_id': ObjectId('5e1aa92a8f9b5a37aed168dd'), 'City/State': 'Anniston, AL', 'county': 'Calhoun'}
{'_id': ObjectId('5e1aa92a8f9b5a37aed168de'), 'City/State': 'Arab, AL', 'county': 'Marshall'}
{'_id': ObjectId('5e1aa92a8f9b5a37aed168df'), 'City/State': 'Ashford, AL', 'county': 'Houston'}
{'_id': ObjectId('5e1aa92a8f9b5a37aed168e0'), 'City/State': 'Athens, AL', 'county': 'Limestone'}
{'_id': ObjectId('5e1aa92a8f9b5a37aed168e1'), 'City/State': 'Attalla, AL', 'county': 'Etowah'}
{'_id': ObjectId('5e1aa92a8f9b5a37aed168e2'), 'City/State': 'Auburn University, AL', 'county': 'Lee'}
{'_id': ObjectId('5e1aa92a8f9b5a37aed168e3'), 'City/State': 'Auburn, AL', 'county': 'Lee'}
{'_id': ObjectId('5e1aa92a8f9