# Link Instacart Service Area to Counties

Accessing the data in our MongoDB collections for Instacart and Counties, we will use Pandas to merge and create a new collection to house the combined data.

### Import dependencies

In [1]:
import os
import pandas as pd
from pandas import DataFrame
import pymongo

### Setup DB connection and establish collection for storage

In [2]:
# Setup connection to MongoDB
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [3]:
# Connect to DB and Collections plus create collection for storing combined data
db = client.food_desert_db
collection_a = db.instacart_counties
collection_b = db.food_deserts
collection_c = db.food_deserts_with_svc

### Import collections from MongoDB and convert to DataFrames

In [4]:
collection_a_data = db.instacart_counties.find()
collection_b_data = db.food_deserts.find()

In [5]:
svc_area = pd.DataFrame.from_records(collection_a_data)
svc_area.head()

Unnamed: 0,City/State,_id,county
0,"Adamsville, AL",5e1aa92a8f9b5a37aed168da,Jefferson
1,"Alabaster, AL",5e1aa92a8f9b5a37aed168db,Shelby
2,"Albertville, AL",5e1aa92a8f9b5a37aed168dc,Marshall
3,"Anniston, AL",5e1aa92a8f9b5a37aed168dd,Calhoun
4,"Arab, AL",5e1aa92a8f9b5a37aed168de,Marshall


In [6]:
food_deserts = pd.DataFrame.from_records(collection_b_data)
food_deserts.head()

Unnamed: 0,CensusTract,County,State Abr,Urban,_id,food_desert
0,1001020500,Autauga,AL,1,5e1a8a533c87441b0372b9d9,1
1,1001020700,Autauga,AL,1,5e1a8a533c87441b0372b9da,5
2,1001020801,Autauga,AL,0,5e1a8a533c87441b0372b9db,1
3,1001021100,Autauga,AL,0,5e1a8a533c87441b0372b9dc,4
4,1003010500,Baldwin,AL,1,5e1a8a533c87441b0372b9dd,1


In [16]:
food_deserts.count()

CensusTract    27713
County         27713
State Abr      27648
Urban          27713
_id            27713
food_desert    27713
dtype: int64

In [17]:
svc_area.rename(columns = {'county': 'County'}, inplace = True)
svc_area.head()

Unnamed: 0,City/State,_id,County
0,"Adamsville, AL",5e1aa92a8f9b5a37aed168da,Jefferson
1,"Alabaster, AL",5e1aa92a8f9b5a37aed168db,Shelby
2,"Albertville, AL",5e1aa92a8f9b5a37aed168dc,Marshall
3,"Anniston, AL",5e1aa92a8f9b5a37aed168dd,Calhoun
4,"Arab, AL",5e1aa92a8f9b5a37aed168de,Marshall


In [18]:
desert_svc = pd.merge(food_deserts, svc_area, on='County', how='left')
desert_svc.head()

Unnamed: 0,CensusTract,County,State Abr,Urban,_id_x,food_desert,City/State,_id_y
0,1001020500,Autauga,AL,1,5e1a8a533c87441b0372b9d9,1,"Prattville, AL",5e1aa92a8f9b5a37aed16948
1,1001020700,Autauga,AL,1,5e1a8a533c87441b0372b9da,5,"Prattville, AL",5e1aa92a8f9b5a37aed16948
2,1001020801,Autauga,AL,0,5e1a8a533c87441b0372b9db,1,"Prattville, AL",5e1aa92a8f9b5a37aed16948
3,1001021100,Autauga,AL,0,5e1a8a533c87441b0372b9dc,4,"Prattville, AL",5e1aa92a8f9b5a37aed16948
4,1003010500,Baldwin,AL,1,5e1a8a533c87441b0372b9dd,1,"Bon Secour, AL",5e1aa92a8f9b5a37aed168ea


In [19]:
desert_svc.count()

CensusTract    797963
County         797963
State Abr      797898
Urban          797963
_id_x          797963
food_desert    797963
City/State     796364
_id_y          796364
dtype: int64

In [20]:
desert_svc = desert_svc[['County', 'City/State', 'food_desert']]
desert_svc.head()

Unnamed: 0,County,City/State,food_desert
0,Autauga,"Prattville, AL",1
1,Autauga,"Prattville, AL",5
2,Autauga,"Prattville, AL",1
3,Autauga,"Prattville, AL",4
4,Baldwin,"Bon Secour, AL",1


In [21]:
desert_svc.loc[desert_svc.isnull().any(axis=1)]

Unnamed: 0,County,City/State,food_desert
84,Barbour,,2
85,Barbour,,2
86,Barbour,,2
87,Barbour,,1
88,Barbour,,5
110,Bullock,,4
111,Bullock,,1
112,Bullock,,4
371,Chilton,,5
372,Choctaw,,4


Removed null City/State values from dataset -  With more time, would go through and address any potential syntax issues on the strings, but due to impending due date - dropped for purposes of this project.  Remaining sets ensure only food_deserts with svc remains

In [22]:
desert_svc.dropna(subset=['City/State'], inplace=True)
desert_svc.count()

County         796364
City/State     796364
food_desert    796364
dtype: int64

In [23]:
desert_svc_nodup = DataFrame.drop_duplicates(desert_svc).copy()
desert_svc_nodup

Unnamed: 0,County,City/State,food_desert
0,Autauga,"Prattville, AL",1
1,Autauga,"Prattville, AL",5
3,Autauga,"Prattville, AL",4
4,Baldwin,"Bon Secour, AL",1
5,Baldwin,"Daphne, AL",1
6,Baldwin,"Fairhope, AL",1
7,Baldwin,"Foley, AL",1
8,Baldwin,"Gulf Shores, AL",1
9,Baldwin,"Loxley, AL",1
10,Baldwin,"Magnolia Springs, AL",1


In [24]:
desert_svc_nodup.count()

County         39002
City/State     39002
food_desert    39002
dtype: int64

In [30]:
desert_svc_groups = desert_svc_nodup.groupby(['County','City/State']).sum().reset_index()
desert_svc_groups

Unnamed: 0,County,City/State,food_desert
0,Abbeville,"Abbeville, SC",7
1,Acadia,"Branch, LA",9
2,Acadia,"Crowley, LA",9
3,Acadia,"Egan, LA",9
4,Acadia,"Estherwood, LA",9
5,Acadia,"Morse, LA",9
6,Acadia,"Rayne, LA",9
7,Accomack,"Accomac, VA",7
8,Accomack,"Belle Haven, VA",7
9,Accomack,"Bloxom, VA",7


In [31]:
collection_c.insert_many(desert_svc_groups.to_dict('records'))

<pymongo.results.InsertManyResult at 0x26cb5fbb188>

In [32]:
listings = db.food_deserts_with_svc.find()

for listing in listings:
    print(listing)

{'_id': ObjectId('5e1aac45736f958b6595e84a'), 'County': 'Abbeville', 'City/State': 'Abbeville, SC', 'food_desert': 7}
{'_id': ObjectId('5e1aac45736f958b6595e84b'), 'County': 'Acadia', 'City/State': 'Branch, LA', 'food_desert': 9}
{'_id': ObjectId('5e1aac45736f958b6595e84c'), 'County': 'Acadia', 'City/State': 'Crowley, LA', 'food_desert': 9}
{'_id': ObjectId('5e1aac45736f958b6595e84d'), 'County': 'Acadia', 'City/State': 'Egan, LA', 'food_desert': 9}
{'_id': ObjectId('5e1aac45736f958b6595e84e'), 'County': 'Acadia', 'City/State': 'Estherwood, LA', 'food_desert': 9}
{'_id': ObjectId('5e1aac45736f958b6595e84f'), 'County': 'Acadia', 'City/State': 'Morse, LA', 'food_desert': 9}
{'_id': ObjectId('5e1aac45736f958b6595e850'), 'County': 'Acadia', 'City/State': 'Rayne, LA', 'food_desert': 9}
{'_id': ObjectId('5e1aac45736f958b6595e851'), 'County': 'Accomack', 'City/State': 'Accomac, VA', 'food_desert': 7}
{'_id': ObjectId('5e1aac45736f958b6595e852'), 'County': 'Accomack', 'City/State': 'Belle Haven