In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
from geopy.exc import GeocoderTimedOut 
from geopy.geocoders import Nominatim

%matplotlib inline

### Merge NDC Deliveries with NDC Partners to get Partners, Addresses, and Total Diaper Deliveries to be used in a map

In [2]:
#import NDC deliveries csv
deliveries = pd.read_csv('../data/Nash Diaper Stastistics - 2020 Deliveries.csv')
print(deliveries.shape)
deliveries.head()

(766, 16)


Unnamed: 0,Partners,Date,NB,1,2,3,4,5,6,Pullups,Total,Running Total,Ever to Date,Year to Date,Unnamed: 14,Unnamed: 15
0,NICE Highlands Apartments,1/4/2020,500.0,400.0,450.0,600.0,600.0,600.0,600.0,,3750.0,3750.0,2321750.0,,,
1,Martha O'Bryan,1/7/2020,,,,,600.0,600.0,600.0,,1800.0,5550.0,2323550.0,,,
2,Madison Hispanic 7th Day,1/7/2020,,,450.0,450.0,500.0,750.0,750.0,,2900.0,8450.0,2326450.0,,2900.0,
3,Legacy Mission Village,1/7/2020,1000.0,1000.0,1600.0,2375.0,4500.0,4800.0,4800.0,,20075.0,28525.0,2346525.0,,,
4,Tusculum Elementary,1/7/2020,150.0,175.0,150.0,150.0,150.0,150.0,150.0,,1075.0,29600.0,2347600.0,,,


In [3]:
#import NDC deliveries csv
partners = pd.read_csv('../data/Nash Diaper Stastistics - 2020 Partner Agencies.csv')
print(partners.shape)
partners.head()

(116, 2)


Unnamed: 0,Partner,Address
0,Alex Green Elementary,"3921 Lloyd Road, Nashville, TN 37189"
1,Antioch High School,"1900 Hobson Pike, Antioch, TN 37013"
2,Catholic Charities,"2806 McGavock Pk, Nashville, TN 37214"
3,Cole Elementary,"5060 Colemont Dr, Antioch, TN 37013"
4,Connectus Priest Lake,"2637 Murfreesboro Pike, Nashville, TN 37217"


In [4]:
partners.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 116 entries, 0 to 115
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Partner  116 non-null    object
 1   Address  113 non-null    object
dtypes: object(2)
memory usage: 1.9+ KB


In [5]:
# declare an empty list to store 
# latitude and longitude of values  
# of city column 
longitude = [] 
latitude = [] 
   
# function to find the coordinate 
# of a given address  
def findGeocode(address): 
       
    # try and catch is used to overcome 
    # the exception thrown by geolocator 
    # using geocodertimedout   
    try: 
          
        # Specify the user_agent as your 
        # app name it should not be none 
        geolocator = Nominatim(user_agent="paul") 
          
        return geolocator.geocode(address) 
      
    except GeocoderTimedOut: 
          
        return findGeocode(address)     
  
# each value from address column 
# will be fetched and sent to 
# function find_geocode    
for i in (partners['Address']): 
      
    if findGeocode(i) != None: 
           
        loc = findGeocode(i) 
          
        # coordinates returned from  
        # function is stored into 
        # two separate list 
        latitude.append(loc.latitude) 
        longitude.append(loc.longitude) 
       
    # if coordinate for a city not 
    # found, insert "NaN" indicating  
    # missing value  
    else: 
        latitude.append('NaN') 
        longitude.append('NaN') 
#Showing the output produced as dataframe.

#filter_none
#brightness_4
# now add this column to dataframe 
partners["Longitude"] = longitude 
partners["Latitude"] = latitude 
  
partners.head()

Unnamed: 0,Partner,Address,Longitude,Latitude
0,Alex Green Elementary,"3921 Lloyd Road, Nashville, TN 37189",-86.8358,36.2527
1,Antioch High School,"1900 Hobson Pike, Antioch, TN 37013",-86.5985,36.0471
2,Catholic Charities,"2806 McGavock Pk, Nashville, TN 37214",-86.6799,36.1689
3,Cole Elementary,"5060 Colemont Dr, Antioch, TN 37013",-86.6952,36.0607
4,Connectus Priest Lake,"2637 Murfreesboro Pike, Nashville, TN 37217",-86.6669,36.1036


In [15]:
#merge the two files to get addresses for partners
ndc_deliveries = pd.merge(deliveries, partners, left_on = 'Partners', right_on = 'Partner', how = 'left')
ndc_deliveries.head()

Unnamed: 0,Partners,Date,NB,1,2,3,4,5,6,Pullups,Total,Running Total,Ever to Date,Year to Date,Unnamed: 14,Unnamed: 15,Partner,Address,Longitude,Latitude
0,NICE Highlands Apartments,1/4/2020,500.0,400.0,450.0,600.0,600.0,600.0,600.0,,3750.0,3750.0,2321750.0,,,,NICE Highlands Apartments,"4646 Nolensville Pike, Nashville, TN 37211",-86.7236,36.0714
1,Martha O'Bryan,1/7/2020,,,,,600.0,600.0,600.0,,1800.0,5550.0,2323550.0,,,,Martha O'Bryan,"711 S 7th St, Nashville, TN 37206",-86.7606,36.174
2,Madison Hispanic 7th Day,1/7/2020,,,450.0,450.0,500.0,750.0,750.0,,2900.0,8450.0,2326450.0,,2900.0,,Madison Hispanic 7th Day,"1605 Neelys Bend Rd, Nashville, TN 37115",-86.7038,36.2554
3,Legacy Mission Village,1/7/2020,1000.0,1000.0,1600.0,2375.0,4500.0,4800.0,4800.0,,20075.0,28525.0,2346525.0,,,,Legacy Mission Village,"5123 Nolensville Pike, Nashville, TN 37211",-86.7236,36.0714
4,Tusculum Elementary,1/7/2020,150.0,175.0,150.0,150.0,150.0,150.0,150.0,,1075.0,29600.0,2347600.0,,,,Tusculum Elementary,"440 McMurray Dr, Nashville, TN 37211",-86.7225,36.0589


In [16]:
ndc_deliveries.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 769 entries, 0 to 768
Data columns (total 20 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Partners       769 non-null    object 
 1   Date           765 non-null    object 
 2   NB             721 non-null    float64
 3   1              723 non-null    float64
 4   2              733 non-null    float64
 5   3              742 non-null    float64
 6   4              747 non-null    float64
 7   5              753 non-null    float64
 8   6              749 non-null    float64
 9   Pullups        18 non-null     object 
 10  Total          765 non-null    float64
 11  Running Total  766 non-null    float64
 12  Ever to Date   766 non-null    float64
 13  Year to Date   44 non-null     object 
 14  Unnamed: 14    56 non-null     object 
 15  Unnamed: 15    18 non-null     object 
 16  Partner        769 non-null    object 
 17  Address        763 non-null    object 
 18  Longitude 

In [17]:
#check for null addresses
ndc_deliveries[ndc_deliveries['Address'].isnull()]

Unnamed: 0,Partners,Date,NB,1,2,3,4,5,6,Pullups,Total,Running Total,Ever to Date,Year to Date,Unnamed: 14,Unnamed: 15,Partner,Address,Longitude,Latitude
67,Team Music Is Love One Gen Away,3/6/2020,900.0,1050.0,1000.0,1600.0,800.0,825.0,750.0,,6925.0,69559.0,2532824.0,,,,Team Music Is Love One Gen Away,,11.048,46.3145
148,Salvation Army,4/6/2020,0.0,0.0,250.0,250.0,150.0,250.0,200.0,,1100.0,38800.0,2700105.0,,,,Salvation Army,,11.048,46.3145
294,Andrea Hollis,5/15/2020,0.0,0.0,0.0,0.0,46.0,0.0,0.0,,46.0,127978.0,3010758.0,,,,Andrea Hollis,,11.048,46.3145
392,Andrea Hollis,6/15/2020,,,,,100.0,,,,100.0,112223.0,3230719.0,,,,Andrea Hollis,,11.048,46.3145
489,Andrea Hollis,7/17/2020,,,,,150.0,,,,150.0,111735.0,3457726.0,1141076.0,32945.0,,Andrea Hollis,,11.048,46.3145
580,Andrea Hollis,9/2/2020,,,,,50.0,,,,50.0,17325.0,3726846.0,,,,Andrea Hollis,,11.048,46.3145


In [18]:
#drop rows with null addresses
ndc_deliveries=ndc_deliveries.dropna(subset=['Address'])
ndc_deliveries.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 763 entries, 0 to 768
Data columns (total 20 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Partners       763 non-null    object 
 1   Date           759 non-null    object 
 2   NB             718 non-null    float64
 3   1              720 non-null    float64
 4   2              730 non-null    float64
 5   3              739 non-null    float64
 6   4              741 non-null    float64
 7   5              750 non-null    float64
 8   6              746 non-null    float64
 9   Pullups        18 non-null     object 
 10  Total          759 non-null    float64
 11  Running Total  760 non-null    float64
 12  Ever to Date   760 non-null    float64
 13  Year to Date   43 non-null     object 
 14  Unnamed: 14    55 non-null     object 
 15  Unnamed: 15    18 non-null     object 
 16  Partner        763 non-null    object 
 17  Address        763 non-null    object 
 18  Longitude 

In [19]:
#only keep certain columns
#ndc_deliveries = ndc_deliveries[['Partners', 'Address', 'Total', 'Longitude', 'Latitude']].copy()
#ndc_deliveries.head()

Unnamed: 0,Partners,Address,Total
0,NICE Highlands Apartments,"4646 Nolensville Pike, Nashville, TN 37211",3750.0
1,Martha O'Bryan,"711 S 7th St, Nashville, TN 37206",1800.0
2,Madison Hispanic 7th Day,"1605 Neelys Bend Rd, Nashville, TN 37115",2900.0
3,Legacy Mission Village,"5123 Nolensville Pike, Nashville, TN 37211",20075.0
4,Tusculum Elementary,"440 McMurray Dr, Nashville, TN 37211",1075.0


In [23]:
#group by Partners and add values in total column
ndc_deliveries = ndc_deliveries.groupby(['Partners','Address'], as_index=False)['Total'].sum()
ndc_deliveries

Unnamed: 0,Partners,Address,Total
0,10th & Fatherland,"1000 Fatherland St, Nashville, TN 37206",3500.0
1,Alex Green Elementary,"3921 Lloyd Road, Nashville, TN 37189",2250.0
2,Antioch High School,"1900 Hobson Pike, Antioch, TN 37013",82250.0
3,Antioch Middle School,"5050 Blue Hole Rd, Antioch, TN 37013",3100.0
4,Apollo Middle School,"631 Richards Rd, Nashville, TN 37013",5150.0
...,...,...,...
104,Whites Creek High School,"7277 Old Hickory Blvd, Whites Creek, TN 37189",2450.0
105,Whitsitt Elementary,"110 Whitsett Rd, Nashville, TN 37210",1100.0
106,Wilson County Community Help Center,"203 W High St, Lebanon, TN 37087",2450.0
107,Woodmont Baptist Church,"2100 Woodmond Blvd, Nashville, TN 37215",3300.0


In [14]:
#export as csv
#partner_totals=partners.to_csv('partner_totals.csv', index = False)

In [24]:
#export as csv
#ndc_deliveries=ndc_deliveries.to_csv('ndc_deliveries.csv', index = False)