# Practicing with merge

In [2]:
import numpy as np
import pandas as pd

In [3]:
# PRACTICING MERGE -- ignore this cell
sample_school = pd.DataFrame({'BEDS Code':[0,1,2,3,4,5,1,2,3,1],
                              'Other Field': list('abcdefghij'),
                              'Another Field':[10,11,12,13,14,15]+[np.nan]*4})
sample_latlon = pd.DataFrame({'SED CODE':[1,2,3], 'latlon':['one','two','three']})

new = pd.merge(sample_school, sample_latlon, left_on=['BEDS Code'],right_on=['SED CODE'], how='left')
new

Unnamed: 0,Another Field,BEDS Code,Other Field,SED CODE,latlon
0,10.0,0,a,,
1,11.0,1,b,1.0,one
2,12.0,2,c,2.0,two
3,13.0,3,d,3.0,three
4,14.0,4,e,,
5,15.0,5,f,,
6,,1,g,1.0,one
7,,2,h,2.0,two
8,,3,i,3.0,three
9,,1,j,1.0,one


In [4]:
# Summarise the merge result: distinct schools overall, and how many of them
# have no lat/long after the left join.
unique_school_count = len(new['BEDS Code'].unique())
print('... {} unique schools,'.format(unique_school_count))

latlon_is_missing = new['latlon'].isnull()
schools_missing_latlon = new.loc[latlon_is_missing, 'BEDS Code'].unique()
print('... of which {} are missing lat/long'.format(len(schools_missing_latlon)))

... 6 unique schools,
... of which 3 are missing lat/long


# Practicing with filling NaNs
GOAL: to use this to fill in missing boroughs

In [5]:
# Raw material for filling 'Another Field': keep only the rows where it is known,
# then dump the two columns as {column -> {row label -> value}}.
has_another_field = new['Another Field'].notnull()
d = new.loc[has_another_field, ['BEDS Code', 'Another Field']].to_dict()
d

{'Another Field': {0: 10.0, 1: 11.0, 2: 12.0, 3: 13.0, 4: 14.0, 5: 15.0},
 'BEDS Code': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5}}

In [6]:
# Pair the two per-column dicts up by row label to get {BEDS Code -> Another Field}.
map_vals = {
    beds_code: d['Another Field'][row_label]
    for row_label, beds_code in d['BEDS Code'].items()
}
map_vals

{0: 10.0, 1: 11.0, 2: 12.0, 3: 13.0, 4: 14.0, 5: 15.0}

In [7]:
# Look each row's 'BEDS Code' up in map_vals and store the result as a new column.
third_field_values = new['BEDS Code'].map(map_vals)
new['A Third Field'] = third_field_values
new

In [12]:
# What if the map dict had been missing values?
# Rebuild the same {BEDS Code -> Another Field} lookup, then remove code 1 from it.
other_map = {}
for idx in d['Another Field'].keys():
    other_map[d['BEDS Code'][idx]] = d['Another Field'][idx]
other_map.pop(1)
other_map

{0: 10.0, 2: 12.0, 3: 13.0, 4: 14.0, 5: 15.0}

In [13]:
# Result: any 'BEDS Code' absent from the mapping dict comes back as NaN.
fourth_field_values = new['BEDS Code'].map(other_map)
new['A Fourth Field'] = fourth_field_values
new

Unnamed: 0,Another Field,BEDS Code,Other Field,SED CODE,latlon,A Third Field,A Fourth Field
0,10.0,0,a,,,10,10.0
1,11.0,1,b,1.0,one,11,
2,12.0,2,c,2.0,two,12,12.0
3,13.0,3,d,3.0,three,13,13.0
4,14.0,4,e,,,14,14.0
5,15.0,5,f,,,15,15.0
6,,1,g,1.0,one,11,
7,,2,h,2.0,two,12,12.0
8,,3,i,3.0,three,13,13.0
9,,1,j,1.0,one,11,


In [14]:
# Can I map without the hand-built dictionary? Yes -- Series.map accepts a Series.
# Gotcha: map() looks each value up in the mapper's *index*. Passing
# new['Another Field'] directly would match 'BEDS Code' against row labels,
# which is only right here because rows 0-5 happen to hold codes 0-5.
# Build the lookup explicitly indexed by 'BEDS Code' instead:
#   - keep only rows where 'Another Field' is known,
#   - drop repeated codes so the lookup index is unique (first occurrence wins),
#   - index by 'BEDS Code'.
another_field_by_code = (
    new.dropna(subset=['Another Field'])
       .drop_duplicates(subset=['BEDS Code'])
       .set_index('BEDS Code')['Another Field']
)
new['A Fifth Field'] = new['BEDS Code'].map(another_field_by_code)
new

Unnamed: 0,Another Field,BEDS Code,Other Field,SED CODE,latlon,A Third Field,A Fourth Field,A Fifth Field
0,10.0,0,a,,,10,10.0,10
1,11.0,1,b,1.0,one,11,,11
2,12.0,2,c,2.0,two,12,12.0,12
3,13.0,3,d,3.0,three,13,13.0,13
4,14.0,4,e,,,14,14.0,14
5,15.0,5,f,,,15,15.0,15
6,,1,g,1.0,one,11,,11
7,,2,h,2.0,two,12,12.0,12
8,,3,i,3.0,three,13,13.0,13
9,,1,j,1.0,one,11,,11
