# How to Replace Values in Column Based On Another DataFrame in Pandas

In [1]:
import pandas as pd

# Load the earthquake catalogue, keep only the columns used in this walkthrough
# and take the last seven rows as a small sample. tail() keeps the original row
# labels, which the index-aligned assignments further down depend on.
# (The path is a plain string: it has no placeholders, so no f-prefix is needed.)
cols = ['Date', 'Time', 'Latitude', 'Longitude', 'Depth', 'Magnitude Type', 'Type', 'ID']
df1 = pd.read_csv('../data/earthquakes_1965_2016_database.csv.zip')[cols].tail(7)
df1

Unnamed: 0,Date,Time,Latitude,Longitude,Depth,Magnitude Type,Type,ID
23405,12/27/2016,23:20:56,45.7192,26.523,97.0,MWW,Earthquake,US10007N3R
23406,12/28/2016,08:18:01,38.3754,-118.8977,10.8,ML,Earthquake,NN00570709
23407,12/28/2016,08:22:12,38.3917,-118.8941,12.3,ML,Earthquake,NN00570710
23408,12/28/2016,09:13:47,38.3777,-118.8957,8.8,ML,Earthquake,NN00570744
23409,12/28/2016,12:38:51,36.9179,140.4262,10.0,MWW,Earthquake,US10007NAF
23410,12/29/2016,22:30:19,-9.0283,118.6639,79.0,MWW,Earthquake,US10007NL0
23411,12/30/2016,20:08:28,37.3973,141.4103,11.94,MB,Earthquake,US10007NTD


## Step 1: Create sample DataFrame

In [2]:
import geocoder

def geo_rev(x):
    """Reverse-geocode one row to a country name via ``geocoder.osm``.

    Parameters
    ----------
    x : row-like object exposing ``Latitude`` and ``Longitude`` attributes
        (for example a row handed over by ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    The ``'country'`` entry of the geocoder's JSON result, or the sentinel
    ``'no country'`` when there is no result or the result has no country.
    """
    g = geocoder.osm([x.Latitude, x.Longitude], method='reverse').json
    # A result can exist and still lack a 'country' entry; use the same
    # sentinel as for a missing result so the column never mixes None with
    # 'no country'.
    country = g.get('country') if g else None
    return country or 'no country'

# Reverse-geocode every (Latitude, Longitude) pair row by row. The resulting
# Series keeps df1's index, so df2 ends up with the same row labels as df1.
coordinates = df1[['Latitude', 'Longitude']]
df2 = pd.DataFrame({'country': coordinates.apply(geo_rev, axis=1)})
df2

Unnamed: 0,country
23405,România
23406,United States
23407,United States
23408,United States
23409,日本
23410,Indonesia
23411,no country


## Step 2: Replace Values with matching indices

In [3]:
# Assigning a DataFrame through .loc aligns on index labels. df2 shares its
# index with df1, so every row receives the ID of the df1 row with that label.
ids_by_label = df1[['ID']]
df2.loc[:, ['ID']] = ids_by_label
df2

Unnamed: 0,country,ID
23405,România,US10007N3R
23406,United States,NN00570709
23407,United States,NN00570710
23408,United States,NN00570744
23409,日本,US10007NAF
23410,Indonesia,US10007NL0
23411,no country,US10007NTD


In [4]:
# df3 gets a fresh 0..6 index (the old labels move into an 'index' column), so
# its index no longer matches df1's; it is used for the non-matching-index steps.
df3 = df2.head(7).reset_index().copy()
# Make two independent copies. Repeating a one-element tuple with `* 2` only
# repeats the reference, which would leave df4 and df5 bound to the very same
# DataFrame object, so mutating one would also change the other.
df4, df5 = df2.head(7).copy(), df2.head(7).copy()
df3

Unnamed: 0,index,country,ID
0,23405,România,US10007N3R
1,23406,United States,NN00570709
2,23407,United States,NN00570710
3,23408,United States,NN00570744
4,23409,日本,US10007NAF
5,23410,Indonesia,US10007NL0
6,23411,no country,US10007NTD


In [5]:
# Index-aligned assignment for two columns at once: df2 and df1 share row
# labels, so each row receives the coordinates of the df1 row with that label.
coordinate_cols = ['Latitude', 'Longitude']
df2.loc[:, coordinate_cols] = df1[coordinate_cols]
df2

Unnamed: 0,country,ID,Latitude,Longitude
23405,România,US10007N3R,45.7192,26.523
23406,United States,NN00570709,38.3754,-118.8977
23407,United States,NN00570710,38.3917,-118.8941
23408,United States,NN00570744,38.3777,-118.8957
23409,日本,US10007NAF,36.9179,140.4262
23410,Indonesia,US10007NL0,-9.0283,118.6639
23411,no country,US10007NTD,37.3973,141.4103


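## Step 3: Replace Values with non-matching indices

When the two DataFrames do not share index labels, index-aligned assignment finds no matching rows and fills the target columns with `NaN`: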

In [6]:
# Repeat the index-aligned assignment on df3, whose index was reset to 0..6.
# None of df1's row labels exist in df3, so alignment finds no matching rows
# and both columns come out as NaN.
coordinate_cols = ['Latitude', 'Longitude']
df3.loc[:, coordinate_cols] = df1[coordinate_cols]
df3

Unnamed: 0,index,country,ID,Latitude,Longitude
0,23405,România,US10007N3R,,
1,23406,United States,NN00570709,,
2,23407,United States,NN00570710,,
3,23408,United States,NN00570744,,
4,23409,日本,US10007NAF,,
5,23410,Indonesia,US10007NL0,,
6,23411,no country,US10007NTD,,


## Step 3 (continued): Replace Values with non-matching indices by matching on a key column

In [7]:
col = 'ID'
cols_to_replace = ['Latitude', 'Longitude']

# Build an ID -> values lookup from df1 and fetch the rows in df3's own order.
# Copying raw values from an independently filtered df1 would pair rows purely
# by position, which silently mixes rows up when the two frames are ordered
# differently; looking up by key does not depend on row order.
lookup = df1.drop_duplicates(subset=col).set_index(col)[cols_to_replace]
matched = df3[col].isin(lookup.index)
# to_numpy() drops the ID index, so the already key-ordered values are written
# positionally into the selected rows instead of being re-aligned on labels.
df3.loc[matched, cols_to_replace] = lookup.loc[df3.loc[matched, col]].to_numpy()

In [8]:
# Display df3 to confirm that Latitude and Longitude are now filled in.
df3

Unnamed: 0,index,country,ID,Latitude,Longitude
0,23405,România,US10007N3R,45.7192,26.523
1,23406,United States,NN00570709,38.3754,-118.8977
2,23407,United States,NN00570710,38.3917,-118.8941
3,23408,United States,NN00570744,38.3777,-118.8957
4,23409,日本,US10007NAF,36.9179,140.4262
5,23410,Indonesia,US10007NL0,-9.0283,118.6639
6,23411,no country,US10007NTD,37.3973,141.4103


## Step 4: Replace Values with merge of the two DataFrames

In [9]:
# Peek at df1, the frame that supplies the additional columns for the merge.
df1.head(2)

Unnamed: 0,Date,Time,Latitude,Longitude,Depth,Magnitude Type,Type,ID
23405,12/27/2016,23:20:56,45.7192,26.523,97.0,MWW,Earthquake,US10007N3R
23406,12/28/2016,08:18:01,38.3754,-118.8977,10.8,ML,Earthquake,NN00570709


In [10]:
# Peek at df4 before the merge: it only has 'country' and 'ID' so far.
df4.head(2)

Unnamed: 0,country,ID
23405,România,US10007N3R
23406,United States,NN00570709


### The column does not exist in the first DataFrame

In [11]:
# Left join on 'ID': every df4 row is kept and gains df1's remaining columns.
# The merged frame comes back with a fresh 0..n-1 index rather than df4's labels.
df4 = pd.merge(df4, df1, how="left", on='ID')
df4

Unnamed: 0,country,ID,Date,Time,Latitude,Longitude,Depth,Magnitude Type,Type
0,România,US10007N3R,12/27/2016,23:20:56,45.7192,26.523,97.0,MWW,Earthquake
1,United States,NN00570709,12/28/2016,08:18:01,38.3754,-118.8977,10.8,ML,Earthquake
2,United States,NN00570710,12/28/2016,08:22:12,38.3917,-118.8941,12.3,ML,Earthquake
3,United States,NN00570744,12/28/2016,09:13:47,38.3777,-118.8957,8.8,ML,Earthquake
4,日本,US10007NAF,12/28/2016,12:38:51,36.9179,140.4262,10.0,MWW,Earthquake
5,Indonesia,US10007NL0,12/29/2016,22:30:19,-9.0283,118.6639,79.0,MWW,Earthquake
6,no country,US10007NTD,12/30/2016,20:08:28,37.3973,141.4103,11.94,MB,Earthquake


### The column exists in the first DataFrame

In [12]:
import numpy as np

# Give df5 a 'Longitude' column; the assignment is index-aligned with df1.
df5.loc[:, ['ID', 'Longitude']] = df1[['ID', 'Longitude']]

# Blank out three values so that there is something to repair from df1.
# `np.nan` is the canonical spelling; the `np.NaN` alias was removed in NumPy 2.0.
# The column is located by name so this does not rely on 'Longitude' being last.
df5.iloc[[1, 3, 4], df5.columns.get_loc('Longitude')] = np.nan
df5.head(5)

Unnamed: 0,country,ID,Longitude
23405,România,US10007N3R,26.523
23406,United States,NN00570709,
23407,United States,NN00570710,-118.8941
23408,United States,NN00570744,
23409,日本,US10007NAF,


In [13]:
# Show all of df5, including the three rows whose Longitude was blanked out.
df5

Unnamed: 0,country,ID,Longitude
23405,România,US10007N3R,26.523
23406,United States,NN00570709,
23407,United States,NN00570710,-118.8941
23408,United States,NN00570744,
23409,日本,US10007NAF,
23410,Indonesia,US10007NL0,118.6639
23411,no country,US10007NTD,141.4103


In [14]:
# Bring in df1's Longitude by ID. The name clash with df5's own column yields
# 'Longitude_x' (from df5) and 'Longitude_y' (from df1). Prefer the df1 value,
# fall back to df5's value only where df1 has none, then keep a single column.
df5 = (
    df5
    .merge(df1[['Longitude', 'ID']], on='ID', how="left")
    .assign(Longitude=lambda merged: merged['Longitude_y'].fillna(merged['Longitude_x']))
    .drop(columns=['Longitude_x', 'Longitude_y'])
)

df5

Unnamed: 0,country,ID,Longitude
0,România,US10007N3R,26.523
1,United States,NN00570709,-118.8977
2,United States,NN00570710,-118.8941
3,United States,NN00570744,-118.8957
4,日本,US10007NAF,140.4262
5,Indonesia,US10007NL0,118.6639
6,no country,US10007NTD,141.4103
