### How to fill missing value based on other columns in Pandas dataframe?

In [1]:
import pandas as pd
import numpy as np
df = pd.DataFrame(
    np.array([[1, 2, 3], [4, 5, np.nan], [7, 8, 9], [3, 2, np.nan], [5, 6, np.nan]]), 
    columns=['a', 'b', 'c']
)
df

Unnamed: 0,a,b,c
0,1.0,2.0,3.0
1,4.0,5.0,
2,7.0,8.0,9.0
3,3.0,2.0,
4,5.0,6.0,


In [2]:
df.apply(
    lambda row: row['a']*row['b'] if np.isnan(row['c']) else row['c'],
    axis=1
) # axis=1 for going over row by row

0     3.0
1    20.0
2     9.0
3     6.0
4    30.0
dtype: float64

In [3]:
df['c'] = df.apply(
    lambda row: row['a']*row['b'] if np.isnan(row['c']) else row['c'],
    axis=1
)
df

Unnamed: 0,a,b,c
0,1.0,2.0,3.0
1,4.0,5.0,20.0
2,7.0,8.0,9.0
3,3.0,2.0,6.0
4,5.0,6.0,30.0


### Change one value based on another value in pandas

In [8]:
df2 = pd.DataFrame(
    [
        [103, 'a', 'b'], 
        [104, 'c', 'd'],
        [105, 'zz', 'aa'],
    ], 
    columns=['ID', 'First_Name', 'Last_Name']
)
df2
#    ID First_Name Last_Name
#0  103          a         b
#1  104          c         d

Unnamed: 0,ID,First_Name,Last_Name
0,103,a,b
1,104,c,d
2,105,zz,aa


In [9]:
# Create the dicts:
fnames = {103: "Matt", 104: "Mr"}
lnames = {103: "Jones", 104: "X"}
fnames
lnames

{103: 'Jones', 104: 'X'}

In [10]:
#And map:
df2['First_Name'] = df2['ID'].map(fnames)
df2['Last_Name'] = df2['ID'].map(lnames)
df2

Unnamed: 0,ID,First_Name,Last_Name
0,103,Matt,Jones
1,104,Mr,X
2,105,,


In [14]:
# Or use a custom function:
names = {
    103: ("Matt2", "Jones2"), 
    104: ("Mr2", "X2"),
    105: ("AA2", "BB2")
}
names

{103: ('Matt2', 'Jones2'), 104: ('Mr2', 'X2'), 105: ('AA2', 'BB2')}

In [15]:
df2['First_Name'] = df2['ID'].map(lambda x: names[x][0])
df2

Unnamed: 0,ID,First_Name,Last_Name
0,103,Matt2,Jones
1,104,Mr2,X
2,105,AA2,


### Combine two pandas Data Frames (join on a common column)

## Sort  a list

In [None]:
from operator import itemgetter
data = [('abc', 121),('aac', 231),('abca', 148), ('aaa',999), ('zzz',111)]
data2=sorted(data,key=itemgetter(1),reverse=False)

data2[0][0]

## Distance between 2 locations (latitude,longitude)

In [1]:
# Distance
import geopy.distance

coords_1 = (52.2296756, 21.0122287)
coords_2 = (52.406374, 16.9251681)

geopy.distance.vincenty(coords_1, coords_2).km # 279 Km

279.35290160386563