# How to apply a function to multiple columns in pandas

In [1]:
import pandas as pd

# Keep only the last 10 rows of the dataset for the examples below.
# The path contains no placeholders, so a plain string literal is used.
df = pd.read_csv('../data/earthquakes_1965_2016_database.csv.zip').tail(10)
df

Unnamed: 0,Date,Time,Latitude,Longitude,Type,Depth,Depth Error,Depth Seismic Stations,Magnitude,Magnitude Type,...,Magnitude Seismic Stations,Azimuthal Gap,Horizontal Distance,Horizontal Error,Root Mean Square,ID,Source,Location Source,Magnitude Source,Status
23402,12/24/2016,03:58:55,-5.146,153.5166,Earthquake,30.0,1.8,,5.8,MWW,...,,14.0,1.648,7.0,0.85,US10007MFP,US,US,US,Reviewed
23403,12/25/2016,14:22:27,-43.4029,-73.9395,Earthquake,38.0,1.9,,7.6,MWW,...,,29.0,0.351,6.8,0.8,US10007MN3,US,US,US,Reviewed
23404,12/25/2016,14:32:13,-43.481,-74.4771,Earthquake,14.93,3.3,,5.6,MB,...,83.0,96.0,0.697,7.1,0.52,US10007MNB,US,US,US,Reviewed
23405,12/27/2016,23:20:56,45.7192,26.523,Earthquake,97.0,1.8,,5.6,MWW,...,,14.0,0.465,5.1,0.78,US10007N3R,US,US,US,Reviewed
23406,12/28/2016,08:18:01,38.3754,-118.8977,Earthquake,10.8,1.3,34.0,5.6,ML,...,20.0,35.86,0.132,,0.1988,NN00570709,NN,NN,NN,Reviewed
23407,12/28/2016,08:22:12,38.3917,-118.8941,Earthquake,12.3,1.2,40.0,5.6,ML,...,18.0,42.47,0.12,,0.1898,NN00570710,NN,NN,NN,Reviewed
23408,12/28/2016,09:13:47,38.3777,-118.8957,Earthquake,8.8,2.0,33.0,5.5,ML,...,18.0,48.58,0.129,,0.2187,NN00570744,NN,NN,NN,Reviewed
23409,12/28/2016,12:38:51,36.9179,140.4262,Earthquake,10.0,1.8,,5.9,MWW,...,,91.0,0.992,4.8,1.52,US10007NAF,US,US,US,Reviewed
23410,12/29/2016,22:30:19,-9.0283,118.6639,Earthquake,79.0,1.8,,6.3,MWW,...,,26.0,3.553,6.0,1.43,US10007NL0,US,US,US,Reviewed
23411,12/30/2016,20:08:28,37.3973,141.4103,Earthquake,11.94,2.2,,5.5,MB,...,428.0,97.0,0.681,4.5,0.91,US10007NTD,US,US,US,Reviewed


## Apply function

In [3]:
import geocoder

def geo_rev(x):
    """Reverse-geocode one row and return its country name.

    Parameters
    ----------
    x : mapping-like (e.g. a DataFrame row)
        Must provide 'Latitude' and 'Longitude' entries.

    Returns
    -------
    str
        The 'country' entry of the OSM reverse-geocoding result, or
        'no country' when the lookup yields nothing or the result has no
        country entry.
    """
    g = geocoder.osm([x['Latitude'], x['Longitude']], method='reverse').json
    if not g:
        return 'no country'
    # A result may exist without a 'country' entry; return the same sentinel
    # instead of leaking None into the resulting Series.
    country = g.get('country')
    return 'no country' if country is None else country

# axis=1 hands each row to geo_rev, so the function can read several columns at once.
df.apply(geo_rev, axis=1)

23402    Papua Niugini
23403            Chile
23404            Chile
23405          România
23406    United States
23407    United States
23408    United States
23409               日本
23410        Indonesia
23411       no country
dtype: object

## Apply function with parameters

In [4]:
def get_date_time(row, date, time):
    """Combine two entries of ``row`` into a single 'date time' value.

    ``date`` and ``time`` are the keys (column labels) to look up in ``row``.
    The two looked-up values are concatenated with one space between them.
    """
    date_with_gap = row[date] + ' '
    return date_with_gap + row[time]

# Keyword arguments that apply() does not consume itself (date=, time=) are
# forwarded to get_date_time on every row.
df.apply(get_date_time, axis=1, date='Date', time='Time')

23402    12/24/2016 03:58:55
23403    12/25/2016 14:22:27
23404    12/25/2016 14:32:13
23405    12/27/2016 23:20:56
23406    12/28/2016 08:18:01
23407    12/28/2016 08:22:12
23408    12/28/2016 09:13:47
23409    12/28/2016 12:38:51
23410    12/29/2016 22:30:19
23411    12/30/2016 20:08:28
dtype: object

## Lambda with multiple input values

In [5]:
import pandas as pd

def geo_rev(lat, lon):
    """Reverse-geocode a latitude/longitude pair and return its country name.

    Parameters
    ----------
    lat, lon
        Coordinates passed to the OSM reverse-geocoding lookup as [lat, lon].

    Returns
    -------
    str
        The 'country' entry of the lookup result, or 'no country' when the
        lookup yields nothing or the result has no country entry.
    """
    g = geocoder.osm([lat, lon], method='reverse').json
    # Treat "no result" and "result without a country" the same way so the
    # caller always gets a string back, never None.
    country = g.get('country') if g else None
    if country is None:
        return 'no country'
    return country

# The lambda picks the two needed values out of each row and passes them to
# geo_rev as separate positional arguments.
df.apply(lambda x: geo_rev(x['Latitude'], x['Longitude']), axis=1)

23402    Papua Niugini
23403            Chile
23404            Chile
23405          România
23406    United States
23407    United States
23408    United States
23409               日本
23410        Indonesia
23411       no country
dtype: object

In [6]:
# Store the result as a new 'country' column on df. This repeats the same
# per-row lookup as the previous cell, so every row is geocoded again.
df['country'] = df.apply(lambda x: geo_rev(x['Latitude'], x['Longitude']), axis=1)

## Multiple columns

In [7]:
def geo_rev(lat, lon, mag):
    """Reverse-geocode a coordinate pair and label it with a magnitude.

    Parameters
    ----------
    lat, lon
        Coordinates passed to the OSM reverse-geocoding lookup as [lat, lon].
    mag
        Value appended to the country name after a space (via ``str(mag)``).

    Returns
    -------
    str
        '<country> <mag>' when the lookup result has a country, otherwise
        'no country ' (with the trailing space).
    """
    g = geocoder.osm([lat, lon], method='reverse').json
    # A result without a 'country' entry used to raise TypeError on the
    # None + str concatenation; handle it like an empty lookup instead.
    country = g.get('country') if g else None
    if country is None:
        return 'no country '
    return country + ' ' + str(mag)

# Select only the needed columns, then unpack each row positionally with *x.
# The column order in the selection must match geo_rev's (lat, lon, mag) order.
df[['Latitude', 'Longitude', 'Magnitude']].apply(lambda x: geo_rev(*x), axis=1)

23402    Papua Niugini 5.8
23403            Chile 7.6
23404            Chile 5.6
23405          România 5.6
23406    United States 5.6
23407    United States 5.6
23408    United States 5.5
23409               日本 5.9
23410        Indonesia 6.3
23411          no country 
dtype: object

## Without apply (using map)

In [8]:
def geo_rev(lat, lon):
    """Reverse-geocode a latitude/longitude pair and return its country name.

    Parameters
    ----------
    lat, lon
        Coordinates passed to the OSM reverse-geocoding lookup as [lat, lon].

    Returns
    -------
    str
        The 'country' entry of the lookup result, or 'no country ' (with the
        trailing space) when the lookup yields nothing or the result has no
        country entry.
    """
    g = geocoder.osm([lat, lon], method='reverse').json
    if g:
        country = g.get('country')
        # Only a truly missing country falls through to the sentinel below.
        if country is not None:
            return country
    return 'no country '


# map() walks the two columns in parallel and calls geo_rev(lat, lon) once per
# pair; list() collects the results into a plain Python list.
list(map(geo_rev, df['Latitude'], df['Longitude']))

['Papua Niugini',
 'Chile',
 'Chile',
 'România',
 'United States',
 'United States',
 'United States',
 '日本',
 'Indonesia',
 'no country ']

## Performance tests

In [9]:
# Times the row-wise apply() variant. Run this cell before the next one: that
# cell redefines get_date_time with a (date, time) signature, after which this
# call no longer works.
%timeit df.apply(get_date_time, axis=1, date='Date', time='Time')

1000 loops, best of 3: 760 µs per loop


In [10]:
def get_date_time(date, time):
    """Return ``date`` and ``time`` concatenated with one space between them."""
    date_with_gap = date + ' '
    return date_with_gap + time

# Times the map() variant, which passes the two column values straight to
# get_date_time(date, time).
%timeit list(map(get_date_time, df['Date'], df['Time']))

The slowest run took 11.65 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 12 µs per loop
