## Swiftapply – A Python Package for Efficient and Superfast use of Pandas ‘apply’ Function

### Imports and Data 

In [3]:
import pandas as pd
import numpy as np
import swifter

In [4]:
# Load the trip records from 'trip.csv' (a relative path, resolved against the
# kernel's current working directory) into the `trips` DataFrame.
trips = pd.read_csv('trip.csv')

In [6]:
# Print the (rows, columns) shape, then show the first five rows as the cell output.
print(trips.shape)
trips.head()

(669959, 11)


Unnamed: 0,id,duration,start_date,start_station_name,start_station_id,end_date,end_station_name,end_station_id,bike_id,subscription_type,zip_code
0,4576,63,8/29/2013 14:13,South Van Ness at Market,66,8/29/2013 14:14,South Van Ness at Market,66,520,Subscriber,94127
1,4607,70,8/29/2013 14:42,San Jose City Hall,10,8/29/2013 14:43,San Jose City Hall,10,661,Subscriber,95138
2,4130,71,8/29/2013 10:16,Mountain View City Hall,27,8/29/2013 10:17,Mountain View City Hall,27,48,Subscriber,97214
3,4251,77,8/29/2013 11:29,San Jose City Hall,10,8/29/2013 11:30,San Jose City Hall,10,26,Subscriber,95060
4,4299,83,8/29/2013 12:02,South Van Ness at Market,66,8/29/2013 12:04,Market at 10th,67,319,Subscriber,94103


### Apply any function in the fastest available manner

In [9]:
def bikes_start_end(end_id):
    """Classify a single end-station id against the station-id cutoff.

    This is the element-wise (scalar) counterpart of
    ``bikes_start_end_vectorized`` and uses the same cutoff of 5 so that both
    functions produce identical classifications for the same input.

    Parameters
    ----------
    end_id : int or float
        One station id (a single value, not a Series).

    Returns
    -------
    bool
        True when ``end_id`` is strictly greater than 5, otherwise False.
    """
    # The cutoff was previously 50, which disagreed with the vectorized version
    # and with the recorded notebook output (ids 10 and 27 are classified True).
    #
    # The explicit scalar branch is kept on purpose: it only works on one value
    # at a time, which is what this cell is meant to demonstrate.
    if end_id > 5:
        return True
    return False

In [11]:
# Time the scalar classifier applied through swifter to every end_station_id;
# the result is stored in the new 'bike_station_class' column.
%time trips['bike_station_class'] = trips['end_station_id'].swifter.apply(bikes_start_end)

HBox(children=(FloatProgress(value=0.0, description='Dask Apply', max=8.0, style=ProgressStyle(description_wid…


Wall time: 5.14 s


#### Write code in a vectorized format

In [18]:
def bikes_start_end_vectorized(end_id):
    """Classify station ids against the cutoff of 5 in one array operation.

    Parameters
    ----------
    end_id : array-like or scalar
        Station id(s); anything that supports ``> 5`` element-wise.

    Returns
    -------
    numpy.ndarray
        Boolean array: True where the id is strictly greater than 5,
        False elsewhere.
    """
    is_above_cutoff = end_id > 5
    return np.where(is_above_cutoff, True, False)

In [19]:
# Time the vectorized classifier applied through swifter to the same column;
# the result is stored in 'bike_station_class_vec' for comparison with the
# scalar run above.
%time trips['bike_station_class_vec'] = trips['end_station_id'].swifter.apply(bikes_start_end_vectorized)

Wall time: 15.6 ms


In [20]:
# Preview the first rows to compare 'bike_station_class' with 'bike_station_class_vec'.
trips.head()

Unnamed: 0,id,duration,start_date,start_station_name,start_station_id,end_date,end_station_name,end_station_id,bike_id,subscription_type,zip_code,bike_station_class,bike_station_class_vec
0,4576,63,8/29/2013 14:13,South Van Ness at Market,66,8/29/2013 14:14,South Van Ness at Market,66,520,Subscriber,94127,True,True
1,4607,70,8/29/2013 14:42,San Jose City Hall,10,8/29/2013 14:43,San Jose City Hall,10,661,Subscriber,95138,True,True
2,4130,71,8/29/2013 10:16,Mountain View City Hall,27,8/29/2013 10:17,Mountain View City Hall,27,48,Subscriber,97214,True,True
3,4251,77,8/29/2013 11:29,San Jose City Hall,10,8/29/2013 11:30,San Jose City Hall,10,26,Subscriber,95060,True,True
4,4299,83,8/29/2013 12:02,South Van Ness at Market,66,8/29/2013 12:04,Market at 10th,67,319,Subscriber,94103,True,True


#### When you can't write code in a vectorized format, swifter still makes parallel processing easy

In [22]:
# Time converting the 'start_date' strings with pd.to_datetime through
# swifter.apply; the parsed values are stored in the new 'date' column.
%time trips['date'] = trips['start_date'].swifter.apply(pd.to_datetime)

Wall time: 48.8 s


In [24]:
# Preview the first rows to check the newly added 'date' column.
trips.head()

Unnamed: 0,id,duration,start_date,start_station_name,start_station_id,end_date,end_station_name,end_station_id,bike_id,subscription_type,zip_code,bike_station_class,bike_station_class_vec,date
0,4576,63,8/29/2013 14:13,South Van Ness at Market,66,8/29/2013 14:14,South Van Ness at Market,66,520,Subscriber,94127,True,True,2013-08-29 14:13:00
1,4607,70,8/29/2013 14:42,San Jose City Hall,10,8/29/2013 14:43,San Jose City Hall,10,661,Subscriber,95138,True,True,2013-08-29 14:42:00
2,4130,71,8/29/2013 10:16,Mountain View City Hall,27,8/29/2013 10:17,Mountain View City Hall,27,48,Subscriber,97214,True,True,2013-08-29 10:16:00
3,4251,77,8/29/2013 11:29,San Jose City Hall,10,8/29/2013 11:30,San Jose City Hall,10,26,Subscriber,95060,True,True,2013-08-29 11:29:00
4,4299,83,8/29/2013 12:02,South Van Ness at Market,66,8/29/2013 12:04,Market at 10th,67,319,Subscriber,94103,True,True,2013-08-29 12:02:00


### Multiple columns apply example 

### Rolling objects apply example 

In [51]:
# Time a rolling apply through swifter: each window of 10 consecutive
# 'duration' values is reduced with the built-in sum and stored in
# 'rolling_sum_duration'. In the recorded output the first nine rows are empty
# and the first value appears at row 9.
%time trips["rolling_sum_duration"] = trips['duration'].swifter.rolling(10).apply(sum)

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Pandas Apply', max=1.0, style=ProgressS…


Wall time: 2min 36s


In [54]:
# Show the first 40 rows (all columns) to inspect the rolling-sum column.
trips.head(40)

Unnamed: 0,id,duration,start_date,start_station_name,start_station_id,end_date,end_station_name,end_station_id,bike_id,subscription_type,zip_code,bike_station_class,bike_station_class_vec,date,rolling_sum_duration
0,4576,63,8/29/2013 14:13,South Van Ness at Market,66,8/29/2013 14:14,South Van Ness at Market,66,520,Subscriber,94127,True,True,2013-08-29 14:13:00,
1,4607,70,8/29/2013 14:42,San Jose City Hall,10,8/29/2013 14:43,San Jose City Hall,10,661,Subscriber,95138,True,True,2013-08-29 14:42:00,
2,4130,71,8/29/2013 10:16,Mountain View City Hall,27,8/29/2013 10:17,Mountain View City Hall,27,48,Subscriber,97214,True,True,2013-08-29 10:16:00,
3,4251,77,8/29/2013 11:29,San Jose City Hall,10,8/29/2013 11:30,San Jose City Hall,10,26,Subscriber,95060,True,True,2013-08-29 11:29:00,
4,4299,83,8/29/2013 12:02,South Van Ness at Market,66,8/29/2013 12:04,Market at 10th,67,319,Subscriber,94103,True,True,2013-08-29 12:02:00,
5,4927,103,8/29/2013 18:54,Golden Gate at Polk,59,8/29/2013 18:56,Golden Gate at Polk,59,527,Subscriber,94109,True,True,2013-08-29 18:54:00,
6,4500,109,8/29/2013 13:25,Santa Clara at Almaden,4,8/29/2013 13:27,Adobe on Almaden,5,679,Subscriber,95112,False,False,2013-08-29 13:25:00,
7,4563,111,8/29/2013 14:02,San Salvador at 1st,8,8/29/2013 14:04,San Salvador at 1st,8,687,Subscriber,95112,True,True,2013-08-29 14:02:00,
8,4760,113,8/29/2013 17:01,South Van Ness at Market,66,8/29/2013 17:03,South Van Ness at Market,66,553,Subscriber,94103,True,True,2013-08-29 17:01:00,
9,4258,114,8/29/2013 11:33,San Jose City Hall,10,8/29/2013 11:35,MLK Library,11,107,Subscriber,95060,True,True,2013-08-29 11:33:00,914.0
