In [40]:
import pandas as pd
import numpy as np

In [130]:
# Load the cleaned donor data and keep only the columns used in this notebook.
# NOTE: unpickling can execute arbitrary code -- only load pickle files from a trusted source.
df = pd.read_pickle('sef_donors_cleaned.pkl').loc[:, [
    'donor_id', 'donation_date', 'amount', 'appeal', 'charitable', 'fund',
    'city', 'state', 'county', 'zipcode', 'latitude', 'longitude', 'timezone',
]]

### Feature engineering:
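1. Add columns for the year, month, and day of week of each donation date
2. Add a column listing, for each donor, the bins (first year, last year) of consecutive years in which they donated
3. Add a column with the number of bins in the list from #2 above
4. Add a flag marking whether a donation's year falls inside one of the donor's bins from #2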

In [131]:
# Calendar features derived from each donation's date.
df['donation_year'] = df.donation_date.apply(lambda x: x.year)
# Take the month component here (this column previously duplicated the year).
df['donation_month'] = df.donation_date.apply(lambda x: x.month)
# For pandas Timestamps, dayofweek is 0 for Monday through 6 for Sunday.
df['donation_dow'] = df.donation_date.apply(lambda x: x.dayofweek)

In [132]:
def get_repeat_donation_year_bins(years, maxyear=2015):
    '''
    Group the years in which a donor gave into bins of consecutive years.

    input:
        years: iterable (list or numpy array) of years in which donations were
            made; may be in any order and may contain duplicates
        maxyear: cut-off year (default 2015, presumably the last year covered
            by the data -- confirm). If the donor gave in `maxyear`, the
            following year is treated as a donation year too, so a run that
            reaches `maxyear` is extended to `maxyear + 1`. Pass None to
            disable this extension.
    output: list of (first_year, last_year) tuples in ascending order, one per
        run of at least two consecutive years; isolated single years are omitted
    example:
        get_repeat_donation_year_bins([2002, 2003, 2004, 2006, 2010, 2011, 2013]) => [(2002, 2004), (2010, 2011)]
    '''
    year_set = set(years)
    if maxyear is not None and maxyear in year_set:
        # np.append returns a new array; the original code discarded that
        # result, so this extension silently never happened.
        year_set.add(maxyear + 1)

    # Sort before pairing: run starts and run ends are matched positionally,
    # which is only correct when both are listed in ascending order (the
    # output of Series.unique() is in order of appearance, not sorted).
    ordered_years = sorted(year_set)
    run_starts = [year for year in ordered_years if year - 1 not in year_set]
    run_ends = [year for year in ordered_years if year + 1 not in year_set]

    # A run whose start equals its end is a single isolated year, not a repeat.
    return [(start, end) for start, end in zip(run_starts, run_ends) if start != end]

In [133]:
# Compute, per donor, the bins of consecutive donation years and attach them to
# every donation row of that donor as `donation_year_bins`.
repeat_year_bins = (
    df.groupby('donor_id')['donation_year']
      .unique()
      .apply(get_repeat_donation_year_bins)
      .rename('donation_year_bins')
)
# Joining on the `donor_id` column (instead of an inplace set_index/reset_index
# round trip) and dropping any stale copy of the column first keeps this cell
# idempotent: re-running it no longer produces a duplicate `donation_year_bins`.
df = (
    df.drop(columns='donation_year_bins', errors='ignore')
      .join(repeat_year_bins, on='donor_id')
      .reset_index(drop=True)
)

In [141]:
# Number of consecutive-year bins recorded for the donor of each donation.
df['donation_year_bincount'] = df['donation_year_bins'].map(len)

In [147]:
def is_recurring_donation(row):
    '''
    Tell whether a donation falls inside one of its donor's consecutive-year bins.

    Input: row exposing `donation_year` (the year of the donation) and
        `donation_year_bins` (list of (first_year, last_year) tuples of
        continuous donation years for the donor)
    Returns: True if the year lies within any of the bins (bounds inclusive),
        False otherwise
    '''
    year = row.donation_year
    in_bin = [first <= year <= last for first, last in row.donation_year_bins]
    return np.any(in_bin)

In [149]:
# Flag each donation row whose year falls inside one of the donor's consecutive-year bins.
df['is_recurring_donation'] = df.apply(is_recurring_donation, axis='columns')