In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re   

%matplotlib inline

In [2]:
bonus_posts_df = pd.read_csv('../data/bonus_post.csv')
bonus_posts_df = bonus_posts_df.iloc[:, 1:]

#### get bonus amount from glance

In [3]:
bonus_posts_df.head()

Unnamed: 0,title,post_link,glance,offer,fee
0,Chesapeake Bank $100 Savings Bonus,https://www.doctorofcredit.com/va-only-chesape...,Maximum bonus amount: $100\nAvailability: VA o...,Chesapeake Bank is offering a bonus of $100 wh...,The totally free checking account has no month...
1,Arsenal Credit Union $100 Checking Bonus,https://www.doctorofcredit.com/mo-only-arsenal...,Maximum bonus amount: $100\nAvailability: MO o...,Arsenal Credit Union is offering a bonus of $1...,This account has no monthly fees to worry abou...
2,The Cooperative Bank $200 Checking Bonus,https://www.doctorofcredit.com/ma-only-the-coo...,Maximum bonus amount: $200\nAvailability: MA o...,The Cooperative Bank is offering a bonus of $2...,"This account has a $10 monthly fee, this is wa..."
3,Old Second Bank $200 Checking Bonus,https://www.doctorofcredit.com/il-only-old-sec...,Maximum bonus amount: $200\nAvailability:Must ...,Old Second Bank is offering a $200 checking bo...,Money connection account has a $3.95 fee if yo...
4,BBVA $400 Checking Bonus + $50 Savings Bonus &...,https://www.doctorofcredit.com/bbva-400-checki...,Maximum bonus amount: $450\nAvailability: Nati...,BBVA Compass is offering a checking bonus of $...,"If you live in AL, AZ, CA, CO, FL NM or TX the..."


In [4]:
def get_bonus_from_glance(glance):
    try:
        bonus_amount = re.search(r'bonus amount:.*\$\d+.*', glance, re.I)
        bonus_amount = re.sub(r'\,', '', bonus_amount.group(), re.I)
        bonus_amount = re.findall(r'(\$\d+)', bonus_amount, re.I)
        
        for i in range(len(bonus_amount)):
            bonus_amount[i] = re.sub(r'\$', '', bonus_amount[i])
        bonus_amount = np.max(list(map(float, bonus_amount)))
        return bonus_amount
    except:
        return np.nan

bonus_posts_df['bonus'] = bonus_posts_df['glance'].apply(get_bonus_from_glance)

In [5]:
bonus_posts_df.head()

Unnamed: 0,title,post_link,glance,offer,fee,bonus
0,Chesapeake Bank $100 Savings Bonus,https://www.doctorofcredit.com/va-only-chesape...,Maximum bonus amount: $100\nAvailability: VA o...,Chesapeake Bank is offering a bonus of $100 wh...,The totally free checking account has no month...,100.0
1,Arsenal Credit Union $100 Checking Bonus,https://www.doctorofcredit.com/mo-only-arsenal...,Maximum bonus amount: $100\nAvailability: MO o...,Arsenal Credit Union is offering a bonus of $1...,This account has no monthly fees to worry abou...,100.0
2,The Cooperative Bank $200 Checking Bonus,https://www.doctorofcredit.com/ma-only-the-coo...,Maximum bonus amount: $200\nAvailability: MA o...,The Cooperative Bank is offering a bonus of $2...,"This account has a $10 monthly fee, this is wa...",200.0
3,Old Second Bank $200 Checking Bonus,https://www.doctorofcredit.com/il-only-old-sec...,Maximum bonus amount: $200\nAvailability:Must ...,Old Second Bank is offering a $200 checking bo...,Money connection account has a $3.95 fee if yo...,200.0
4,BBVA $400 Checking Bonus + $50 Savings Bonus &...,https://www.doctorofcredit.com/bbva-400-checki...,Maximum bonus amount: $450\nAvailability: Nati...,BBVA Compass is offering a checking bonus of $...,"If you live in AL, AZ, CA, CO, FL NM or TX the...",450.0


#### get monthly direct deposit from glance

In [6]:
def get_dd_from_glance(glance):
    try:
        dd = re.search(r'Direct deposit required:.*', glance, re.I).group()
        
        if_dd = np.nan
        try:
            match = re.search(r':\s*no', dd, re.I)
            if match:
                if_dd = 0
            else:
                if_dd = 1
        except:
            if_dd = np.nan
        
        if if_dd == 1:
            amount = np.nan
            try:
                amount = re.sub(r'\,', '', dd, re.I)
                amount = re.findall(r'\$\d+', amount, re.I)
                for i in range(len(amount)):
                    amount[i] = re.sub(r'\$', '', amount[i])
                amount = np.max(list(map(float, amount)))
            except:
                amount = np.nan

            count_dict = {'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6, 'seven': 7, 
                          'eight': 8, 'nine': 9, 'ten': 10, 'eleven': 11, 'twelve': 12}
            pattern = r'one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve'

            count = 0 
            total = 0 

            try:
                count = re.search(pattern, dd, re.I)
                if count:
                    count = np.max([count_dict[c.lower()] for c in count.group().split()])
                else:
                    try:
                        match_days = re.search(r'day', dd, re.I)
                        if match_days:
                            match_days = re.search(r' (\d+) ', dd, re.I)
                            match_days = np.max(list(map(float, match_days.group().split())))/30
                            count = int(match_days)
                        else:
                            count = np.nan
                    except:
                        count = np.nan
            except:
                count = np.nan

            try:
                match_total = re.search(r'total', dd, re.I)     
                if match_total:
                    total = 1
                    try:
                        amount = amount/count
                    except:
                        print('wrong')
                else:
                    total = 0
            except:
                total = 0

            return amount, count
        else:
            return np.nan, np.nan
    except:
        return np.nan, np.nan

In [7]:
cols = ['dd_amount_per_time', 'dd_total_time']
cols = list(map(lambda x: 'glance_'+x, cols))
cols

['glance_dd_amount_per_time', 'glance_dd_total_time']

In [8]:
bonus_posts_df['glance_tmp'] = bonus_posts_df['glance'].apply(get_dd_from_glance)
bonus_posts_df

Unnamed: 0,title,post_link,glance,offer,fee,bonus,glance_tmp
0,Chesapeake Bank $100 Savings Bonus,https://www.doctorofcredit.com/va-only-chesape...,Maximum bonus amount: $100\nAvailability: VA o...,Chesapeake Bank is offering a bonus of $100 wh...,The totally free checking account has no month...,100.0,"(nan, nan)"
1,Arsenal Credit Union $100 Checking Bonus,https://www.doctorofcredit.com/mo-only-arsenal...,Maximum bonus amount: $100\nAvailability: MO o...,Arsenal Credit Union is offering a bonus of $1...,This account has no monthly fees to worry abou...,100.0,"(nan, nan)"
2,The Cooperative Bank $200 Checking Bonus,https://www.doctorofcredit.com/ma-only-the-coo...,Maximum bonus amount: $200\nAvailability: MA o...,The Cooperative Bank is offering a bonus of $2...,"This account has a $10 monthly fee, this is wa...",200.0,"(250.0, 2)"
3,Old Second Bank $200 Checking Bonus,https://www.doctorofcredit.com/il-only-old-sec...,Maximum bonus amount: $200\nAvailability:Must ...,Old Second Bank is offering a $200 checking bo...,Money connection account has a $3.95 fee if yo...,200.0,"(nan, nan)"
4,BBVA $400 Checking Bonus + $50 Savings Bonus &...,https://www.doctorofcredit.com/bbva-400-checki...,Maximum bonus amount: $450\nAvailability: Nati...,BBVA Compass is offering a checking bonus of $...,"If you live in AL, AZ, CA, CO, FL NM or TX the...",450.0,"(2500.0, nan)"
...,...,...,...,...,...,...,...
208,Sharepoint Credit Union $150 Checking Bonus + ...,https://www.doctorofcredit.com/mn-sharepoint-c...,Maximum bonus amount: $150\nAvailability: MN o...,Sharepoint Credit Union is offering a bonus of...,"Account comes with a $5.95 monthly fee, this i...",150.0,"(300.0, nan)"
209,Addition Financial $300 Checking Bonus,https://www.doctorofcredit.com/fl-only-additio...,Maximum bonus amount: $300\nAvailability: Must...,Addition Financial is offering a bonus of $300...,"Looks like there is a $10 membership fee, not ...",300.0,"(nan, nan)"
210,Monson Savings Bank $100 Checking Bonus,https://www.doctorofcredit.com/ma-in-branch-mo...,Maximum bonus amount: $100\nAvailability: In b...,Monson Savings Bank is offering a bonus of $10...,They offer a free checking account with no mon...,100.0,"(500.0, nan)"
211,BMO Harris $700 Checking & Savings Bonus,https://www.doctorofcredit.com/il-wi-mn-in-bmo...,Maximum bonus amount: $700\nAvailability: you ...,BMO Harris is offering a bonus of up to $700 w...,Your best option is to open the BMO Smart Adva...,700.0,"(nan, nan)"


In [9]:
bonus_posts_df[cols] = pd.DataFrame(bonus_posts_df['glance_tmp'].values.tolist(), index= bonus_posts_df.index)

bonus_posts_df.head()

Unnamed: 0,title,post_link,glance,offer,fee,bonus,glance_tmp,glance_dd_amount_per_time,glance_dd_total_time
0,Chesapeake Bank $100 Savings Bonus,https://www.doctorofcredit.com/va-only-chesape...,Maximum bonus amount: $100\nAvailability: VA o...,Chesapeake Bank is offering a bonus of $100 wh...,The totally free checking account has no month...,100.0,"(nan, nan)",,
1,Arsenal Credit Union $100 Checking Bonus,https://www.doctorofcredit.com/mo-only-arsenal...,Maximum bonus amount: $100\nAvailability: MO o...,Arsenal Credit Union is offering a bonus of $1...,This account has no monthly fees to worry abou...,100.0,"(nan, nan)",,
2,The Cooperative Bank $200 Checking Bonus,https://www.doctorofcredit.com/ma-only-the-coo...,Maximum bonus amount: $200\nAvailability: MA o...,The Cooperative Bank is offering a bonus of $2...,"This account has a $10 monthly fee, this is wa...",200.0,"(250.0, 2)",250.0,2.0
3,Old Second Bank $200 Checking Bonus,https://www.doctorofcredit.com/il-only-old-sec...,Maximum bonus amount: $200\nAvailability:Must ...,Old Second Bank is offering a $200 checking bo...,Money connection account has a $3.95 fee if yo...,200.0,"(nan, nan)",,
4,BBVA $400 Checking Bonus + $50 Savings Bonus &...,https://www.doctorofcredit.com/bbva-400-checki...,Maximum bonus amount: $450\nAvailability: Nati...,BBVA Compass is offering a checking bonus of $...,"If you live in AL, AZ, CA, CO, FL NM or TX the...",450.0,"(2500.0, nan)",2500.0,


#### get expiration date from glance

In [10]:
def get_expir_date(glance):
    try:
        expir_date = re.search(r'expiration date:.*', glance, re.I)
        expir_date = re.sub(r'[Ee]xpiration [Dd]ate:\s*', '', expir_date.group(), re.I)
        expir_date = re.sub(r'\,', '', expir_date)
        expir_date = re.sub(r'[/-]', ' ', expir_date)
        expir_date = re.findall(r'(\w+\s\w+\s\w+)', expir_date, re.I)
        
        try:
            return np.max([pd.to_datetime(day) for day in expir_date])
        except:
            return np.nan
    except:
        return np.nan

bonus_posts_df['expiration_date'] = bonus_posts_df['glance'].apply(get_expir_date)
bonus_posts_df.head()

Unnamed: 0,title,post_link,glance,offer,fee,bonus,glance_tmp,glance_dd_amount_per_time,glance_dd_total_time,expiration_date
0,Chesapeake Bank $100 Savings Bonus,https://www.doctorofcredit.com/va-only-chesape...,Maximum bonus amount: $100\nAvailability: VA o...,Chesapeake Bank is offering a bonus of $100 wh...,The totally free checking account has no month...,100.0,"(nan, nan)",,,2020-04-30
1,Arsenal Credit Union $100 Checking Bonus,https://www.doctorofcredit.com/mo-only-arsenal...,Maximum bonus amount: $100\nAvailability: MO o...,Arsenal Credit Union is offering a bonus of $1...,This account has no monthly fees to worry abou...,100.0,"(nan, nan)",,,2020-03-31
2,The Cooperative Bank $200 Checking Bonus,https://www.doctorofcredit.com/ma-only-the-coo...,Maximum bonus amount: $200\nAvailability: MA o...,The Cooperative Bank is offering a bonus of $2...,"This account has a $10 monthly fee, this is wa...",200.0,"(250.0, 2)",250.0,2.0,2020-02-28
3,Old Second Bank $200 Checking Bonus,https://www.doctorofcredit.com/il-only-old-sec...,Maximum bonus amount: $200\nAvailability:Must ...,Old Second Bank is offering a $200 checking bo...,Money connection account has a $3.95 fee if yo...,200.0,"(nan, nan)",,,2020-04-30
4,BBVA $400 Checking Bonus + $50 Savings Bonus &...,https://www.doctorofcredit.com/bbva-400-checki...,Maximum bonus amount: $450\nAvailability: Nati...,BBVA Compass is offering a checking bonus of $...,"If you live in AL, AZ, CA, CO, FL NM or TX the...",450.0,"(2500.0, nan)",2500.0,,2020-02-10


In [11]:
# glances = list(bonus_posts_df['glance'])

def get_termin_time_from_glance(glance):
    count_dict = {'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6, 'seven': 7, 
                  'eight': 8, 'nine': 9, 'ten': 10, 'eleven': 11, 'twelve': 12}
    pattern = r'one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve'
    count = 0
    try:
        term = re.search(r'.*termination.*', glance, re.I)
        term = re.sub(r'[Ee]arly account termination fee:\s*', '', term.group(), re.I)

        match_year = re.search(r'year', term, re.I)
        if match_year:           
            try:
                count = re.search(r'(\w) year', term, re.I)
                count = re.search(r'(\d+)', term.group(), re.I)
                count = min(list(map(float, count.group().split())))*12
            except:
                count = re.search(pattern, term, re.I)
                if count:
                    count = max([count_dict[c.lower()] for c in count.group().split()])*12
                    
        match_months = re.search(r'month', term, re.I)
        if match_months:           
            try:
                count = re.search(r'(\w) month', term, re.I)
                count = re.search(r'(\d+)', term.group(), re.I)
                count = min(list(map(float, count.group().split())))
            except:
                count = re.search(pattern, term, re.I)
                if count:
                    count = max([count_dict[c.lower()] for c in count.group().split()])
                    
        match_days = re.search(r'day', term, re.I)
        if match_days:
            match_days = re.search(r' (\d+) ', term, re.I)
            count = max(list(map(float, match_days.group().split())))/30
        return int(count)
    except:
        return np.nan

bonus_posts_df['glance_keeping_time'] = bonus_posts_df['glance'].apply(get_termin_time_from_glance)
bonus_posts_df.head()

# for i in range(len(glances)):
#     get_termin_time_from_glance(glances[i])
#     print()


Unnamed: 0,title,post_link,glance,offer,fee,bonus,glance_tmp,glance_dd_amount_per_time,glance_dd_total_time,expiration_date,glance_keeping_time
0,Chesapeake Bank $100 Savings Bonus,https://www.doctorofcredit.com/va-only-chesape...,Maximum bonus amount: $100\nAvailability: VA o...,Chesapeake Bank is offering a bonus of $100 wh...,The totally free checking account has no month...,100.0,"(nan, nan)",,,2020-04-30,0.0
1,Arsenal Credit Union $100 Checking Bonus,https://www.doctorofcredit.com/mo-only-arsenal...,Maximum bonus amount: $100\nAvailability: MO o...,Arsenal Credit Union is offering a bonus of $1...,This account has no monthly fees to worry abou...,100.0,"(nan, nan)",,,2020-03-31,3.0
2,The Cooperative Bank $200 Checking Bonus,https://www.doctorofcredit.com/ma-only-the-coo...,Maximum bonus amount: $200\nAvailability: MA o...,The Cooperative Bank is offering a bonus of $2...,"This account has a $10 monthly fee, this is wa...",200.0,"(250.0, 2)",250.0,2.0,2020-02-28,6.0
3,Old Second Bank $200 Checking Bonus,https://www.doctorofcredit.com/il-only-old-sec...,Maximum bonus amount: $200\nAvailability:Must ...,Old Second Bank is offering a $200 checking bo...,Money connection account has a $3.95 fee if yo...,200.0,"(nan, nan)",,,2020-04-30,3.0
4,BBVA $400 Checking Bonus + $50 Savings Bonus &...,https://www.doctorofcredit.com/bbva-400-checki...,Maximum bonus amount: $450\nAvailability: Nati...,BBVA Compass is offering a checking bonus of $...,"If you live in AL, AZ, CA, CO, FL NM or TX the...",450.0,"(2500.0, nan)",2500.0,,2020-02-10,12.0


In [12]:
# bonus_posts_df[~bonus_posts_df['expiration_date'].apply(np.isnat)]

### get bonus, dd amount, keep time from offer

In [13]:
def get_bonus_amount_from_offer(offer):
    try:
        bb = re.search(r'(bonus of.*\$\d+)|(\$\d+.*bonus)', offer, re.I)
        bb = re.sub(r'\$', '', bb.group(), re.I)
        bb = re.findall(r'\d+', bb, re.I)
        bb = max(list(map(float, bb)))
        return bb
    except:
        return np.nan
    
bonus_posts_df['offer_bonus_amount'] = bonus_posts_df['offer'].apply(get_bonus_amount_from_offer)
bonus_posts_df.head()

Unnamed: 0,title,post_link,glance,offer,fee,bonus,glance_tmp,glance_dd_amount_per_time,glance_dd_total_time,expiration_date,glance_keeping_time,offer_bonus_amount
0,Chesapeake Bank $100 Savings Bonus,https://www.doctorofcredit.com/va-only-chesape...,Maximum bonus amount: $100\nAvailability: VA o...,Chesapeake Bank is offering a bonus of $100 wh...,The totally free checking account has no month...,100.0,"(nan, nan)",,,2020-04-30,0.0,100.0
1,Arsenal Credit Union $100 Checking Bonus,https://www.doctorofcredit.com/mo-only-arsenal...,Maximum bonus amount: $100\nAvailability: MO o...,Arsenal Credit Union is offering a bonus of $1...,This account has no monthly fees to worry abou...,100.0,"(nan, nan)",,,2020-03-31,3.0,100.0
2,The Cooperative Bank $200 Checking Bonus,https://www.doctorofcredit.com/ma-only-the-coo...,Maximum bonus amount: $200\nAvailability: MA o...,The Cooperative Bank is offering a bonus of $2...,"This account has a $10 monthly fee, this is wa...",200.0,"(250.0, 2)",250.0,2.0,2020-02-28,6.0,200.0
3,Old Second Bank $200 Checking Bonus,https://www.doctorofcredit.com/il-only-old-sec...,Maximum bonus amount: $200\nAvailability:Must ...,Old Second Bank is offering a $200 checking bo...,Money connection account has a $3.95 fee if yo...,200.0,"(nan, nan)",,,2020-04-30,3.0,200.0
4,BBVA $400 Checking Bonus + $50 Savings Bonus &...,https://www.doctorofcredit.com/bbva-400-checki...,Maximum bonus amount: $450\nAvailability: Nati...,BBVA Compass is offering a checking bonus of $...,"If you live in AL, AZ, CA, CO, FL NM or TX the...",450.0,"(2500.0, nan)",2500.0,,2020-02-10,12.0,400.0


In [14]:
def get_dd_amount_from_offer(offer):
    try:
        dd = re.search(r'(direct deposit.*\$\d+)|(\$\d+.*direct deposit)', offer, re.I)
        dd = re.sub(r'\$', '', dd.group(), re.I)
        dd = re.findall(r'\d+', dd, re.I)
        dd = max(list(map(float, dd)))
        if dd<50:
            dd = 0
        return dd
    except:
        return np.nan
    
bonus_posts_df['offer_dd_amount'] = bonus_posts_df['offer'].apply(get_dd_amount_from_offer)
bonus_posts_df.head()

# for i in range(len(bonus_posts_df)):
#     try:
#         dd = re.search(r'(direct deposit.*\$\d+)|(\$\d+.*direct deposit)', offers[i], re.I)
#         dd = re.sub(r'\$', '', dd.group(), re.I)
#         dd = re.findall(r'\d+', dd, re.I)
#         dd = max(list(map(float, dd)))
#         if dd<50:
#             dd = 0
#         return dd
#     except:
#         return np.nan

# get_dd_amount_from_offer(offers[1])

Unnamed: 0,title,post_link,glance,offer,fee,bonus,glance_tmp,glance_dd_amount_per_time,glance_dd_total_time,expiration_date,glance_keeping_time,offer_bonus_amount,offer_dd_amount
0,Chesapeake Bank $100 Savings Bonus,https://www.doctorofcredit.com/va-only-chesape...,Maximum bonus amount: $100\nAvailability: VA o...,Chesapeake Bank is offering a bonus of $100 wh...,The totally free checking account has no month...,100.0,"(nan, nan)",,,2020-04-30,0.0,100.0,
1,Arsenal Credit Union $100 Checking Bonus,https://www.doctorofcredit.com/mo-only-arsenal...,Maximum bonus amount: $100\nAvailability: MO o...,Arsenal Credit Union is offering a bonus of $1...,This account has no monthly fees to worry abou...,100.0,"(nan, nan)",,,2020-03-31,3.0,100.0,
2,The Cooperative Bank $200 Checking Bonus,https://www.doctorofcredit.com/ma-only-the-coo...,Maximum bonus amount: $200\nAvailability: MA o...,The Cooperative Bank is offering a bonus of $2...,"This account has a $10 monthly fee, this is wa...",200.0,"(250.0, 2)",250.0,2.0,2020-02-28,6.0,200.0,250.0
3,Old Second Bank $200 Checking Bonus,https://www.doctorofcredit.com/il-only-old-sec...,Maximum bonus amount: $200\nAvailability:Must ...,Old Second Bank is offering a $200 checking bo...,Money connection account has a $3.95 fee if yo...,200.0,"(nan, nan)",,,2020-04-30,3.0,200.0,100.0
4,BBVA $400 Checking Bonus + $50 Savings Bonus &...,https://www.doctorofcredit.com/bbva-400-checki...,Maximum bonus amount: $450\nAvailability: Nati...,BBVA Compass is offering a checking bonus of $...,"If you live in AL, AZ, CA, CO, FL NM or TX the...",450.0,"(2500.0, nan)",2500.0,,2020-02-10,12.0,400.0,2020.0


In [15]:
def get_keep_time_from_offer(offer):
    try:
        count_day = re.search(r'\d+ days',offer, re.I)
        count_day = re.findall(r'\d+', count_day.group(), re.I)
        count_day = max(list(map(float, count_day)))/31
        return int(count_day)
    except:
        return np.nan
    
bonus_posts_df['offer_keep_time'] = bonus_posts_df['offer'].apply(get_keep_time_from_offer)
bonus_posts_df.head()

Unnamed: 0,title,post_link,glance,offer,fee,bonus,glance_tmp,glance_dd_amount_per_time,glance_dd_total_time,expiration_date,glance_keeping_time,offer_bonus_amount,offer_dd_amount,offer_keep_time
0,Chesapeake Bank $100 Savings Bonus,https://www.doctorofcredit.com/va-only-chesape...,Maximum bonus amount: $100\nAvailability: VA o...,Chesapeake Bank is offering a bonus of $100 wh...,The totally free checking account has no month...,100.0,"(nan, nan)",,,2020-04-30,0.0,100.0,,
1,Arsenal Credit Union $100 Checking Bonus,https://www.doctorofcredit.com/mo-only-arsenal...,Maximum bonus amount: $100\nAvailability: MO o...,Arsenal Credit Union is offering a bonus of $1...,This account has no monthly fees to worry abou...,100.0,"(nan, nan)",,,2020-03-31,3.0,100.0,,0.0
2,The Cooperative Bank $200 Checking Bonus,https://www.doctorofcredit.com/ma-only-the-coo...,Maximum bonus amount: $200\nAvailability: MA o...,The Cooperative Bank is offering a bonus of $2...,"This account has a $10 monthly fee, this is wa...",200.0,"(250.0, 2)",250.0,2.0,2020-02-28,6.0,200.0,250.0,2.0
3,Old Second Bank $200 Checking Bonus,https://www.doctorofcredit.com/il-only-old-sec...,Maximum bonus amount: $200\nAvailability:Must ...,Old Second Bank is offering a $200 checking bo...,Money connection account has a $3.95 fee if yo...,200.0,"(nan, nan)",,,2020-04-30,3.0,200.0,100.0,
4,BBVA $400 Checking Bonus + $50 Savings Bonus &...,https://www.doctorofcredit.com/bbva-400-checki...,Maximum bonus amount: $450\nAvailability: Nati...,BBVA Compass is offering a checking bonus of $...,"If you live in AL, AZ, CA, CO, FL NM or TX the...",450.0,"(2500.0, nan)",2500.0,,2020-02-10,12.0,400.0,2020.0,


### get keeping time from fees

In [16]:
fees = bonus_posts_df['fee']

def get_keep_time_from_fee(fee):
    try:
        count_day = re.search(r'\d+ days', fee, re.I)
        count_day = re.findall(r'\d+', count_day.group(), re.I)
        count_day = max(list(map(float, count_day)))/31
        return int(count_day)
    except:
        return np.nan
    
bonus_posts_df['fee_keep_time'] = bonus_posts_df['offer'].apply(get_keep_time_from_fee)
bonus_posts_df.head()
        


# get_dd_amount_from_offer(offers[1])

Unnamed: 0,title,post_link,glance,offer,fee,bonus,glance_tmp,glance_dd_amount_per_time,glance_dd_total_time,expiration_date,glance_keeping_time,offer_bonus_amount,offer_dd_amount,offer_keep_time,fee_keep_time
0,Chesapeake Bank $100 Savings Bonus,https://www.doctorofcredit.com/va-only-chesape...,Maximum bonus amount: $100\nAvailability: VA o...,Chesapeake Bank is offering a bonus of $100 wh...,The totally free checking account has no month...,100.0,"(nan, nan)",,,2020-04-30,0.0,100.0,,,
1,Arsenal Credit Union $100 Checking Bonus,https://www.doctorofcredit.com/mo-only-arsenal...,Maximum bonus amount: $100\nAvailability: MO o...,Arsenal Credit Union is offering a bonus of $1...,This account has no monthly fees to worry abou...,100.0,"(nan, nan)",,,2020-03-31,3.0,100.0,,0.0,0.0
2,The Cooperative Bank $200 Checking Bonus,https://www.doctorofcredit.com/ma-only-the-coo...,Maximum bonus amount: $200\nAvailability: MA o...,The Cooperative Bank is offering a bonus of $2...,"This account has a $10 monthly fee, this is wa...",200.0,"(250.0, 2)",250.0,2.0,2020-02-28,6.0,200.0,250.0,2.0,2.0
3,Old Second Bank $200 Checking Bonus,https://www.doctorofcredit.com/il-only-old-sec...,Maximum bonus amount: $200\nAvailability:Must ...,Old Second Bank is offering a $200 checking bo...,Money connection account has a $3.95 fee if yo...,200.0,"(nan, nan)",,,2020-04-30,3.0,200.0,100.0,,
4,BBVA $400 Checking Bonus + $50 Savings Bonus &...,https://www.doctorofcredit.com/bbva-400-checki...,Maximum bonus amount: $450\nAvailability: Nati...,BBVA Compass is offering a checking bonus of $...,"If you live in AL, AZ, CA, CO, FL NM or TX the...",450.0,"(2500.0, nan)",2500.0,,2020-02-10,12.0,400.0,2020.0,,


### combine features and clean dataframe

In [17]:
df = bonus_posts_df

In [18]:
df['bonus'].fillna(df['offer_bonus_amount'], inplace=True)

In [19]:
df['monthly_dd'] = df['glance_dd_amount_per_time'].map(str) + ' ' + df['offer_dd_amount'].map(str)
df['monthly_dd'] = df['monthly_dd'].apply(lambda x: max(list(map(float, x.split()))))

In [20]:
df['glance_dd_total_time'].fillna(0, inplace=True)
df['glance_keeping_time'].fillna(0, inplace=True)
df['offer_keep_time'].fillna(0, inplace=True)
df['fee_keep_time'].fillna(0, inplace=True)

In [21]:
df['keep_time'] = df['glance_dd_total_time'].map(str) + ' ' + df['glance_keeping_time'].map(str)
df['keep_time'] = df['keep_time'].map(str) + ' ' + df['offer_keep_time'].map(str)
df['keep_time'] = df['keep_time'].map(str) + ' ' + df['fee_keep_time'].map(str)
df['keep_time'] = df['keep_time'].apply(lambda x: max(list(map(float, x.split()))))

In [22]:
df

Unnamed: 0,title,post_link,glance,offer,fee,bonus,glance_tmp,glance_dd_amount_per_time,glance_dd_total_time,expiration_date,glance_keeping_time,offer_bonus_amount,offer_dd_amount,offer_keep_time,fee_keep_time,monthly_dd,keep_time
0,Chesapeake Bank $100 Savings Bonus,https://www.doctorofcredit.com/va-only-chesape...,Maximum bonus amount: $100\nAvailability: VA o...,Chesapeake Bank is offering a bonus of $100 wh...,The totally free checking account has no month...,100.0,"(nan, nan)",,0.0,2020-04-30,0.0,100.0,,0.0,0.0,,0.0
1,Arsenal Credit Union $100 Checking Bonus,https://www.doctorofcredit.com/mo-only-arsenal...,Maximum bonus amount: $100\nAvailability: MO o...,Arsenal Credit Union is offering a bonus of $1...,This account has no monthly fees to worry abou...,100.0,"(nan, nan)",,0.0,2020-03-31,3.0,100.0,,0.0,0.0,,3.0
2,The Cooperative Bank $200 Checking Bonus,https://www.doctorofcredit.com/ma-only-the-coo...,Maximum bonus amount: $200\nAvailability: MA o...,The Cooperative Bank is offering a bonus of $2...,"This account has a $10 monthly fee, this is wa...",200.0,"(250.0, 2)",250.0,2.0,2020-02-28,6.0,200.0,250.0,2.0,2.0,250.0,6.0
3,Old Second Bank $200 Checking Bonus,https://www.doctorofcredit.com/il-only-old-sec...,Maximum bonus amount: $200\nAvailability:Must ...,Old Second Bank is offering a $200 checking bo...,Money connection account has a $3.95 fee if yo...,200.0,"(nan, nan)",,0.0,2020-04-30,3.0,200.0,100.0,0.0,0.0,,3.0
4,BBVA $400 Checking Bonus + $50 Savings Bonus &...,https://www.doctorofcredit.com/bbva-400-checki...,Maximum bonus amount: $450\nAvailability: Nati...,BBVA Compass is offering a checking bonus of $...,"If you live in AL, AZ, CA, CO, FL NM or TX the...",450.0,"(2500.0, nan)",2500.0,0.0,2020-02-10,12.0,400.0,2020.0,0.0,0.0,2500.0,12.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
208,Sharepoint Credit Union $150 Checking Bonus + ...,https://www.doctorofcredit.com/mn-sharepoint-c...,Maximum bonus amount: $150\nAvailability: MN o...,Sharepoint Credit Union is offering a bonus of...,"Account comes with a $5.95 monthly fee, this i...",150.0,"(300.0, nan)",300.0,0.0,2019-09-30,6.0,300.0,300.0,0.0,0.0,300.0,6.0
209,Addition Financial $300 Checking Bonus,https://www.doctorofcredit.com/fl-only-additio...,Maximum bonus amount: $300\nAvailability: Must...,Addition Financial is offering a bonus of $300...,"Looks like there is a $10 membership fee, not ...",300.0,"(nan, nan)",,0.0,2019-09-29,0.0,300.0,,1.0,1.0,,1.0
210,Monson Savings Bank $100 Checking Bonus,https://www.doctorofcredit.com/ma-in-branch-mo...,Maximum bonus amount: $100\nAvailability: In b...,Monson Savings Bank is offering a bonus of $10...,They offer a free checking account with no mon...,100.0,"(500.0, nan)",500.0,0.0,NaT,0.0,100.0,500.0,0.0,0.0,500.0,0.0
211,BMO Harris $700 Checking & Savings Bonus,https://www.doctorofcredit.com/il-wi-mn-in-bmo...,Maximum bonus amount: $700\nAvailability: you ...,BMO Harris is offering a bonus of up to $700 w...,Your best option is to open the BMO Smart Adva...,700.0,"(nan, nan)",,0.0,2019-08-31,3.0,700.0,0.0,3.0,3.0,,3.0


In [23]:
final_cols = ['title', 'post_link', 'bonus', 'monthly_dd', 'expiration_date', 'keep_time']
df = df[final_cols]
df.head()

Unnamed: 0,title,post_link,bonus,monthly_dd,expiration_date,keep_time
0,Chesapeake Bank $100 Savings Bonus,https://www.doctorofcredit.com/va-only-chesape...,100.0,,2020-04-30,0.0
1,Arsenal Credit Union $100 Checking Bonus,https://www.doctorofcredit.com/mo-only-arsenal...,100.0,,2020-03-31,3.0
2,The Cooperative Bank $200 Checking Bonus,https://www.doctorofcredit.com/ma-only-the-coo...,200.0,250.0,2020-02-28,6.0
3,Old Second Bank $200 Checking Bonus,https://www.doctorofcredit.com/il-only-old-sec...,200.0,,2020-04-30,3.0
4,BBVA $400 Checking Bonus + $50 Savings Bonus &...,https://www.doctorofcredit.com/bbva-400-checki...,450.0,2500.0,2020-02-10,12.0


In [24]:
def keeping(x):
    if x == 0:
        return 12
    return x

df['keep_time'] = df['keep_time'].apply(keeping)
df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,title,post_link,bonus,monthly_dd,expiration_date,keep_time
0,Chesapeake Bank $100 Savings Bonus,https://www.doctorofcredit.com/va-only-chesape...,100.0,,2020-04-30,12.0
1,Arsenal Credit Union $100 Checking Bonus,https://www.doctorofcredit.com/mo-only-arsenal...,100.0,,2020-03-31,3.0
2,The Cooperative Bank $200 Checking Bonus,https://www.doctorofcredit.com/ma-only-the-coo...,200.0,250.0,2020-02-28,6.0
3,Old Second Bank $200 Checking Bonus,https://www.doctorofcredit.com/il-only-old-sec...,200.0,,2020-04-30,3.0
4,BBVA $400 Checking Bonus + $50 Savings Bonus &...,https://www.doctorofcredit.com/bbva-400-checki...,450.0,2500.0,2020-02-10,12.0


In [25]:
df = df[~(df['monthly_dd']==0)]

In [26]:
df = df.dropna()
df

Unnamed: 0,title,post_link,bonus,monthly_dd,expiration_date,keep_time
2,The Cooperative Bank $200 Checking Bonus,https://www.doctorofcredit.com/ma-only-the-coo...,200.0,250.0,2020-02-28,6.0
4,BBVA $400 Checking Bonus + $50 Savings Bonus &...,https://www.doctorofcredit.com/bbva-400-checki...,450.0,2500.0,2020-02-10,12.0
6,South State Bank $200 Checking Bonus,https://www.doctorofcredit.com/ga-nc-sc-south-...,200.0,250.0,2019-11-30,1.0
9,Teachers Credit Union $200 Checking Bonus,https://www.doctorofcredit.com/in-mi-teachers-...,200.0,500.0,2020-03-20,12.0
11,Hancock Whitney Bank $300 Checking Bonus,https://www.doctorofcredit.com/la-ms-fl-al-tx-...,300.0,250.0,2020-03-31,6.0
...,...,...,...,...,...,...
196,Royal Credit Union $200 Checking Bonus,https://www.doctorofcredit.com/mn-wi-royal-cre...,200.0,200.0,2019-09-30,6.0
201,HomeTrust Bank $150 Bonus + $5 Monthly,https://www.doctorofcredit.com/nc-sc-tn-va-hom...,150.0,250.0,2019-11-15,12.0
203,Bank of America $350 Checking Bonus,https://www.doctorofcredit.com/id-ky-la-wi-ban...,350.0,4000.0,2019-09-15,3.0
207,Dedham Savings Bank $250 Checking Bonus,https://www.doctorofcredit.com/ma-only-dedham-...,250.0,500.0,2019-12-31,2.0


In [27]:
from datetime import *
time_filter = df['expiration_date']>=pd.to_datetime(datetime.today())
df = df[time_filter]

In [28]:
df.reset_index(drop=True, inplace=True)

In [30]:
df

Unnamed: 0,title,post_link,bonus,monthly_dd,expiration_date,keep_time
0,The Cooperative Bank $200 Checking Bonus,https://www.doctorofcredit.com/ma-only-the-coo...,200.0,250.0,2020-02-28,6.0
1,BBVA $400 Checking Bonus + $50 Savings Bonus &...,https://www.doctorofcredit.com/bbva-400-checki...,450.0,2500.0,2020-02-10,12.0
2,Teachers Credit Union $200 Checking Bonus,https://www.doctorofcredit.com/in-mi-teachers-...,200.0,500.0,2020-03-20,12.0
3,Hancock Whitney Bank $300 Checking Bonus,https://www.doctorofcredit.com/la-ms-fl-al-tx-...,300.0,250.0,2020-03-31,6.0
4,HSBC $700 Checking Bonus,https://www.doctorofcredit.com/hsbc-700-checki...,700.0,700.0,2020-03-01,6.0
5,Seacoast Bank $350 Checking & $150 Savings Bonus,https://www.doctorofcredit.com/fl-in-branch-on...,500.0,500.0,2020-04-30,3.0
6,Incredible Bank $102 Checking Bonus,https://www.doctorofcredit.com/incredible-bank...,102.0,300.0,2020-02-22,2.0
7,Bank Of The West $250 Checking Bonus,https://www.doctorofcredit.com/bank-of-the-wes...,250.0,333.333333,2020-02-28,3.0
8,People’s United Bank $100 Checking Bonus & Sho...,https://www.doctorofcredit.com/peoples-united-...,100.0,1000.0,2020-03-31,3.0
9,Northwest Federal Credit Union $300 Checking B...,https://www.doctorofcredit.com/ballston-va-bra...,300.0,500.0,2020-02-15,3.0


In [29]:
df.to_csv('../data/real_time_bonuses.csv')