In [0]:
import pandas as pd
import numpy as np
import math
import json
import xgboost as xg
from sklearn.preprocessing import LabelEncoder
from matplotlib import pyplot
import matplotlib.pyplot as plt
import warnings
from google.oauth2 import service_account #For GCP Account connection
from google.cloud import bigquery


%matplotlib inline

warnings.filterwarnings(action='once')

#Setting Up Project ID
project_id = "dsmt-team5-finalproject"
#Setting up the credential file
cred = service_account.Credentials.from_service_account_file('dsmt-team5-finalproject-4f1119faadf9.json')
#Setting up a BigQuery Client:
client = bigquery.Client(project=project_id, credentials=cred)

In [0]:
transcript_sql = """SELECT * FROM `dsmt-team5-finalproject.tacobell.transcript`"""
transcript = client.query(transcript_sql).to_dataframe()
profile_sql = """SELECT * FROM `dsmt-team5-finalproject.tacobell.profile`"""
profile = client.query(profile_sql).to_dataframe()
portfolio_sql = """SELECT * FROM `dsmt-team5-finalproject.tacobell.portfolio`"""
portfolio = client.query(portfolio_sql).to_dataframe()

In [3]:
portfolio

Unnamed: 0,channels,difficulty,duration,id,offer_type,reward
0,"[email, mobile, social]",10,7,ae264e3637204a6fb9bb56bc8210ddfd,bogo,10
1,"[web, email, mobile, social]",10,5,4d5c57ea9a6940dd891ad53e9dbe8da0,bogo,10
2,"[web, email, mobile]",0,4,3f207df678b143eea3cee63160fa8bed,informational,0
3,"[web, email, mobile]",5,7,9b98b8c7a33c4b65b9aebfe6a799e6d9,bogo,5
4,"[web, email]",20,10,0b1e1539f2cc45b7b9fa7c272da2e1d7,discount,5
5,"[web, email, mobile, social]",7,7,2298d6c36e964ae4a3e7e9706d1fb8c2,discount,3
6,"[web, email, mobile, social]",10,10,fafdcd668e3743c1bb461111dcafc2a4,discount,2
7,"[email, mobile, social]",0,3,5a8bc65990b245e5a138643cd4eb9837,informational,0
8,"[web, email, mobile, social]",5,5,f19421c1d4aa40978ebb69ca19b0e20d,bogo,5
9,"[web, email, mobile]",10,7,2906b810c7d4411798c6938adc9daaa5,discount,2


In [4]:
profile.head(10)

Unnamed: 0,age,became_member_on,gender,id,income
0,118,20170212,,68be06ca386d4c31939f3a4f0e3dd783,
1,55,20170715,F,0610b486422d4921ae7d2bf64640c50b,112000.0
2,118,20180712,,38fe809add3b4fcf9315a9694bb96ff5,
3,75,20170509,F,78afa995795e4d85b5d9ceeca43f5fef,100000.0
4,118,20170804,,a03223e636434f42ac4c3df47e8bac43,
5,68,20180426,M,e2127556f4f64592b11af22de27a7932,70000.0
6,118,20170925,,8ec6ce2a7e7949b1bf142def7d0e0586,
7,118,20171002,,68617ca6246f4fbc85e91a2a49552598,
8,65,20180209,M,389bc3fa690240e798340f5a15918d5c,53000.0
9,118,20161122,,8974fc5686fe429db53ddde067b88302,


In [5]:
profile.isnull().mean()

age                 0.000000
became_member_on    0.000000
gender              0.127941
id                  0.000000
income              0.127941
dtype: float64

In [0]:
profile = profile.dropna()

In [7]:
profile.isnull().mean()

age                 0.0
became_member_on    0.0
gender              0.0
id                  0.0
income              0.0
dtype: float64

In [8]:
transcript.head()

Unnamed: 0,event,person,time,value
0,offer received,78afa995795e4d85b5d9ceeca43f5fef,0,{'offer id': '9b98b8c7a33c4b65b9aebfe6a799e6d9'}
1,offer received,a03223e636434f42ac4c3df47e8bac43,0,{'offer id': '0b1e1539f2cc45b7b9fa7c272da2e1d7'}
2,offer received,e2127556f4f64592b11af22de27a7932,0,{'offer id': '2906b810c7d4411798c6938adc9daaa5'}
3,offer received,8ec6ce2a7e7949b1bf142def7d0e0586,0,{'offer id': 'fafdcd668e3743c1bb461111dcafc2a4'}
4,offer received,68617ca6246f4fbc85e91a2a49552598,0,{'offer id': '4d5c57ea9a6940dd891ad53e9dbe8da0'}


In [9]:
transcript.shape[0]

306534

In [0]:
transcript = transcript[transcript["person"].isin(profile["id"]) ]

In [11]:
transcript.head()

Unnamed: 0,event,person,time,value
0,offer received,78afa995795e4d85b5d9ceeca43f5fef,0,{'offer id': '9b98b8c7a33c4b65b9aebfe6a799e6d9'}
2,offer received,e2127556f4f64592b11af22de27a7932,0,{'offer id': '2906b810c7d4411798c6938adc9daaa5'}
5,offer received,389bc3fa690240e798340f5a15918d5c,0,{'offer id': 'f19421c1d4aa40978ebb69ca19b0e20d'}
7,offer received,2eeac8d8feae4a8cad5a6af0499a211d,0,{'offer id': '3f207df678b143eea3cee63160fa8bed'}
8,offer received,aa4862eba776480b8bb9c68455b8c2e1,0,{'offer id': '0b1e1539f2cc45b7b9fa7c272da2e1d7'}


In [12]:
transcript.shape[0]

272762

# **Pre-process data**

In [0]:
import math
import json
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.utils import resample
import matplotlib.pyplot as plt
import warnings
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle

def encode_person_id(profile_df, transcript_df):
   
    #Label encode hash values of id column for profile dataframe and person column for 
    #transcript dataframe. Generates per_id variable.
    
    person_encoder = LabelEncoder()
    profile_df['per_id'] = person_encoder.fit_transform(profile_df['id'])
    transcript_df['per_id'] = person_encoder.transform(transcript_df['person'])
    
    profile_df.drop(['id'], axis=1, inplace=True)
    transcript_df.drop(['person'], axis=1, inplace=True)
    
    return profile_df, transcript_df, person_encoder

def encode_offer_id(portfolio_df, transcript_offer_df):
   
    #Encode hash values of id column for portfolio dataframe and value_id_amt column for 
    #transcript_offer dataframe. Generates offer_id.
    
    offer_encoder = LabelEncoder()
    portfolio_df['offer_id'] = offer_encoder.fit_transform(portfolio_df['id'])
    transcript_offer_df['offer_id'] = offer_encoder.transform(transcript_offer_df['value_id_amt'])
    
    portfolio_df.drop(['id'], axis=1, inplace=True)
    transcript_offer_df.drop(['value_type', 'value_id_amt'], axis=1, inplace=True)
    
    return portfolio_df, transcript_offer_df, offer_encoder


def convert_data_type(df, int_var_list=None, float_var_list=None, str_var_list=None):

    #Ensure data types are consistent. Convert values in dataframe 
    #into desired data types.
    
    if int_var_list != None:
        for int_var in int_var_list:
            df[int_var] = df[int_var].apply(lambda x: int(x))
        
    if float_var_list != None:
        for float_var in float_var_list:
            df[float_var] = df[float_var].apply(lambda x: float(x))
        
    if str_var_list != None:
        for str_var in str_var_list:
            df[str_var] = df[str_var].apply(lambda x: str(x))
        
    return df

def encode_channel(portfolio_df):

    #Extract and perform one-hot encodings for the available channels in portfolio

    # find all available channels
    available_channels = []
    for channel_list in portfolio_df['channels']:
        for channel in channel_list:
            if channel not in available_channels:
                available_channels.append(channel)
    
    # one-hot encode channel
    for channel in available_channels:
        portfolio_df[channel] = portfolio_df['channels'].apply(lambda x: 1 if channel in x else 0)
    
    # drop channels column
    portfolio_df.drop(['channels'], axis=1, inplace=True)
    
    return portfolio_df

In [14]:
profile, transcript, person_enocoder = encode_person_id(profile, transcript)
profile.head()

Unnamed: 0,age,became_member_on,gender,income,per_id
1,55,20170715,F,112000.0,346
3,75,20170509,F,100000.0,6962
5,68,20180426,M,70000.0,13107
8,65,20180209,M,53000.0,3246
12,58,20171111,M,51000.0,2669


In [15]:
profile[profile['per_id']==7996].head()

Unnamed: 0,age,became_member_on,gender,income,per_id
12243,20,20151011,M,55000.0,7996


In [0]:
# determine whether row deals with offer or transcation
transcript['value_type'] = transcript['value'].apply(lambda x : list(x.keys())[0])

# extract offer id if its an id, or transcation amount if it is a transcation
transcript['value_id_amt'] = transcript['value'].apply(lambda x : list(x.values())[0])

In [0]:
# ensure age, per_id is int
# ensure gender is string
# ensure income is float

int_var_list = ['age', 'per_id']
float_var_list = ['income']
str_var_list = ['gender']

profile = convert_data_type(profile, int_var_list, float_var_list, str_var_list)

int_var_list = ['time', 'per_id']
str_var_list = ['event', 'value_type']

transcript = convert_data_type(transcript, int_var_list=int_var_list, str_var_list=str_var_list)



In [0]:
#transcript = transcript[transcript['per_id'].isin([7996, 10735, 15043, 169, 10701])]
#transcript = transcript[transcript['per_id'].isin([7996])]

In [19]:
# get transcript for offers only
transcript_offer = transcript[(transcript['value_type']=='offer id') | (transcript['value_type']=='offer_id')]
transcript_offer.drop(['value'], axis=1, inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [20]:
transcript_offer.head(20)

Unnamed: 0,event,time,per_id,value_type,value_id_amt
0,offer received,0,6962,offer id,9b98b8c7a33c4b65b9aebfe6a799e6d9
2,offer received,0,13107,offer id,2906b810c7d4411798c6938adc9daaa5
5,offer received,0,3246,offer id,f19421c1d4aa40978ebb69ca19b0e20d
7,offer received,0,2669,offer id,3f207df678b143eea3cee63160fa8bed
8,offer received,0,9940,offer id,0b1e1539f2cc45b7b9fa7c272da2e1d7
9,offer received,0,2860,offer id,0b1e1539f2cc45b7b9fa7c272da2e1d7
12,offer received,0,4366,offer id,ae264e3637204a6fb9bb56bc8210ddfd
13,offer received,0,11315,offer id,3f207df678b143eea3cee63160fa8bed
14,offer received,0,12359,offer id,0b1e1539f2cc45b7b9fa7c272da2e1d7
15,offer received,0,14351,offer id,fafdcd668e3743c1bb461111dcafc2a4


In [21]:
# get transcript for transactions only
transcript_trans = transcript[transcript['value_type']=='amount']
transcript_trans.drop(['event', 'value', 'value_type'], axis=1, inplace=True)
transcript_trans.rename(index=str, columns={"time":"time_spent", "value_id_amt":"amt_spent"}, inplace=True)
transcript_trans.head(30)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


Unnamed: 0,time_spent,per_id,amt_spent
12654,0,148,0.83
12657,0,9326,34.56
12659,0,4920,13.23
12670,0,10445,19.51
12671,0,14742,18.97
12678,0,5705,33.9
12687,0,9901,18.59
12691,0,5991,18.01
12696,0,8337,19.11
12706,0,4460,36.19


In [22]:
portfolio, transcript_offer, offer_encoder = encode_offer_id(portfolio, transcript_offer)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [23]:
portfolio = encode_channel(portfolio)
portfolio

Unnamed: 0,difficulty,duration,offer_type,reward,offer_id,email,mobile,social,web
0,10,7,bogo,10,7,1,1,1,0
1,10,5,bogo,10,4,1,1,1,1
2,0,4,informational,0,3,1,1,0,1
3,5,7,bogo,5,6,1,1,0,1
4,20,10,discount,5,0,1,0,0,1
5,7,7,discount,3,1,1,1,1,1
6,10,10,discount,2,9,1,1,1,1
7,0,3,informational,0,5,1,1,1,0
8,5,5,bogo,5,8,1,1,1,1
9,10,7,discount,2,2,1,1,0,1


In [24]:
portfolio['duration'] = portfolio['duration'] * 24
portfolio

Unnamed: 0,difficulty,duration,offer_type,reward,offer_id,email,mobile,social,web
0,10,168,bogo,10,7,1,1,1,0
1,10,120,bogo,10,4,1,1,1,1
2,0,96,informational,0,3,1,1,0,1
3,5,168,bogo,5,6,1,1,0,1
4,20,240,discount,5,0,1,0,0,1
5,7,168,discount,3,1,1,1,1,1
6,10,240,discount,2,9,1,1,1,1
7,0,72,informational,0,5,1,1,1,0
8,5,120,bogo,5,8,1,1,1,1
9,10,168,discount,2,2,1,1,0,1


In [0]:
# merge transcript_offer with portfolio
transcript_offer = pd.merge(transcript_offer, portfolio, how='left', on='offer_id')

I will now split transcript_offer into 3 different dataframes: 
1. One tracking when offers are received, 
2. the other tracking when offers are viewed 
3. and the last tracking when offers are completed. 

Doing so will allow us to merge all 3 together to obtain a summary of the offers. 


Some of the offers obtained through the merging process will be nonsensical offers. We will need to eliminate such offers.

In [26]:
# generate dataframe tracking offers received
transcript_offer_received = transcript_offer[transcript_offer['event']=='offer received']
transcript_offer_received.rename(index=str, columns={"time":"time_received"}, inplace=True)
transcript_offer_received.drop(['event'], axis=1, inplace=True)
transcript_offer_received.head(10)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


Unnamed: 0,time_received,per_id,offer_id,difficulty,duration,offer_type,reward,email,mobile,social,web
0,0,6962,6,5,168,bogo,5,1,1,0,1
1,0,13107,2,10,168,discount,2,1,1,0,1
2,0,3246,8,5,120,bogo,5,1,1,1,1
3,0,2669,3,0,96,informational,0,1,1,0,1
4,0,9940,0,20,240,discount,5,1,0,0,1
5,0,2860,0,20,240,discount,5,1,0,0,1
6,0,4366,7,10,168,bogo,10,1,1,1,0
7,0,11315,3,0,96,informational,0,1,1,0,1
8,0,12359,0,20,240,discount,5,1,0,0,1
9,0,14351,9,10,240,discount,2,1,1,1,1


In [27]:
# generate dataframe tracking offers viewed
transcript_offer_viewed = transcript_offer[transcript_offer['event']=='offer viewed']
transcript_offer_viewed.rename(index=str, columns={"time":"time_viewed"}, inplace=True)
cols_to_drop = ['event', 'difficulty', 'duration', 'offer_type', 'reward',\
                'email', 'mobile', 'social', 'web']
transcript_offer_viewed.drop(cols_to_drop, axis=1, inplace=True)
transcript_offer_viewed.head(10)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


Unnamed: 0,time_viewed,per_id,offer_id
11023,0,3246,8
11024,0,12181,5
11025,0,921,4
11026,0,148,7
11027,0,11107,5
11028,0,9326,2
11030,0,8159,1
11031,0,13290,1
11032,0,2146,8
11033,0,14393,7


In [28]:
# generate dataframe tracking offers completed
transcript_offer_completed = transcript_offer[transcript_offer['event']=='offer completed']
transcript_offer_completed.rename(index=str, columns={"time":"time_completed"}, inplace=True)
transcript_offer_completed.drop(cols_to_drop, axis=1, inplace=True)
transcript_offer_completed.head(10)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


Unnamed: 0,time_completed,per_id,offer_id
11029,0,9326,2
11037,0,14742,9
11042,0,5705,6
11048,0,5991,7
11051,0,8337,4
11066,0,1927,4
11069,0,10927,1
11088,0,12463,8
11103,0,6705,2
11105,0,3617,4


In [29]:
all_offers = pd.merge(transcript_offer_received, transcript_offer_viewed, on=['per_id', 'offer_id'], how='left')
all_offers = pd.merge(all_offers, transcript_offer_completed, on=['per_id', 'offer_id'], how='left')
all_offers.head(10)

Unnamed: 0,time_received,per_id,offer_id,difficulty,duration,offer_type,reward,email,mobile,social,web,time_viewed,time_completed
0,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0
1,0,13107,2,10,168,discount,2,1,1,0,1,18.0,
2,0,3246,8,5,120,bogo,5,1,1,1,1,0.0,60.0
3,0,3246,8,5,120,bogo,5,1,1,1,1,0.0,600.0
4,0,3246,8,5,120,bogo,5,1,1,1,1,504.0,60.0
5,0,3246,8,5,120,bogo,5,1,1,1,1,504.0,600.0
6,0,2669,3,0,96,informational,0,1,1,0,1,,
7,0,9940,0,20,240,discount,5,1,0,0,1,18.0,
8,0,2860,0,20,240,discount,5,1,0,0,1,150.0,
9,0,4366,7,10,168,bogo,10,1,1,1,0,102.0,510.0


At this stage, alot of offers generated through the merging process are false offers. We will try to eliminate these offers.

We will now keep only offers that meet the following conditions:

1. (time offer viewed > time offer received) and (time offer completed > time offer viewed)

2. (time offer viewed > time offer received) and (time completed is null)

3. both time offer viewed and time offer completed are null

Offers that do not meet these conditions are false offers and will be discarded.

In [30]:
# add time of offer expiry column
all_offers['time_expiry'] = all_offers['time_received'] + all_offers['duration']

# add time_viewed > time_received and time_completed is null
# if time_viewed is null and time_completed is null
all_offers = all_offers[((all_offers['time_viewed'] >= all_offers['time_received']) &\
                        (all_offers['time_completed'] >= all_offers['time_viewed']))
                        |
                        ((all_offers['time_viewed'] >= all_offers['time_received']) &\
                        all_offers['time_completed'].isnull())
                        |
                        (all_offers['time_viewed'].isnull() &\
                         all_offers['time_completed'].isnull())
                       ]


all_offers.head(10)

Unnamed: 0,time_received,per_id,offer_id,difficulty,duration,offer_type,reward,email,mobile,social,web,time_viewed,time_completed,time_expiry
0,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168
1,0,13107,2,10,168,discount,2,1,1,0,1,18.0,,168
2,0,3246,8,5,120,bogo,5,1,1,1,1,0.0,60.0,120
3,0,3246,8,5,120,bogo,5,1,1,1,1,0.0,600.0,120
5,0,3246,8,5,120,bogo,5,1,1,1,1,504.0,600.0,120
6,0,2669,3,0,96,informational,0,1,1,0,1,,,96
7,0,9940,0,20,240,discount,5,1,0,0,1,18.0,,240
8,0,2860,0,20,240,discount,5,1,0,0,1,150.0,,240
9,0,4366,7,10,168,bogo,10,1,1,1,0,102.0,510.0,168
10,0,4366,7,10,168,bogo,10,1,1,1,0,510.0,510.0,168


Now, we will attempt to classify the offers to their probable outcomes.

1. Offers that are probably successful will be offers that are viewed sometime after they are received but before the offer is completed. This means that the customer is aware of an offer and is making transactions as a result of the offer. In addition, the offer must be completed before it expires. Likewise the time of vewing must also come before the time of completion.

2. Offers that are probably tried are offers that are either

-received, viewed but completed after time of expiration or
-received, viewed before the offer expires but the offer is not completed.
-Offers that have failed or are false offers will be offers that do not belong to any of the above two classes.

Right now, the classifications are just probable. We will check if there are any transactions that occur during these offers

In [0]:
def probable_successful_offers(df):
    if ((df['time_received'] <= df['time_viewed']) &\
        (df['time_viewed'] <= df['time_completed']) &\
        (df['time_completed'] <= df['time_expiry'])):
        return 1
    else:
        return 0
    
def probable_tried_offers(df):
    if (((df['time_received'] <= df['time_viewed']) &\
         (df['time_viewed'] <= df['time_expiry']) &\
         (df['time_expiry'] < df['time_completed']))
        |\
        ((df['time_received'] <= df['time_viewed']) &\
         (df['time_viewed'] <= df['time_expiry']) &\
         (math.isnan(df['time_completed'])))):
        return 1
    else:
        return 0
    
def probable_failed_offers(df):
    if ((df['successful_offer'] == 1) | (df['tried_offer'] == 1)):
        return 0
    else:
        return 1

all_offers['successful_offer'] = all_offers.apply(probable_successful_offers, axis=1)
all_offers['tried_offer'] = all_offers.apply(probable_tried_offers, axis=1)
all_offers['failed_offer'] = all_offers.apply(probable_failed_offers, axis=1)

We will drop all duplicated entries including those with duplicated time of receival, person id and offer id. No indivduals received the same offer twice during the same day.

These duplicated entries are eraneous entries generated from the merging process.

In [32]:
# check if customers can receive the same offer type more than once in the same day
transcript_offer_received[transcript_offer_received.duplicated(subset=['time_received', 'per_id', 'offer_id'])]

Unnamed: 0,time_received,per_id,offer_id,difficulty,duration,offer_type,reward,email,mobile,social,web


In [33]:
# drop all rows with duplicated per_id, offer_id and time_received
# very likely they are double counting
all_offers.drop_duplicates(subset=['time_received', 'per_id', 'offer_id'], keep='first', inplace=True)

print(all_offers.shape)
print(transcript_offer_received.shape)

(55747, 17)
(66501, 11)


In [34]:
all_offers.head(10)

Unnamed: 0,time_received,per_id,offer_id,difficulty,duration,offer_type,reward,email,mobile,social,web,time_viewed,time_completed,time_expiry,successful_offer,tried_offer,failed_offer
0,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0
1,0,13107,2,10,168,discount,2,1,1,0,1,18.0,,168,0,1,0
2,0,3246,8,5,120,bogo,5,1,1,1,1,0.0,60.0,120,1,0,0
6,0,2669,3,0,96,informational,0,1,1,0,1,,,96,0,0,1
7,0,9940,0,20,240,discount,5,1,0,0,1,18.0,,240,0,1,0
8,0,2860,0,20,240,discount,5,1,0,0,1,150.0,,240,0,1,0
9,0,4366,7,10,168,bogo,10,1,1,1,0,102.0,510.0,168,0,1,0
11,0,11315,3,0,96,informational,0,1,1,0,1,18.0,,96,0,1,0
13,0,12359,0,20,240,discount,5,1,0,0,1,30.0,30.0,240,1,0,0
17,0,14351,9,10,240,discount,2,1,1,1,1,18.0,126.0,240,1,0,0


In [35]:
# get subset of offers that are successfully completed or tried
succ_tried_offers = all_offers[(all_offers['successful_offer']==1) | (all_offers['tried_offer']==1)]
succ_tried_offers.head(10)

Unnamed: 0,time_received,per_id,offer_id,difficulty,duration,offer_type,reward,email,mobile,social,web,time_viewed,time_completed,time_expiry,successful_offer,tried_offer,failed_offer
0,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0
1,0,13107,2,10,168,discount,2,1,1,0,1,18.0,,168,0,1,0
2,0,3246,8,5,120,bogo,5,1,1,1,1,0.0,60.0,120,1,0,0
7,0,9940,0,20,240,discount,5,1,0,0,1,18.0,,240,0,1,0
8,0,2860,0,20,240,discount,5,1,0,0,1,150.0,,240,0,1,0
9,0,4366,7,10,168,bogo,10,1,1,1,0,102.0,510.0,168,0,1,0
11,0,11315,3,0,96,informational,0,1,1,0,1,18.0,,96,0,1,0
13,0,12359,0,20,240,discount,5,1,0,0,1,30.0,30.0,240,1,0,0
17,0,14351,9,10,240,discount,2,1,1,1,1,18.0,126.0,240,1,0,0
21,0,12079,0,20,240,discount,5,1,0,0,1,30.0,192.0,240,1,0,0


# **We will now do a cross-product between all successful/tried offers and all transactions**

We can then check if the spending occurred during the offer by checking if the time of spending.

1. If an offer is **probably successful**, then the spending should occur after the offer is received and before the offer is completed.

2. If an offer is **probably tried**, then the spending should occur after the offer is received but before the offer has expired.

If transactions occurred while the offer is still valid, then the offer is considered truly successful or tried.

In [36]:
succ_tried_offers = pd.merge(succ_tried_offers, transcript_trans, on='per_id', how='left')
succ_tried_offers.head(10)

Unnamed: 0,time_received,per_id,offer_id,difficulty,duration,offer_type,reward,email,mobile,social,web,time_viewed,time_completed,time_expiry,successful_offer,tried_offer,failed_offer,time_spent,amt_spent
0,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,132.0,19.89
1,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,144.0,17.78
2,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,222.0,19.67
3,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,240.0,29.72
4,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,378.0,23.93
5,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,510.0,21.72
6,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,534.0,26.56
7,0,13107,2,10,168,discount,2,1,1,0,1,18.0,,168,0,1,0,288.0,17.88
8,0,13107,2,10,168,discount,2,1,1,0,1,18.0,,168,0,1,0,294.0,21.43
9,0,13107,2,10,168,discount,2,1,1,0,1,18.0,,168,0,1,0,522.0,18.42


In [37]:
succ_tried_offers.head(30)

Unnamed: 0,time_received,per_id,offer_id,difficulty,duration,offer_type,reward,email,mobile,social,web,time_viewed,time_completed,time_expiry,successful_offer,tried_offer,failed_offer,time_spent,amt_spent
0,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,132.0,19.89
1,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,144.0,17.78
2,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,222.0,19.67
3,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,240.0,29.72
4,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,378.0,23.93
5,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,510.0,21.72
6,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,534.0,26.56
7,0,13107,2,10,168,discount,2,1,1,0,1,18.0,,168,0,1,0,288.0,17.88
8,0,13107,2,10,168,discount,2,1,1,0,1,18.0,,168,0,1,0,294.0,21.43
9,0,13107,2,10,168,discount,2,1,1,0,1,18.0,,168,0,1,0,522.0,18.42


In [0]:
def check_offer_transactions(df):
    if df['successful_offer'] == 1:
        if (df['time_spent'] >= df['time_received']) and (df['time_spent'] <= df['time_completed']):
            return 1
        else:
            return 0
    else:
        if (df['time_spent'] >= df['time_received']) and (df['time_spent'] <= df['time_expiry']):
            return 1
        else:
            return 0

succ_tried_offers['spent_during_offer'] = succ_tried_offers.apply(check_offer_transactions, axis=1)

In [39]:
succ_tried_offers.head(40)

Unnamed: 0,time_received,per_id,offer_id,difficulty,duration,offer_type,reward,email,mobile,social,web,time_viewed,time_completed,time_expiry,successful_offer,tried_offer,failed_offer,time_spent,amt_spent,spent_during_offer
0,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,132.0,19.89,1
1,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,144.0,17.78,0
2,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,222.0,19.67,0
3,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,240.0,29.72,0
4,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,378.0,23.93,0
5,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,510.0,21.72,0
6,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,534.0,26.56,0
7,0,13107,2,10,168,discount,2,1,1,0,1,18.0,,168,0,1,0,288.0,17.88,0
8,0,13107,2,10,168,discount,2,1,1,0,1,18.0,,168,0,1,0,294.0,21.43,0
9,0,13107,2,10,168,discount,2,1,1,0,1,18.0,,168,0,1,0,522.0,18.42,0


In [40]:
# offers that have transactions occuring during their validity period are very likely to be actual offers
succ_tried_offers = succ_tried_offers[succ_tried_offers['spent_during_offer']==1]

succ_tried_offers.head(10)

Unnamed: 0,time_received,per_id,offer_id,difficulty,duration,offer_type,reward,email,mobile,social,web,time_viewed,time_completed,time_expiry,successful_offer,tried_offer,failed_offer,time_spent,amt_spent,spent_during_offer
0,0,6962,6,5,168,bogo,5,1,1,0,1,6.0,132.0,168,1,0,0,132.0,19.89,1
10,0,3246,8,5,120,bogo,5,1,1,1,1,0.0,60.0,120,1,0,0,60.0,9.54,1
13,0,9940,0,20,240,discount,5,1,0,0,1,18.0,,240,0,1,0,210.0,12.33,1
34,0,11315,3,0,96,informational,0,1,1,0,1,18.0,,96,0,1,0,18.0,13.25,1
35,0,11315,3,0,96,informational,0,1,1,0,1,18.0,,96,0,1,0,72.0,18.9,1
42,0,12359,0,20,240,discount,5,1,0,0,1,30.0,30.0,240,1,0,0,30.0,339.69,1
50,0,14351,9,10,240,discount,2,1,1,1,1,18.0,126.0,240,1,0,0,126.0,28.29,1
55,0,12079,0,20,240,discount,5,1,0,0,1,30.0,192.0,240,1,0,0,162.0,19.91,1
56,0,12079,0,20,240,discount,5,1,0,0,1,30.0,192.0,240,1,0,0,192.0,27.23,1
64,0,921,4,10,120,bogo,10,1,1,1,1,0.0,6.0,120,1,0,0,6.0,19.88,1


In [41]:
succ_tried_offers.isnull().mean()

time_received         0.000000
per_id                0.000000
offer_id              0.000000
difficulty            0.000000
duration              0.000000
offer_type            0.000000
reward                0.000000
email                 0.000000
mobile                0.000000
social                0.000000
web                   0.000000
time_viewed           0.000000
time_completed        0.455735
time_expiry           0.000000
successful_offer      0.000000
tried_offer           0.000000
failed_offer          0.000000
time_spent            0.000000
amt_spent             0.000000
spent_during_offer    0.000000
dtype: float64

# **Find all failed offers**

We will now find out which offers had failed, meaning customers did not spend any money on these offers.

We have the list of all offers that were sent by the company in transcript_offer_received. 

We also have the list of offers that customers did spent money for in succ_tried_offers. 

Hence the difference of the two can tell us which offers actually failed.

In [42]:
# find all offers that were sent
all_offers = transcript_offer_received[['time_received', 'per_id', 'offer_id']]
all_offers.head()

Unnamed: 0,time_received,per_id,offer_id
0,0,6962,6
1,0,13107,2
2,0,3246,8
3,0,2669,3
4,0,9940,0


In [43]:
succ_tried_offers_summary = succ_tried_offers.groupby(['time_received',\
                                                       'per_id',\
                                                       'offer_id']).agg({'successful_offer':'min',\
                                                                         'tried_offer':'min',\
                                                                         'failed_offer':'min'}).reset_index()
succ_tried_offers_summary.head(10)

Unnamed: 0,time_received,per_id,offer_id,successful_offer,tried_offer,failed_offer
0,0,2,9,1,0,0
1,0,4,5,0,1,0
2,0,5,5,0,1,0
3,0,9,6,1,0,0
4,0,11,5,0,1,0
5,0,13,4,0,1,0
6,0,14,8,0,1,0
7,0,15,4,1,0,0
8,0,16,9,1,0,0
9,0,17,3,0,1,0


In [44]:
all_offers = pd.merge(all_offers, succ_tried_offers_summary, on=['time_received', 'per_id', 'offer_id'], how='left')
all_offers.head()

Unnamed: 0,time_received,per_id,offer_id,successful_offer,tried_offer,failed_offer
0,0,6962,6,1.0,0.0,0.0
1,0,13107,2,,,
2,0,3246,8,1.0,0.0,0.0
3,0,2669,3,,,
4,0,9940,0,0.0,1.0,0.0


In [45]:
# rows with NaN values in all 3 columns (successful_offer, tried_offer, failed_offer) are offers that failed
all_offers['successful_offer'].fillna(0, inplace=True)
all_offers['tried_offer'].fillna(0, inplace=True)
all_offers['failed_offer'].fillna(1, inplace=True)

all_offers.head(10)

#This is the most wholesome all offers scenario table which we can use for our EDA

Unnamed: 0,time_received,per_id,offer_id,successful_offer,tried_offer,failed_offer
0,0,6962,6,1.0,0.0,0.0
1,0,13107,2,0.0,0.0,1.0
2,0,3246,8,1.0,0.0,0.0
3,0,2669,3,0.0,0.0,1.0
4,0,9940,0,0.0,1.0,0.0
5,0,2860,0,0.0,0.0,1.0
6,0,4366,7,0.0,0.0,1.0
7,0,11315,3,0.0,1.0,0.0
8,0,12359,0,1.0,0.0,0.0
9,0,14351,9,1.0,0.0,0.0


In [46]:
failed_offers = all_offers[all_offers['failed_offer']==1]
failed_offers.head()

Unnamed: 0,time_received,per_id,offer_id,successful_offer,tried_offer,failed_offer
1,0,13107,2,0.0,0.0,1.0
3,0,2669,3,0.0,0.0,1.0
5,0,2860,0,0.0,0.0,1.0
6,0,4366,7,0.0,0.0,1.0
11,0,13686,6,0.0,0.0,1.0


In [0]:
def assign_day_num(df, time_col, day_name_col='day_num'):

    #Convert the hours into the appropriate days. Based on the assumption that
    #a day contains 24 hours

   
    #Step 1, create a string array of the day name based on time in hours
    
    start_of_day = np.arange(0,725,24)
    end_of_day = np.arange(23,750,24)
    day_name_list = []   
    
    for i in range(len(start_of_day)):
        day_name = str(start_of_day[i]) + "_" + str(end_of_day[i])
        day_name_list.append(day_name)
       
   #Step 2, assign EACH row with appropriate day_number(eg. 2nd day) and day_name(24_48)
    
    #list to store result for all the rows
    df_day_num = []
    df_day_name = []
    
    for row, col in df.iterrows():
        day_num = int(col[time_col] // 24) # Floor division, so hour 148 will be day 6, day count starts from 0, 0-29 days 
        
        day_name = day_name_list[day_num] # 6th index in day_name list is '144_167', the string we created using step 1
   
        #list.append
        df_day_num.append(day_num)
        df_day_name.append(day_name)
    
    #Step 3, allocate new columns in df with calculated column values
    df[day_name_col] = df_day_num
    df['day_name'] = df_day_name 
    
    return df

In [48]:
failed_offers = assign_day_num(failed_offers, 'time_received')
failed_offers['daily_amt_spent'] = 0
failed_offers['num_trans'] = 0

failed_offers.head(10)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,time_received,per_id,offer_id,successful_offer,tried_offer,failed_offer,day_num,day_name,daily_amt_spent,num_trans
1,0,13107,2,0.0,0.0,1.0,0,0_23,0,0
3,0,2669,3,0.0,0.0,1.0,0,0_23,0,0
5,0,2860,0,0.0,0.0,1.0,0,0_23,0,0
6,0,4366,7,0.0,0.0,1.0,0,0_23,0,0
11,0,13686,6,0.0,0.0,1.0,0,0_23,0,0
12,0,1722,7,0.0,0.0,1.0,0,0_23,0,0
13,0,7788,2,0.0,0.0,1.0,0,0_23,0,0
14,0,13928,6,0.0,0.0,1.0,0,0_23,0,0
20,0,5101,6,0.0,0.0,1.0,0,0_23,0,0
23,0,6786,4,0.0,0.0,1.0,0,0_23,0,0


In [49]:
daily_failed_offers = failed_offers[['day_num', 'per_id', 'offer_id', 'daily_amt_spent', 'num_trans']]
daily_failed_offers.head()

#day_num here is the time the offer was received by the customer

Unnamed: 0,day_num,per_id,offer_id,daily_amt_spent,num_trans
1,0,13107,2,0,0
3,0,2669,3,0,0
5,0,2860,0,0,0
6,0,4366,7,0,0
11,0,13686,6,0,0


We now have the daily summary of offers that failed. Customers did not attempt these offers at all.

# **daily summary of all transactions that are carried out**

We will now produce a daily summary of customers' transaction behaviors. 

At the end of this section, we will know how much each customer spent and how many transactions he/she carried out for each offer type. This also includes offer id 10(apart from 0-9 actual offers) which tracks non-offer transactions

In [50]:
# check if there are duplicted per_id, time_spent, amt_spent
succ_tried_offers[succ_tried_offers.duplicated(subset=['time_spent', 'per_id', 'amt_spent', 'offer_id'])]

Unnamed: 0,time_received,per_id,offer_id,difficulty,duration,offer_type,reward,email,mobile,social,web,time_viewed,time_completed,time_expiry,successful_offer,tried_offer,failed_offer,time_spent,amt_spent,spent_during_offer
71739,168,1519,9,10,240,discount,2,1,1,1,1,180.0,198.0,408,1,0,0,198.0,22.49,1
80393,168,8261,9,10,240,discount,2,1,1,1,1,198.0,390.0,408,1,0,0,192.0,0.29,1
80603,168,14757,9,10,240,discount,2,1,1,1,1,264.0,408.0,408,1,0,0,180.0,12.65,1
81502,168,906,9,10,240,discount,2,1,1,1,1,210.0,390.0,408,1,0,0,186.0,2.69,1
89816,168,7429,0,20,240,discount,5,1,0,0,1,198.0,210.0,408,1,0,0,210.0,22,1
89952,168,5071,9,10,240,discount,2,1,1,1,1,216.0,,408,0,1,0,186.0,3.58,1
91965,168,3747,9,10,240,discount,2,1,1,1,1,192.0,594.0,408,0,1,0,234.0,2.58,1
92480,168,6970,0,20,240,discount,5,1,0,0,1,216.0,216.0,408,1,0,0,204.0,9.19,1
92481,168,6970,0,20,240,discount,5,1,0,0,1,216.0,216.0,408,1,0,0,216.0,14.31,1
94747,168,2465,1,7,168,discount,3,1,1,1,1,198.0,,336,0,1,0,168.0,2.74,1


In [0]:
offer_transactions = succ_tried_offers[['time_spent', 'per_id', 'amt_spent', 'spent_during_offer', 'offer_id']]

transactions_labeled = pd.merge(transcript_trans, offer_transactions, on=['time_spent', 'per_id', 'amt_spent'], how='left')

# these are non-offer transactions -> spent during offer is 0 means Offer 10 assigned
transactions_labeled['spent_during_offer'].fillna(0, inplace=True)
transactions_labeled['offer_id'].fillna(10, inplace=True)



In [52]:
transactions_labeled.head(10)

Unnamed: 0,time_spent,per_id,amt_spent,spent_during_offer,offer_id
0,0,148,0.83,1.0,7.0
1,0,9326,34.56,1.0,2.0
2,0,4920,13.23,0.0,10.0
3,0,10445,19.51,1.0,5.0
4,0,14742,18.97,0.0,10.0
5,0,5705,33.9,1.0,6.0
6,0,9901,18.59,0.0,10.0
7,0,5991,18.01,1.0,7.0
8,0,8337,19.11,1.0,4.0
9,0,4460,36.19,1.0,3.0


In [53]:
# check
print(transcript_trans.shape)
print(transactions_labeled.shape)

(123957, 3)
(129438, 5)


In [0]:
transactions_labeled = assign_day_num(transactions_labeled, 'time_spent')

In [55]:
transactions_labeled.head(20)

Unnamed: 0,time_spent,per_id,amt_spent,spent_during_offer,offer_id,day_num,day_name
0,0,148,0.83,1.0,7.0,0,0_23
1,0,9326,34.56,1.0,2.0,0,0_23
2,0,4920,13.23,0.0,10.0,0,0_23
3,0,10445,19.51,1.0,5.0,0,0_23
4,0,14742,18.97,0.0,10.0,0,0_23
5,0,5705,33.9,1.0,6.0,0,0_23
6,0,9901,18.59,0.0,10.0,0,0_23
7,0,5991,18.01,1.0,7.0,0,0_23
8,0,8337,19.11,1.0,4.0,0,0_23
9,0,4460,36.19,1.0,3.0,0,0_23


In [0]:
transcript_offer = assign_day_num(transcript_offer, 'time') 

In [57]:
transcript_offer.head(30) 

Unnamed: 0,event,time,per_id,offer_id,difficulty,duration,offer_type,reward,email,mobile,social,web,day_num,day_name
0,offer received,0,6962,6,5,168,bogo,5,1,1,0,1,0,0_23
1,offer received,0,13107,2,10,168,discount,2,1,1,0,1,0,0_23
2,offer received,0,3246,8,5,120,bogo,5,1,1,1,1,0,0_23
3,offer received,0,2669,3,0,96,informational,0,1,1,0,1,0,0_23
4,offer received,0,9940,0,20,240,discount,5,1,0,0,1,0,0_23
5,offer received,0,2860,0,20,240,discount,5,1,0,0,1,0,0_23
6,offer received,0,4366,7,10,168,bogo,10,1,1,1,0,0,0_23
7,offer received,0,11315,3,0,96,informational,0,1,1,0,1,0,0_23
8,offer received,0,12359,0,20,240,discount,5,1,0,0,1,0,0_23
9,offer received,0,14351,9,10,240,discount,2,1,1,1,1,0,0_23


In [0]:
#using transactions_labeled to create summary of daily transactions
daily_transactions = transactions_labeled.groupby(['day_num',\
                                                     'per_id',\
                                                     'offer_id']).agg({'amt_spent':'sum',\
                                                                       'spent_during_offer':'count' #how many times he spent during offer->num_trans
                                                                      }).reset_index()

daily_transactions.rename(index=str, columns={"amt_spent":"daily_amt_spent",\
                                                "spent_during_offer":"num_trans"
                                               }, inplace=True)


In [59]:
daily_transactions.head(20)

Unnamed: 0,day_num,per_id,offer_id,daily_amt_spent,num_trans
0,0,6,10.0,25.62,1
1,0,10,10.0,27.26,1
2,0,15,4.0,18.26,1
3,0,15,10.0,14.9,1
4,0,17,3.0,6.03,1
5,0,25,2.0,19.84,1
6,0,37,1.0,0.23,1
7,0,44,10.0,25.03,1
8,0,47,7.0,26.69,1
9,0,51,1.0,15.36,2


# **Find out which customers did not make any non-promotional transactions and the days these events occurred**

We will now find out which days did individuals not spend any money during non-promotional situations.

We start off by producing the dataframe of all possible day, person and offer_id_10 instances. Offer id 10 is used to track non-promotional transactions. We will call this dataframe non_offer_trans.

Every day, we will only add individuals whom we have seen so far in the transcript. 


In [60]:
#transcript original df is used , which contains all events of the custoemr
transcript_day = assign_day_num(transcript, 'time')
transcript_day.head(30)

Unnamed: 0,event,time,value,per_id,value_type,value_id_amt,day_num,day_name
0,offer received,0,{'offer id': '9b98b8c7a33c4b65b9aebfe6a799e6d9'},6962,offer id,9b98b8c7a33c4b65b9aebfe6a799e6d9,0,0_23
2,offer received,0,{'offer id': '2906b810c7d4411798c6938adc9daaa5'},13107,offer id,2906b810c7d4411798c6938adc9daaa5,0,0_23
5,offer received,0,{'offer id': 'f19421c1d4aa40978ebb69ca19b0e20d'},3246,offer id,f19421c1d4aa40978ebb69ca19b0e20d,0,0_23
7,offer received,0,{'offer id': '3f207df678b143eea3cee63160fa8bed'},2669,offer id,3f207df678b143eea3cee63160fa8bed,0,0_23
8,offer received,0,{'offer id': '0b1e1539f2cc45b7b9fa7c272da2e1d7'},9940,offer id,0b1e1539f2cc45b7b9fa7c272da2e1d7,0,0_23
9,offer received,0,{'offer id': '0b1e1539f2cc45b7b9fa7c272da2e1d7'},2860,offer id,0b1e1539f2cc45b7b9fa7c272da2e1d7,0,0_23
12,offer received,0,{'offer id': 'ae264e3637204a6fb9bb56bc8210ddfd'},4366,offer id,ae264e3637204a6fb9bb56bc8210ddfd,0,0_23
13,offer received,0,{'offer id': '3f207df678b143eea3cee63160fa8bed'},11315,offer id,3f207df678b143eea3cee63160fa8bed,0,0_23
14,offer received,0,{'offer id': '0b1e1539f2cc45b7b9fa7c272da2e1d7'},12359,offer id,0b1e1539f2cc45b7b9fa7c272da2e1d7,0,0_23
15,offer received,0,{'offer id': 'fafdcd668e3743c1bb461111dcafc2a4'},14351,offer id,fafdcd668e3743c1bb461111dcafc2a4,0,0_23


In [61]:

#To find out since when a person has been a customer


# Each day, add only individuals who we have seen so far in the transcript
person_seen_so_far = np.array([])

time_periods = transactions_labeled.day_num.max() #here it is 22

day_col = np.array([])
per_id_col = np.array([])
offer_id_col = np.array([])

for day_num in range(time_periods):                                                  #from day 0 to 22
    #trans_subset = transcript_offer[transcript_offer['day_num']==day_num]
    trans_subset = transcript_day[transcript_day['day_num']==day_num]               #get all transcipts(events+transactions) from transcript_day where day_num=0 
    person_seen = trans_subset.per_id.unique()                                      #add all the unique person_id encountered to the person_seen array
    person_seen = np.sort(person_seen)
    person_seen_so_far = np.append(person_seen_so_far, person_seen)                 #add the new person_seen array to the existing person_seen_so_far array
    person_seen_so_far = np.unique(person_seen_so_far)                              #remove duplicates
    person_seen_so_far = np.sort(person_seen_so_far)
    
    per_id_col = np.append(per_id_col, person_seen_so_far)                          #per_id col contains all the person_seen_so_far
    day_col = np.append(day_col, np.repeat(np.array(day_num), len(person_seen_so_far)))   #day_col assigns current day_num(here 0) to all the person_seen-so_far
    offer_id_col = np.append(offer_id_col, np.repeat(np.array(10), len(person_seen_so_far))) #Assign offer_id 10 to all the person_id and day combination

#thus so far we have all the person who are the customers since day 0 and they are assigned offer 10(no offer), regardless if they received an order or not on this day
    
non_offer_trans = pd.DataFrame({'day_num':day_col,'per_id':per_id_col,'offer_id':offer_id_col})

non_offer_trans.head(35)

Unnamed: 0,day_num,per_id,offer_id
0,0.0,1.0,10.0
1,0.0,2.0,10.0
2,0.0,4.0,10.0
3,0.0,5.0,10.0
4,0.0,6.0,10.0
5,0.0,8.0,10.0
6,0.0,9.0,10.0
7,0.0,10.0,10.0
8,0.0,11.0,10.0
9,0.0,12.0,10.0


In [62]:
# find the days when a person did not perform any transactions associated with 'no offers'

#merging on daily transactions (which has offer_id 10 marked for non-offer trans) will ensure that we find exactly the days when the person made non-offer transactions
#since it is a left merge, only the offer_id 10 will be considered


non_offer_trans = pd.merge(non_offer_trans, daily_transactions, on=['day_num', 'per_id', 'offer_id'], how='left')
non_offer_trans.head(35)

Unnamed: 0,day_num,per_id,offer_id,daily_amt_spent,num_trans
0,0.0,1.0,10.0,,
1,0.0,2.0,10.0,,
2,0.0,4.0,10.0,,
3,0.0,5.0,10.0,,
4,0.0,6.0,10.0,25.62,1.0
5,0.0,8.0,10.0,,
6,0.0,9.0,10.0,,
7,0.0,10.0,10.0,27.26,1.0
8,0.0,11.0,10.0,,
9,0.0,12.0,10.0,,


In [0]:
#The NaN means that no transactions were made for the "offer_id_10" for that day by that person
non_offer_trans['daily_amt_spent'].fillna(0, inplace=True)
non_offer_trans['num_trans'].fillna(0, inplace=True)



In [64]:
non_offer_trans.head(30)

Unnamed: 0,day_num,per_id,offer_id,daily_amt_spent,num_trans
0,0.0,1.0,10.0,0.0,0.0
1,0.0,2.0,10.0,0.0,0.0
2,0.0,4.0,10.0,0.0,0.0
3,0.0,5.0,10.0,0.0,0.0
4,0.0,6.0,10.0,25.62,1.0
5,0.0,8.0,10.0,0.0,0.0
6,0.0,9.0,10.0,0.0,0.0
7,0.0,10.0,10.0,27.26,1.0
8,0.0,11.0,10.0,0.0,0.0
9,0.0,12.0,10.0,0.0,0.0


In [0]:
# during these days, individuals made no transactions for the 'offer_id_10'
no_offer_no_trans = non_offer_trans[non_offer_trans['num_trans']==0]


In [66]:
no_offer_no_trans.head(30)

Unnamed: 0,day_num,per_id,offer_id,daily_amt_spent,num_trans
0,0.0,1.0,10.0,0.0,0.0
1,0.0,2.0,10.0,0.0,0.0
2,0.0,4.0,10.0,0.0,0.0
3,0.0,5.0,10.0,0.0,0.0
5,0.0,8.0,10.0,0.0,0.0
6,0.0,9.0,10.0,0.0,0.0
8,0.0,11.0,10.0,0.0,0.0
9,0.0,12.0,10.0,0.0,0.0
10,0.0,13.0,10.0,0.0,0.0
11,0.0,14.0,10.0,0.0,0.0


# **Build data**

We will now combine the daily summary of transactions, the daily summary of failed offers and the daily summary of when no non-promotional spending occurred. The resulting dataframe tracks on a daily basis:

which offer type a customer recieved and how much did he/she spend on them and how many transactions did he/she carried out for them
did customers make any non-promotional transactions. If so how much did he/she spent and how many transactions did he/she carried out.

In [67]:
daily_data = pd.concat([daily_transactions, daily_failed_offers], axis=0)
daily_data = pd.concat([daily_data, no_offer_no_trans], axis=0)
daily_data.sort_values(by=['day_num', 'per_id', 'offer_id'], inplace=True)
daily_data.reset_index(drop=True, inplace=True)

daily_data.head(50)

Unnamed: 0,day_num,per_id,offer_id,daily_amt_spent,num_trans
0,0.0,1.0,3.0,0.0,0.0
1,0.0,1.0,10.0,0.0,0.0
2,0.0,2.0,10.0,0.0,0.0
3,0.0,4.0,10.0,0.0,0.0
4,0.0,5.0,10.0,0.0,0.0
5,0.0,6.0,10.0,25.62,1.0
6,0.0,8.0,7.0,0.0,0.0
7,0.0,8.0,10.0,0.0,0.0
8,0.0,9.0,10.0,0.0,0.0
9,0.0,10.0,7.0,0.0,0.0


In [68]:
daily_data['amt_spent_per_trans'] = daily_data['daily_amt_spent'] / daily_data['num_trans']

daily_data['amt_spent_per_trans'].fillna(0, inplace=True)

daily_data.head()

Unnamed: 0,day_num,per_id,offer_id,daily_amt_spent,num_trans,amt_spent_per_trans
0,0.0,1.0,3.0,0.0,0.0,0.0
1,0.0,1.0,10.0,0.0,0.0,0.0
2,0.0,2.0,10.0,0.0,0.0,0.0
3,0.0,4.0,10.0,0.0,0.0,0.0
4,0.0,5.0,10.0,0.0,0.0,0.0


In [69]:
daily_data.offer_id.value_counts()

10.0    412894
7.0      10563
9.0       9939
4.0       9257
5.0       8693
1.0       8575
3.0       8160
8.0       8044
0.0       7829
2.0       7530
6.0       7448
Name: offer_id, dtype: int64

In [70]:
daily_data.head(20)

Unnamed: 0,day_num,per_id,offer_id,daily_amt_spent,num_trans,amt_spent_per_trans
0,0.0,1.0,3.0,0.0,0.0,0.0
1,0.0,1.0,10.0,0.0,0.0,0.0
2,0.0,2.0,10.0,0.0,0.0,0.0
3,0.0,4.0,10.0,0.0,0.0,0.0
4,0.0,5.0,10.0,0.0,0.0,0.0
5,0.0,6.0,10.0,25.62,1.0,25.62
6,0.0,8.0,7.0,0.0,0.0,0.0
7,0.0,8.0,10.0,0.0,0.0,0.0
8,0.0,9.0,10.0,0.0,0.0,0.0
9,0.0,10.0,7.0,0.0,0.0,0.0


# **Compute Profits**

We will compute the amount of profits generated by each instance in the daily dataset.

Since customers were only exposed to a maximum of 1 occurrence of an offer type every day, this means that the cost in daily_data is simply the reward of the promotion if it was completed.

We can calculate the amount of profits each individual generated for each offer type each day by following the 3 rules:

If the offer was successful, the profit would be the daily revenue minus the cost of the offer. Note that informational offers have no cost.
If the offer was not successful, the profit would be the revenue generated in that instance.
If the transactions were not made as part of an offer, the profit would be the revenue since there are no cost involved.

In [71]:
# we will record daily non-promotional transactions as having occurred under 1 'offer_id_10' per day
daily_data['num_offers'] = 1
daily_data.head(30)

Unnamed: 0,day_num,per_id,offer_id,daily_amt_spent,num_trans,amt_spent_per_trans,num_offers
0,0.0,1.0,3.0,0.0,0.0,0.0,1
1,0.0,1.0,10.0,0.0,0.0,0.0,1
2,0.0,2.0,10.0,0.0,0.0,0.0,1
3,0.0,4.0,10.0,0.0,0.0,0.0,1
4,0.0,5.0,10.0,0.0,0.0,0.0,1
5,0.0,6.0,10.0,25.62,1.0,25.62,1
6,0.0,8.0,7.0,0.0,0.0,0.0,1
7,0.0,8.0,10.0,0.0,0.0,0.0,1
8,0.0,9.0,10.0,0.0,0.0,0.0,1
9,0.0,10.0,7.0,0.0,0.0,0.0,1


In [72]:
offer_cost = portfolio[['offer_id', 'difficulty', 'reward']]
offer_cost.head(12)

Unnamed: 0,offer_id,difficulty,reward
0,7,10,10
1,4,10,10
2,3,0,0
3,6,5,5
4,0,20,5
5,1,7,3
6,9,10,2
7,5,0,0
8,8,5,5
9,2,10,2


In [73]:
daily_data = pd.merge(daily_data, offer_cost, on='offer_id', how='left')
# these are no offer. So fill NaN with 0 cost
daily_data['reward'].fillna(0, inplace=True)
daily_data['difficulty'].fillna(0, inplace=True)
daily_data.head()

Unnamed: 0,day_num,per_id,offer_id,daily_amt_spent,num_trans,amt_spent_per_trans,num_offers,difficulty,reward
0,0.0,1.0,3.0,0.0,0.0,0.0,1,0.0,0.0
1,0.0,1.0,10.0,0.0,0.0,0.0,1,0.0,0.0
2,0.0,2.0,10.0,0.0,0.0,0.0,1,0.0,0.0
3,0.0,4.0,10.0,0.0,0.0,0.0,1,0.0,0.0
4,0.0,5.0,10.0,0.0,0.0,0.0,1,0.0,0.0


In [0]:
#this is a function to calculate the cost of an offer

daily_data_cost = []

bogo_disc_offer_ids = [0,1,2,4,6,7,8,9] #not considering the informational and offer_id_10

for row, col in daily_data.iterrows():
    # if individual successfully completed the offer
    if col['daily_amt_spent'] > col['difficulty']:
        # if offer is either a bogo or discount offer
        if col['offer_id'] in bogo_disc_offer_ids:
            cost = col['num_offers'] * col['reward']
            daily_data_cost.append(cost)
        # if promotion or no offer
        else:
            daily_data_cost.append(0.0)
    else:
        daily_data_cost.append(0.0)
        
daily_data['cost'] = daily_data_cost

# **Has_Profit Calculated**

In [0]:
daily_data['profit'] = daily_data['daily_amt_spent'] - daily_data['cost']
has_profit = np.zeros(daily_data.shape[0])
has_profit[np.where(daily_data['profit'] > 0)] = 1
daily_data['has_profit'] = has_profit
daily_data.drop(columns=['reward', 'difficulty'], inplace=True)

In [76]:
daily_data.head(30)

Unnamed: 0,day_num,per_id,offer_id,daily_amt_spent,num_trans,amt_spent_per_trans,num_offers,cost,profit,has_profit
0,0.0,1.0,3.0,0.0,0.0,0.0,1,0.0,0.0,0.0
1,0.0,1.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0
2,0.0,2.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0
3,0.0,4.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0
4,0.0,5.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0
5,0.0,6.0,10.0,25.62,1.0,25.62,1,0.0,25.62,1.0
6,0.0,8.0,7.0,0.0,0.0,0.0,1,0.0,0.0,0.0
7,0.0,8.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0
8,0.0,9.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0
9,0.0,10.0,7.0,0.0,0.0,0.0,1,0.0,0.0,0.0


In [77]:
# these are labels generated for experimenting with other uplift model types

# only give positive labels to has_profit and offer id not equal to 10
target = []

# assign which quadrant each data point belongs to
quadrant = []

for row, col in daily_data.iterrows():
    if (col['has_profit'] == 1) and (col['offer_id'] != 10):
        target.append(1)
    else:
        target.append(0)
        
    if (col['has_profit'] == 1) and (col['offer_id'] != 10):
        # TR group. Received offer and respond (spend more than cost of offer)
        quadrant.append(0)
    elif (col['has_profit'] == 1) and (col['offer_id'] == 10):
        # CR group. Did not receive offer but respond (spend more than 0)
        quadrant.append(1)
    elif (col['has_profit'] == 0) and (col['offer_id'] != 10):
        # TN group. Received offer but did not respond (spend less than cost of offer)
        quadrant.append(2)
    else:
        # CN group. Did not receive offer and did not respond (no spending)
        quadrant.append(3)
        
daily_data['target'] = target
daily_data['quadrant'] = quadrant

daily_data.head()

Unnamed: 0,day_num,per_id,offer_id,daily_amt_spent,num_trans,amt_spent_per_trans,num_offers,cost,profit,has_profit,target,quadrant
0,0.0,1.0,3.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,2
1,0.0,1.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3
2,0.0,2.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3
3,0.0,4.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3
4,0.0,5.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3


In [78]:
daily_data.head(50)

Unnamed: 0,day_num,per_id,offer_id,daily_amt_spent,num_trans,amt_spent_per_trans,num_offers,cost,profit,has_profit,target,quadrant
0,0.0,1.0,3.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,2
1,0.0,1.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3
2,0.0,2.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3
3,0.0,4.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3
4,0.0,5.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3
5,0.0,6.0,10.0,25.62,1.0,25.62,1,0.0,25.62,1.0,0,1
6,0.0,8.0,7.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,2
7,0.0,8.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3
8,0.0,9.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3
9,0.0,10.0,7.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,2


In [79]:
# encode portfolio

cat_var_to_enc = ['offer_type']

for var in cat_var_to_enc:
    portfolio_enc = pd.concat([portfolio.drop(var, axis=1),\
                               pd.get_dummies(portfolio[var],\
                                              prefix=var, prefix_sep='_', drop_first=False)], axis=1)

cat_var_to_enc = ['offer_id']

for var in cat_var_to_enc:
    portfolio_enc = pd.concat([portfolio_enc,\
                               pd.get_dummies(portfolio_enc[var],\
                                              prefix=var, prefix_sep='_', drop_first=False)], axis=1)    

portfolio_enc.head()

Unnamed: 0,difficulty,duration,reward,offer_id,email,mobile,social,web,offer_type_bogo,offer_type_discount,offer_type_informational,offer_id_0,offer_id_1,offer_id_2,offer_id_3,offer_id_4,offer_id_5,offer_id_6,offer_id_7,offer_id_8,offer_id_9
0,10,168,10,7,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0
1,10,120,10,4,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0
2,0,96,0,3,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0
3,5,168,5,6,1,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0
4,20,240,5,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0


In [80]:
no_offer_df = pd.DataFrame([[0,30,0,10,0,0,\
                            0,0,0,0,\
                            0,0,0,\
                            0,0,0,0,\
                            0,0,0,0,1
                           ]], columns=['difficulty', 'duration', 'reward', 'offer_id', 'email', 'mobile',\
                                       'social', 'web', 'offer_type_bogo', 'offer_type_discount',\
                                       'offer_type_informational', 'offer_id_0', 'offer_id_1',\
                                       'offer_id_2', 'offer_id_3', 'offer_id_4', 'offer_id_5',\
                                       'offer_id_6', 'offer_id_7', 'offer_id_8', 'offer_id_9', 'offer_id_10'])
no_offer_df

Unnamed: 0,difficulty,duration,reward,offer_id,email,mobile,social,web,offer_type_bogo,offer_type_discount,offer_type_informational,offer_id_0,offer_id_1,offer_id_2,offer_id_3,offer_id_4,offer_id_5,offer_id_6,offer_id_7,offer_id_8,offer_id_9,offer_id_10
0,0,30,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [81]:
# create column for no offer indicator variable
portfolio_enc['offer_id_10'] = 0

# add information for no offer to portfolio_enc dataframe
portfolio_enc = portfolio_enc.append(no_offer_df, ignore_index=True)

portfolio_enc

Unnamed: 0,difficulty,duration,reward,offer_id,email,mobile,social,web,offer_type_bogo,offer_type_discount,offer_type_informational,offer_id_0,offer_id_1,offer_id_2,offer_id_3,offer_id_4,offer_id_5,offer_id_6,offer_id_7,offer_id_8,offer_id_9,offer_id_10
0,10,168,10,7,1,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0
1,10,120,10,4,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0
2,0,96,0,3,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0
3,5,168,5,6,1,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0
4,20,240,5,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
5,7,168,3,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0
6,10,240,2,9,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0
7,0,72,0,5,1,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0
8,5,120,5,8,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0
9,10,168,2,2,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0


In [82]:
# encode profile

cat_var_to_enc = ['gender']

for var in cat_var_to_enc:
    profile_enc = pd.concat([profile,\
                          pd.get_dummies(profile[var],\
                                         prefix=var, prefix_sep='_', drop_first=False)], axis=1)
profile_enc.head()

Unnamed: 0,age,became_member_on,gender,income,per_id,gender_F,gender_M,gender_O
1,55,20170715,F,112000.0,346,1,0,0
3,75,20170509,F,100000.0,6962,1,0,0
5,68,20180426,M,70000.0,13107,0,1,0
8,65,20180209,M,53000.0,3246,0,1,0
12,58,20171111,M,51000.0,2669,0,1,0


In [83]:
daily_data = pd.merge(daily_data, profile_enc, on='per_id', how='left')
daily_data = pd.merge(daily_data, portfolio_enc, on='offer_id', how='left')
daily_data.head()

Unnamed: 0,day_num,per_id,offer_id,daily_amt_spent,num_trans,amt_spent_per_trans,num_offers,cost,profit,has_profit,target,quadrant,age,became_member_on,gender,income,gender_F,gender_M,gender_O,difficulty,duration,reward,email,mobile,social,web,offer_type_bogo,offer_type_discount,offer_type_informational,offer_id_0,offer_id_1,offer_id_2,offer_id_3,offer_id_4,offer_id_5,offer_id_6,offer_id_7,offer_id_8,offer_id_9,offer_id_10
0,0.0,1.0,3.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,2,40,20180109,O,57000.0,0,0,1,0,96,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0
1,0.0,1.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3,40,20180109,O,57000.0,0,0,1,0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,0.0,2.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3,59,20160304,F,90000.0,1,0,0,0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,0.0,4.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3,26,20170621,F,73000.0,1,0,0,0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,0.0,5.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3,19,20160809,F,65000.0,1,0,0,0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [84]:
daily_data.isnull().mean()

day_num                     0.0
per_id                      0.0
offer_id                    0.0
daily_amt_spent             0.0
num_trans                   0.0
amt_spent_per_trans         0.0
num_offers                  0.0
cost                        0.0
profit                      0.0
has_profit                  0.0
target                      0.0
quadrant                    0.0
age                         0.0
became_member_on            0.0
gender                      0.0
income                      0.0
gender_F                    0.0
gender_M                    0.0
gender_O                    0.0
difficulty                  0.0
duration                    0.0
reward                      0.0
email                       0.0
mobile                      0.0
social                      0.0
web                         0.0
offer_type_bogo             0.0
offer_type_discount         0.0
offer_type_informational    0.0
offer_id_0                  0.0
offer_id_1                  0.0
offer_id

In [85]:
daily_data.offer_id.value_counts()

10.0    412894
7.0      10563
9.0       9939
4.0       9257
5.0       8693
1.0       8575
3.0       8160
8.0       8044
0.0       7829
2.0       7530
6.0       7448
Name: offer_id, dtype: int64

In [86]:
# print distribution of has_profit labels
for offer_num in range(11):
    data_subset = daily_data[daily_data['offer_id']==offer_num]
    print(data_subset.has_profit.value_counts())

0.0    4999
1.0    2830
Name: has_profit, dtype: int64
1.0    6928
0.0    1647
Name: has_profit, dtype: int64
0.0    4188
1.0    3342
Name: has_profit, dtype: int64
0.0    4450
1.0    3710
Name: has_profit, dtype: int64
1.0    7310
0.0    1947
Name: has_profit, dtype: int64
1.0    5753
0.0    2940
Name: has_profit, dtype: int64
0.0    4278
1.0    3170
Name: has_profit, dtype: int64
1.0    8496
0.0    2067
Name: has_profit, dtype: int64
1.0    5947
0.0    2097
Name: has_profit, dtype: int64
1.0    8682
0.0    1257
Name: has_profit, dtype: int64
0.0    355212
1.0     57682
Name: has_profit, dtype: int64


In [0]:
cred = service_account.Credentials.from_service_account_file('dsmt-team5-finalproject-4f1119faadf9.json')
project_id = "dsmt-team5-finalproject"
dataset_name = 'tacobell'
table1_name = 'daily_data_rolling'
if_exists_val = 'replace'

destination_name = dataset_name +'.'+table1_name
print(destination_name)

daily_data.to_gbq(destination_name, project_id = project_id, if_exists = if_exists_val, credentials = cred)

In [89]:
daily_data[daily_data['per_id']==7996].head(20)

Unnamed: 0,day_num,per_id,offer_id,daily_amt_spent,num_trans,amt_spent_per_trans,num_offers,cost,profit,has_profit,target,quadrant,age,became_member_on,gender,income,gender_F,gender_M,gender_O,difficulty,duration,reward,email,mobile,social,web,offer_type_bogo,offer_type_discount,offer_type_informational,offer_id_0,offer_id_1,offer_id_2,offer_id_3,offer_id_4,offer_id_5,offer_id_6,offer_id_7,offer_id_8,offer_id_9,offer_id_10
9753,0.0,7996.0,2.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,2,20,20151011,M,55000.0,0,1,0,10,168,2,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0
9754,0.0,7996.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3,20,20151011,M,55000.0,0,1,0,0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
25424,1.0,7996.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3,20,20151011,M,55000.0,0,1,0,0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
39222,2.0,7996.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3,20,20151011,M,55000.0,0,1,0,0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
53025,3.0,7996.0,10.0,3.61,1.0,3.61,1,0.0,3.61,1.0,0,1,20,20151011,M,55000.0,0,1,0,0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
66777,4.0,7996.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3,20,20151011,M,55000.0,0,1,0,0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
80420,5.0,7996.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3,20,20151011,M,55000.0,0,1,0,0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
94030,6.0,7996.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3,20,20151011,M,55000.0,0,1,0,0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
111741,7.0,7996.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3,20,20151011,M,55000.0,0,1,0,0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
130619,8.0,7996.0,10.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0,3,20,20151011,M,55000.0,0,1,0,0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
