In [1]:
import numpy as np
import pandas as pd
import os

In [2]:
# Dataset identifier; it is interpolated into the processed output directory
# and the output CSV file name in the path-configuration cell.
dataset_name = 'coupon_recommendations'

In [3]:
# Raw input: directory (relative to the notebook) and file name of the source CSV.
input_dir = './raw'
inp_fname = 'in-vehicle-coupon-recommendation.csv'
# Processed output: a per-dataset directory and the CSV file written inside it.
# NOTE(review): the directory is relative to the current working directory — confirm the notebook is run from its own folder.
output_dir = f'./../../processed/{dataset_name}/'
outp_fname = os.path.join(output_dir, f'{dataset_name}.csv')

# Read Data

In [4]:
# Build the full path to the raw CSV, load it, and preview the first rows.
raw_csv_path = os.path.join(input_dir, inp_fname)
data = pd.read_csv(raw_csv_path)
data.head()

Unnamed: 0,destination,passanger,weather,temperature,time,coupon,expiration,gender,age,maritalStatus,...,CoffeeHouse,CarryAway,RestaurantLessThan20,Restaurant20To50,toCoupon_GEQ5min,toCoupon_GEQ15min,toCoupon_GEQ25min,direction_same,direction_opp,Y
0,No Urgent Place,Alone,Sunny,55,2PM,Restaurant(<20),1d,Female,21,Unmarried partner,...,never,,4~8,1~3,1,0,0,0,1,1
1,No Urgent Place,Friend(s),Sunny,80,10AM,Coffee House,2h,Female,21,Unmarried partner,...,never,,4~8,1~3,1,0,0,0,1,0
2,No Urgent Place,Friend(s),Sunny,80,10AM,Carry out & Take away,2h,Female,21,Unmarried partner,...,never,,4~8,1~3,1,1,0,0,1,1
3,No Urgent Place,Friend(s),Sunny,80,2PM,Coffee House,2h,Female,21,Unmarried partner,...,never,,4~8,1~3,1,1,0,0,1,0
4,No Urgent Place,Friend(s),Sunny,80,2PM,Coffee House,1d,Female,21,Unmarried partner,...,never,,4~8,1~3,1,1,0,0,1,0


# Cosmetic changes to field names

In [5]:
# Cosmetic column renames, applied in a single pass:
#   - "passanger" (typo in the raw header) -> "passenger"
#   - "Y" (the target)                     -> "accept_coupon", a more informative name
# rename() ignores keys that are not present by default, so re-running this
# cell after the columns were already renamed is harmless.
# The result is rebound to `data` instead of using inplace=True, and the
# mid-cell data.head() (whose value was never displayed) is dropped.
data = data.rename(columns={"passanger": "passenger", "Y": "accept_coupon"})
data.head()

Unnamed: 0,destination,passenger,weather,temperature,time,coupon,expiration,gender,age,maritalStatus,...,CoffeeHouse,CarryAway,RestaurantLessThan20,Restaurant20To50,toCoupon_GEQ5min,toCoupon_GEQ15min,toCoupon_GEQ25min,direction_same,direction_opp,accept_coupon
0,No Urgent Place,Alone,Sunny,55,2PM,Restaurant(<20),1d,Female,21,Unmarried partner,...,never,,4~8,1~3,1,0,0,0,1,1
1,No Urgent Place,Friend(s),Sunny,80,10AM,Coffee House,2h,Female,21,Unmarried partner,...,never,,4~8,1~3,1,0,0,0,1,0
2,No Urgent Place,Friend(s),Sunny,80,10AM,Carry out & Take away,2h,Female,21,Unmarried partner,...,never,,4~8,1~3,1,1,0,0,1,1
3,No Urgent Place,Friend(s),Sunny,80,2PM,Coffee House,2h,Female,21,Unmarried partner,...,never,,4~8,1~3,1,1,0,0,1,0
4,No Urgent Place,Friend(s),Sunny,80,2PM,Coffee House,1d,Female,21,Unmarried partner,...,never,,4~8,1~3,1,1,0,0,1,0


# Insert Id Column

In [6]:
# Name of the row-identifier column that the next cell inserts.
id_col = "id"
# Name of the target column. The raw "Y" column is renamed to "accept_coupon"
# in the cosmetic-changes cell, so the target must be referenced by its new
# name; "Y" no longer exists in `data` at this point.
target_col = "accept_coupon"

In [7]:
# insert Id column 
if id_col not in data.columns:
    N = data.shape[0]
    data.insert(0, id_col, np.arange(N))
    print(data.head())

data[id_col] = data[id_col].astype(str)

   id      destination  passenger weather  temperature  time  \
0   0  No Urgent Place      Alone   Sunny           55   2PM   
1   1  No Urgent Place  Friend(s)   Sunny           80  10AM   
2   2  No Urgent Place  Friend(s)   Sunny           80  10AM   
3   3  No Urgent Place  Friend(s)   Sunny           80   2PM   
4   4  No Urgent Place  Friend(s)   Sunny           80   2PM   

                  coupon expiration  gender age  ... CoffeeHouse  CarryAway  \
0        Restaurant(<20)         1d  Female  21  ...       never        NaN   
1           Coffee House         2h  Female  21  ...       never        NaN   
2  Carry out & Take away         2h  Female  21  ...       never        NaN   
3           Coffee House         2h  Female  21  ...       never        NaN   
4           Coffee House         1d  Female  21  ...       never        NaN   

  RestaurantLessThan20 Restaurant20To50 toCoupon_GEQ5min toCoupon_GEQ15min  \
0                  4~8              1~3                1      

# Save Main Data File

In [8]:
# Write the processed frame to outp_fname without the pandas index.
# to_csv does not create missing parent directories, so make sure the
# output directory exists first (no-op when it is already there).
os.makedirs(output_dir, exist_ok=True)
data.to_csv(outp_fname, index=False)