In [None]:
import pandas as pd
# Do not truncate wide frames when they are displayed.
pd.set_option('display.max_columns', None)
# Load the in-vehicle coupon recommendation CSV from the relative ../input directory.
dataset=pd.read_csv("../input/invehicle-coupon-recommendation/in-vehicle-coupon-recommendation.csv")
# Preview the first rows.
dataset.head()

In [None]:
# Column dtypes and non-null counts.
dataset.info()

In [None]:
# Number of distinct values in each column.
dataset.nunique()

In [None]:
# Skewness only makes sense for numeric columns. Restricting explicitly keeps
# this cell working on pandas >= 2.0, where numeric_only defaults to False and
# the string-valued columns would otherwise raise a TypeError.
dataset.skew(numeric_only=True)

In [None]:
# Percentage of missing values per column. The sort is applied to the raw
# missing counts (largest first) before scaling to a percentage.
dataset.isnull().sum().sort_values(ascending=False) * 100 /len(dataset)

In [None]:
# Missing-value percentage, rounded to 2 decimals, for a hand-picked subset of columns.
round((dataset[['car','CoffeeHouse','Restaurant20To50','CarryAway','RestaurantLessThan20','Bar']].isnull().sum()*100/len(dataset)),2)

In [None]:
# dtypes and non-null counts for the same subset of columns.
dataset[['car','CoffeeHouse','Restaurant20To50','CarryAway','RestaurantLessThan20','Bar']].info()

In [None]:
# Share of rows per value of 'Y' (the column later used as the modelling target).
dataset['Y'].value_counts()/len(dataset)

In [None]:
# Number of rows per occupation label, largest first.
dataset['occupation'].value_counts().sort_values(ascending=False)

In [None]:
# Split each occupation label on '&' and build one indicator column per token.
# NOTE(review): the tokens are not stripped, so column names can keep
# surrounding whitespace -- the following cell indexes
# 'Arts Design Entertainment Sports ' with a trailing space. Confirm the exact
# names with `dummies.columns` before renaming anything.
dummies = dataset['occupation'].str.get_dummies(sep='&')
dummies.head()

In [None]:
# dtypes and non-null counts of the indicator columns.
dummies.info()

In [None]:
# Exact indicator column names (including any surrounding whitespace).
dummies.columns

In [None]:
# Some occupation indicators bundle several fields under one label. Give every
# field its own indicator column (an exact copy of the bundled indicator) and
# then drop the bundled columns.
#
# Fix: the original assigned 'Arts' twice and never created 'Sports', so the
# fourth field of the first bundle was silently lost.
#
# The first key keeps its trailing space on purpose: it has to match the column
# name exactly as it exists in `dummies`.
bundled_occupations = {
    'Arts Design Entertainment Sports ': ['Arts', 'Design', 'Entertainment', 'Sports'],
    'Life Physical Social Science': ['Social Science', 'Physical Science', 'Life Science'],
}
for bundled_col, field_cols in bundled_occupations.items():
    for field_col in field_cols:
        dummies[field_col] = dummies[bundled_col]
dummies.drop(list(bundled_occupations), axis=1, inplace=True)
dummies.info()

In [None]:
dataset = pd.concat([dataset, dummies], axis=1)
dataset.head()

In [None]:
dataset.drop(['occupation'],axis=1,inplace=True)
dataset.head()

In [None]:
!pip install pandas-profiling

In [None]:
from pandas_profiling import ProfileReport
# Build a profiling report titled "EDA Report" over the current state of `dataset`.
profile = ProfileReport(dataset, title="EDA Report")
# Bare expression so the notebook displays the report as the cell output.
profile

In [None]:
dataset.shape

In [None]:
dataset.drop_duplicates(inplace=True)
dataset.shape

In [None]:
dataset.drop(['toCoupon_GEQ5min','car'],axis=1,inplace=True)  
dataset.info()

In [None]:
# Rebuild the profiling report now that duplicates and the two dropped columns are gone.
profile = ProfileReport(dataset, title="EDA Report")
# Bare expression so the notebook displays the report as the cell output.
profile

In [None]:
data = dataset.sample(frac=0.95, random_state=42)
data_unseen = dataset.drop(data.index)
data.reset_index(inplace=True, drop=True)
data_unseen.reset_index(inplace=True, drop=True)
print('Data for Modeling: ' + str(data.shape))

In [None]:
print('Unseen Data For Predictions: ' + str(data_unseen.shape))

In [None]:
!pip install pycaret[full]

In [None]:
from pycaret.classification import *

In [None]:
# dtypes and non-null counts of the modelling subset.
data.info()

In [None]:
# Column names of the modelling subset.
data.columns

In [None]:
categorical = []
for i in data.columns:
    if (data[i].dtype=='object'):
        categorical.append(i)
print("Categorical Attribute : {}\n ".format(len(categorical)))
for x in range(len(categorical)): 
    print(categorical[x])

In [None]:
data[categorical].nunique()

In [None]:
num = []
for i in data.columns:
    if (data[i].dtype!='object'):
        num.append(i)
print("Numerical Attribute : {}\n ".format(len(num)))
for x in range(len(num)): 
    print(num[x])

In [None]:
dataset.columns

In [None]:
# Configure the PyCaret classification experiment on the modelling subset.
#
# Fix: 'age' used to be listed both in ordinal_features and in
# categorical_features. It has an explicit level order, so it is now declared
# only as ordinal and the two declarations no longer contradict each other.
#
# NOTE(review): handle_unknown_categorical, unknown_categorical_method,
# combine_rare_levels and ignore_low_variance are PyCaret 2.x keyword names --
# confirm the installed PyCaret version is below 3.0 before running.
promotion = setup(
    data=data,
    target='Y',
    session_id=1,  # fixed seed for the experiment
    train_size=0.8,  # train on 80% of the available data
    # Ordinal features: each list gives the levels from lowest to highest.
    ordinal_features={
        'time': ["7AM", "10AM", "2PM", "6PM", "10PM"],
        'age': ['below21', '21', "26", "31", "36", "41", "46", "50plus"],
        'education': [
            "Some High School",
            "High School Graduate",
            "Some college - no degree",
            "Associates degree",
            "Bachelors degree",
            "Graduate degree (Masters or Doctorate)",
        ],
        'income': [
            'Less than $12500',
            '$12500 - $24999',
            '$25000 - $37499',
            '$37500 - $49999',
            '$50000 - $62499',
            '$62500 - $74999',
            '$75000 - $87499',
            '$87500 - $99999',
            '$100000 or More',
        ],
        # The visit-frequency columns all share the same ordered scale.
        'Bar': ['never', 'less1', '1~3', '4~8', 'gt8'],
        'CoffeeHouse': ['never', 'less1', '1~3', '4~8', 'gt8'],
        'CarryAway': ['never', 'less1', '1~3', '4~8', 'gt8'],
        'RestaurantLessThan20': ['never', 'less1', '1~3', '4~8', 'gt8'],
        'Restaurant20To50': ['never', 'less1', '1~3', '4~8', 'gt8'],
    },
    # Nominal (unordered) categorical features.
    categorical_features=[
        'destination',
        'passanger',
        'weather',
        'coupon',
        'maritalStatus',
        'direction_same',
        'direction_opp',
        'expiration',
        'has_children',
    ],
    # Reshape feature distributions so they are closer to a normal distribution.
    transformation=True,
    # Rescale the values of numeric columns.
    normalize=True,
    # Replace category levels unseen during training with the most frequent level.
    handle_unknown_categorical=True,
    unknown_categorical_method='most_frequent',
    # Drop one of two features that are highly correlated with each other.
    remove_multicollinearity=True,
    # Remove categorical features with statistically insignificant variance.
    ignore_low_variance=True,
    # Merge categorical levels below the rare-level threshold into a single level.
    combine_rare_levels=True,
    numeric_imputation='median',
    categorical_imputation='mode',
)

In [None]:
# Create a CatBoost classifier through PyCaret using the experiment configured by setup().
catboost= create_model('catboost')

In [None]:
# Print the model object returned by create_model.
print(catboost)

In [None]:
# Tune the model's hyper-parameters, optimising for Precision.
tuned_catboost = tune_model(catboost,optimize = 'Precision')

In [None]:
# Graphical evaluation plots for the tuned model.
evaluate_model(tuned_catboost)

In [None]:
# Evaluate the tuned model on the test data (no explicit data argument is passed).
predict_model(tuned_catboost)

In [None]:
# Produce the final model from the tuned one and display it.
final_catboost = finalize_model(tuned_catboost)
final_catboost

In [None]:
# Evaluate the finalized model on the test data.
# NOTE(review): finalize_model presumably refits on the full modelling data,
# hold-out included; if so, this score is optimistic and the `data_unseen`
# evaluation below is the one to trust -- confirm against the PyCaret docs.
predict_model(final_catboost)

In [None]:
# Score the rows that were held back from modelling entirely.
unseen_predictions = predict_model(final_catboost, data=data_unseen)
unseen_predictions.head()

In [None]:
# Mean of the `Score` column over the unseen rows, rounded to 2 decimals.
# NOTE(review): relies on predict_model() returning a column named `Score`
# (PyCaret 2.x naming, as far as I know) -- confirm for the installed version.
print("Confidence Score :   {}".format(round(unseen_predictions.Score.mean(),2)))