In [1]:
import pandas as pd  #importing pandas to create and work on dataframes
import numpy as np   #for creating numpy arrays
import seaborn as sns #for visualising the data

from sklearn import metrics  #metrics are used to evaluate the model
from sklearn.preprocessing import LabelEncoder   # labelencoder is used to convert the data type from object to categorical

import warnings 

from sklearn import svm
from sklearn.svm import LinearSVC

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix,r2_score,mean_squared_error #various evaluation methods

warnings.filterwarnings("ignore")  # to ignore the warnings

In [2]:
df = pd.read_excel('marketing_campaign.xlsx')  # reading the data as a dataframe using pandas

In [3]:
df #to have an overview of how the data looks

Unnamed: 0,ID,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,...,NumWebVisitsMonth,AcceptedCmp3,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Z_CostContact,Z_Revenue,Response
0,5524,1957,Graduation,Single,58138.0,0,0,2012-09-04,58,635,...,7,0,0,0,0,0,0,3,11,1
1,2174,1954,Graduation,Single,46344.0,1,1,2014-03-08,38,11,...,5,0,0,0,0,0,0,3,11,0
2,4141,1965,Graduation,Together,71613.0,0,0,2013-08-21,26,426,...,4,0,0,0,0,0,0,3,11,0
3,6182,1984,Graduation,Together,26646.0,1,0,2014-02-10,26,11,...,6,0,0,0,0,0,0,3,11,0
4,5324,1981,PhD,Married,58293.0,1,0,2014-01-19,94,173,...,5,0,0,0,0,0,0,3,11,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2235,10870,1967,Graduation,Married,61223.0,0,1,2013-06-13,46,709,...,5,0,0,0,0,0,0,3,11,0
2236,4001,1946,PhD,Together,64014.0,2,1,2014-06-10,56,406,...,7,0,0,0,1,0,0,3,11,0
2237,7270,1981,Graduation,Divorced,56981.0,0,0,2014-01-25,91,908,...,6,0,1,0,0,0,0,3,11,0
2238,8235,1956,Master,Together,69245.0,0,1,2014-01-24,8,428,...,3,0,0,0,0,0,0,3,11,0


In [4]:
col_list = list(df.columns)
col_list

['ID',
 'Year_Birth',
 'Education',
 'Marital_Status',
 'Income',
 'Kidhome',
 'Teenhome',
 'Dt_Customer',
 'Recency',
 'MntWines',
 'MntFruits',
 'MntMeatProducts',
 'MntFishProducts',
 'MntSweetProducts',
 'MntGoldProds',
 'NumDealsPurchases',
 'NumWebPurchases',
 'NumCatalogPurchases',
 'NumStorePurchases',
 'NumWebVisitsMonth',
 'AcceptedCmp3',
 'AcceptedCmp4',
 'AcceptedCmp5',
 'AcceptedCmp1',
 'AcceptedCmp2',
 'Complain',
 'Z_CostContact',
 'Z_Revenue',
 'Response']

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2240 entries, 0 to 2239
Data columns (total 29 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   ID                   2240 non-null   int64  
 1   Year_Birth           2240 non-null   int64  
 2   Education            2240 non-null   object 
 3   Marital_Status       2240 non-null   object 
 4   Income               2216 non-null   float64
 5   Kidhome              2240 non-null   int64  
 6   Teenhome             2240 non-null   int64  
 7   Dt_Customer          2240 non-null   object 
 8   Recency              2240 non-null   int64  
 9   MntWines             2240 non-null   int64  
 10  MntFruits            2240 non-null   int64  
 11  MntMeatProducts      2240 non-null   int64  
 12  MntFishProducts      2240 non-null   int64  
 13  MntSweetProducts     2240 non-null   int64  
 14  MntGoldProds         2240 non-null   int64  
 15  NumDealsPurchases    2240 non-null   i

In [6]:
df = pd.get_dummies(df , columns = ['Education','Marital_Status'],drop_first = True)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2240 entries, 0 to 2239
Data columns (total 38 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   ID                       2240 non-null   int64  
 1   Year_Birth               2240 non-null   int64  
 2   Income                   2216 non-null   float64
 3   Kidhome                  2240 non-null   int64  
 4   Teenhome                 2240 non-null   int64  
 5   Dt_Customer              2240 non-null   object 
 6   Recency                  2240 non-null   int64  
 7   MntWines                 2240 non-null   int64  
 8   MntFruits                2240 non-null   int64  
 9   MntMeatProducts          2240 non-null   int64  
 10  MntFishProducts          2240 non-null   int64  
 11  MntSweetProducts         2240 non-null   int64  
 12  MntGoldProds             2240 non-null   int64  
 13  NumDealsPurchases        2240 non-null   int64  
 14  NumWebPurchases         

In [8]:
df.isnull().sum()

ID                          0
Year_Birth                  0
Income                     24
Kidhome                     0
Teenhome                    0
Dt_Customer                 0
Recency                     0
MntWines                    0
MntFruits                   0
MntMeatProducts             0
MntFishProducts             0
MntSweetProducts            0
MntGoldProds                0
NumDealsPurchases           0
NumWebPurchases             0
NumCatalogPurchases         0
NumStorePurchases           0
NumWebVisitsMonth           0
AcceptedCmp3                0
AcceptedCmp4                0
AcceptedCmp5                0
AcceptedCmp1                0
AcceptedCmp2                0
Complain                    0
Z_CostContact               0
Z_Revenue                   0
Response                    0
Education_Basic             0
Education_Graduation        0
Education_Master            0
Education_PhD               0
Marital_Status_Alone        0
Marital_Status_Divorced     0
Marital_St

In [9]:
df = df.dropna()

In [10]:
df['Dt_Customer'] = pd.to_datetime(df['Dt_Customer'])#convert the date column to date type column, and divide into three columns
df['Year_Customer'] = df['Dt_Customer'].apply(lambda x: x.year)
df['Month_Customer'] = df['Dt_Customer'].apply(lambda x: x.month)
df['Day_Customer'] = df['Dt_Customer'].apply(lambda x: x.day)
df = df.drop('Dt_Customer', axis=1)

In [11]:
df.head()

Unnamed: 0,ID,Year_Birth,Income,Kidhome,Teenhome,Recency,MntWines,MntFruits,MntMeatProducts,MntFishProducts,...,Marital_Status_Alone,Marital_Status_Divorced,Marital_Status_Married,Marital_Status_Single,Marital_Status_Together,Marital_Status_Widow,Marital_Status_YOLO,Year_Customer,Month_Customer,Day_Customer
0,5524,1957,58138.0,0,0,58,635,88,546,172,...,0,0,0,1,0,0,0,2012,9,4
1,2174,1954,46344.0,1,1,38,11,1,6,2,...,0,0,0,1,0,0,0,2014,3,8
2,4141,1965,71613.0,0,0,26,426,49,127,111,...,0,0,0,0,1,0,0,2013,8,21
3,6182,1984,26646.0,1,0,26,11,4,20,10,...,0,0,0,0,1,0,0,2014,2,10
4,5324,1981,58293.0,1,0,94,173,43,118,46,...,0,0,1,0,0,0,0,2014,1,19


In [12]:
df.shape

(2216, 40)

In [13]:
df.columns

Index(['ID', 'Year_Birth', 'Income', 'Kidhome', 'Teenhome', 'Recency',
       'MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts',
       'MntSweetProducts', 'MntGoldProds', 'NumDealsPurchases',
       'NumWebPurchases', 'NumCatalogPurchases', 'NumStorePurchases',
       'NumWebVisitsMonth', 'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5',
       'AcceptedCmp1', 'AcceptedCmp2', 'Complain', 'Z_CostContact',
       'Z_Revenue', 'Response', 'Education_Basic', 'Education_Graduation',
       'Education_Master', 'Education_PhD', 'Marital_Status_Alone',
       'Marital_Status_Divorced', 'Marital_Status_Married',
       'Marital_Status_Single', 'Marital_Status_Together',
       'Marital_Status_Widow', 'Marital_Status_YOLO', 'Year_Customer',
       'Month_Customer', 'Day_Customer'],
      dtype='object')

In [14]:
x = df.drop(labels = ['Response'],axis = 1)
y = df['Response']

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.3 , shuffle = 30)

# Implementing Logistic Regression

In [15]:
lr = LogisticRegression()

In [16]:
logreg = lr.fit(x_train,y_train)

In [17]:
y_pred = logreg.predict(x_test)

In [18]:
print(classification_report(y_pred,y_test))

              precision    recall  f1-score   support

           0       0.99      0.87      0.92       645
           1       0.13      0.65      0.22        20

    accuracy                           0.86       665
   macro avg       0.56      0.76      0.57       665
weighted avg       0.96      0.86      0.90       665



In [19]:
confusion_matrix(y_pred,y_test)

array([[561,  84],
       [  7,  13]], dtype=int64)

# Implementing In Support Vector Machine Algorithm

In [20]:
lsvc = LinearSVC(verbose=0)
print(lsvc)

LinearSVC()


In [21]:
lsvc.fit(x_train, y_train)
score = lsvc.score(x_train, y_train)
print("Score: ", score)

Score:  0.8478401031592521


In [22]:
y_pred = lsvc.predict(x_test)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.85      1.00      0.92       568
           1       0.00      0.00      0.00        97

    accuracy                           0.85       665
   macro avg       0.43      0.50      0.46       665
weighted avg       0.73      0.85      0.79       665



It is found that for the given dataset,logistic regression has better accuracy than the support vector machine algorithm.