In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')

import warnings
warnings.filterwarnings('ignore')

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and echo the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for file_name in files:
        print(os.path.join(root, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Load Data

In [None]:
# Load the training CSV into a DataFrame and preview its first rows.
TRAIN_CSV = '/kaggle/input/customer-analytics/Train.csv'
df = pd.read_csv(TRAIN_CSV)
df.head()

In [None]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
df.info()

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Drop the `ID` column before analysis. Rebinding `df` (instead of mutating it
# in place) together with errors='ignore' keeps this cell safe to re-run: the
# original in-place drop raised KeyError on a second execution because the
# column was already gone.
df = df.drop(columns=['ID'], errors='ignore')

In [None]:
# Preview the frame again now that the `ID` column has been dropped.
df.head()

# Exploratory Data Analysis

In [None]:
# Columns drawn as count plots in the exploratory analysis.
cat_cols = [
    'Warehouse_block',
    'Mode_of_Shipment',
    'Product_importance',
    'Gender',
]

In [None]:
# One count plot per column in `cat_cols`, laid out on a 2x2 grid.
plt.figure(figsize=(15, 10))
for position, col in enumerate(cat_cols, start=1):
    plt.subplot(2, 2, position)
    # Pass the Series explicitly as `x=`. Positional use is deprecated in
    # seaborn 0.11 and is bound to `data` (not `x`) from seaborn 0.12 on.
    sns.countplot(x=df[col])

In [None]:
# Density histogram with a KDE overlay for each of the three numeric columns.
# `sns.distplot` is deprecated; `histplot(kde=True, stat='density')` is the
# replacement seaborn documents for the same kind of figure.
numeric_cols = ['Cost_of_the_Product', 'Weight_in_gms', 'Discount_offered']
plt.figure(figsize=(15, 10))
for position, col in enumerate(numeric_cols, start=1):
    plt.subplot(2, 2, position)
    sns.histplot(x=df[col], kde=True, stat='density')

In [None]:
# Count plots of three discrete columns, split by the `Reached.on.Time_Y.N`
# target so the class balance within each level is visible.
count_cols = ['Customer_care_calls', 'Customer_rating', 'Prior_purchases']
plt.figure(figsize=(15, 10))
for position, col in enumerate(count_cols, start=1):
    plt.subplot(2, 2, position)
    # Keyword `x=` is required: a positional Series is deprecated in seaborn
    # 0.11 and is bound to `data` (not `x`) from seaborn 0.12 on.
    sns.countplot(x=df[col], hue=df['Reached.on.Time_Y.N'])

In [None]:
# Histogram of the `Discount_offered` column using pandas' default binning.
df['Discount_offered'].hist()

In [None]:
# Grid of pairwise plots over the columns of `df`.
# NOTE(review): this runs before one-hot encoding, so the string columns are
# presumably left out by seaborn — confirm against the rendered figure.
sns.pairplot(df)

In [None]:
# One-hot encode the non-numeric columns of `df`; with no `columns=` argument
# pandas leaves the numeric columns untouched.
df= pd.get_dummies(df)

In [None]:
# Preview the frame after one-hot encoding.
df.head()

In [None]:
# Drop the `Gender_F` dummy column (presumably redundant with the remaining
# Gender dummy — confirm against the encoded frame). Rebinding `df` with
# errors='ignore' keeps the cell re-runnable; the original in-place drop
# raised KeyError when executed a second time.
df = df.drop(columns=['Gender_F'], errors='ignore')
df.head()

In [None]:
# Annotated heatmap of the pairwise correlations between all columns of `df`.
corr_matrix = df.corr()
plt.figure(figsize=(15, 12))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')

# Preprocessing

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# Separate the features from the `Reached.on.Time_Y.N` target column.
target_col = 'Reached.on.Time_Y.N'
X = df.drop(columns=[target_col])
y = df[target_col]

# Stratified 80/20 split. The fixed random_state makes the split, and
# therefore every score computed from it, reproducible across kernel
# restarts; the original call drew a different split on each run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [None]:
# Standardise the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics leak into it.
ss = StandardScaler().fit(X_train)
X_train, X_test = ss.transform(X_train), ss.transform(X_test)

# Training Model using ML Algorithms

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Candidate classifiers keyed by display name, in evaluation order.
# The estimators that use randomness are given a fixed random_state so their
# scores are reproducible between runs. A dict literal replaces the separate
# `key` / `value` lists, which were shadowed by the loop variables of the
# evaluation cell anyway.
models = {
    'LogisticRegression': LogisticRegression(),
    'DecisionTreeClassifier': DecisionTreeClassifier(random_state=42),
    'RandomForestClassifier': RandomForestClassifier(random_state=42),
    'KNeighborsClassifier': KNeighborsClassifier(),
    'XGBClassifier': XGBClassifier(random_state=42),
    'SVC': SVC(),
}

In [None]:
# Fit every candidate on the training split, then print its name, its
# classification report and its confusion matrix for the test split.
for model_name, estimator in models.items():
    predictions = estimator.fit(X_train, y_train).predict(X_test)
    print(model_name)
    print(classification_report(y_test, predictions))
    print(confusion_matrix(y_test, predictions))
        

# Using Neural Networks

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.utils.np_utils import to_categorical

In [None]:
# NumPy copies of the scaled feature matrices.
X_train_arr, X_test_arr = np.array(X_train), np.array(X_test)
# One-hot encoded copies of the train / test labels.
y_train_arr, y_test_arr = (
    np.array(to_categorical(labels)) for labels in (y_train, y_test)
)


In [None]:
# Shape of the training feature array.
X_train_arr.shape

In [None]:
# Shape of the training labels. This inspects the original `y_train`, not the
# one-hot encoded `y_train_arr`.
y_train.shape

In [None]:
# Shape of the test feature array.
X_test_arr.shape

In [None]:
from keras.optimizers import Adam
def create_model(activation='relu', learning_rate=0.001, input_dim=18):
    """Build and compile a fully connected binary classifier.

    The network stacks three hidden Dense layers (100, 50 and 25 units) that
    share one activation, followed by a single sigmoid output unit. It is
    compiled with the Adam optimizer, binary cross-entropy loss and the
    accuracy metric.

    Parameters
    ----------
    activation : str, default 'relu'
        Activation used by all three hidden layers.
    learning_rate : float, default 0.001
        Step size handed to the Adam optimizer.
    input_dim : int, default 18
        Number of input features. The default keeps the previously
        hard-coded width; pass ``X_train.shape[1]`` if the feature set changes.

    Returns
    -------
    keras.models.Sequential
        The compiled, untrained model.
    """
    model = Sequential()
    model.add(Dense(100, activation=activation, kernel_initializer='normal',
                    input_shape=(input_dim,)))
    model.add(Dense(50, activation=activation))
    model.add(Dense(25, activation=activation))
    # A single sigmoid unit pairs with binary_crossentropy, so the labels must
    # be a 0/1 vector rather than one-hot encoded.
    model.add(Dense(1, activation='sigmoid'))

    # `lr` is the deprecated spelling of this argument; `learning_rate` is the
    # supported keyword.
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier

# Wrap `create_model` so the Keras network exposes the scikit-learn estimator
# interface and can be passed to scikit-learn's search utilities.
# NOTE(review): `keras.wrappers.scikit_learn` is a legacy module — confirm it
# exists in the installed Keras version.
model= KerasClassifier(build_fn= create_model)

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Search space: the training schedule (epochs, batch_size) plus the two
# arguments accepted by create_model (activation, learning_rate).
params = {
    'epochs': [20, 30, 40, 50],
    'batch_size': [220, 330, 440],
    'activation': ['relu', 'tanh'],
    'learning_rate': [0.001, 0.01, 0.1, 1],
}

# random_state pins which parameter combinations are sampled and how the CV
# machinery is seeded, so the set of candidates tried is the same on every
# run. Network weight initialisation is still unseeded, so scores can vary.
# NOTE: the name `random` shadows the stdlib module of the same name; it is
# kept because later cells read `random.best_estimator_`.
random = RandomizedSearchCV(model, param_distributions=params, cv=5, random_state=42)
random.fit(X_train, y_train)

In [None]:
# Estimator built from the best parameter combination found by the search.
random.best_estimator_

In [None]:
# Score the best estimator on the held-out test split.
random.best_estimator_.score(X_test, y_test)

# The accuracy on the test set is still 65%.

# Upvote and comment if you liked it :)