# OBJECTIVE
An international e-commerce company wants to discover key insights from its customer database. It wants to use some of the most advanced machine learning techniques to study its customers. The company sells electronic products.
Our goal here is to predict whether an order was delivered on time.

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
import os
# List every file available under the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# Silence all warnings for the remainder of the notebook.
warnings.filterwarnings('ignore')

# Load the training set and preview its first rows.
data = pd.read_csv('../input/customer-analytics/Train.csv')
data.head()

In [None]:
# Keep the 'Reached.on.Time_Y.N' column aside as the prediction target.
label=data['Reached.on.Time_Y.N']

The ID column doesn't seem to carry any useful information, so we can safely drop it.

In [None]:
# Remove the ID column and the target column from the feature frame (in place).
data.drop(['ID','Reached.on.Time_Y.N'],axis=1,inplace=True)

In [None]:
# Print a summary of the remaining feature columns.
data.info()

No missing values in any column of the data.

### Numerical data 

In [None]:
# Summary statistics with describe()'s default column selection.
data.describe()

### Categorical Data 

In [None]:
# Summary statistics restricted to the object-dtype columns.
data.describe(include='object')

## Warehouse_block 

In [None]:
# Share of orders per warehouse block.
# value_counts() orders its result by frequency, so the wedge labels are read
# from its index; a fixed label list would mislabel wedges whenever the
# frequency order differs from the assumed one (e.g. for near-equal counts).
block_counts=data.Warehouse_block.value_counts()
plt.pie(block_counts,explode=[.8,.3,.2,.1,.1],startangle=90,autopct='%.2f%%',labels=list(block_counts.index),radius=10,colors=['blue','pink','red','yellow','green'])
plt.axis('equal')
plt.title('Warehouse Block',fontdict={'fontsize':22,'fontweight':'bold'})
plt.show()

In [None]:
# Orders per warehouse block, split by the delivery target.
plt.figure(figsize=(12,10))
# Pass the column as x= explicitly: in seaborn >= 0.12 the only positional
# parameter of countplot is `data`, so a positional Series is not treated as x.
sns.countplot(x=data.Warehouse_block,hue=label)
plt.show()

In [None]:
# Replace the block letters with the integer codes of an ordered categorical
# whose categories are listed as A, B, C, D, F.
data.Warehouse_block=pd.Categorical(data.Warehouse_block,categories=['A','B','C','D','F'],ordered=True).codes

## Mode of shipment 

In [None]:
# Share of orders per shipment mode.
# Labels come from the value_counts() index so each wedge is always paired
# with its own category, whatever the frequency order turns out to be.
shipment_counts=data.Mode_of_Shipment.value_counts()
plt.pie(shipment_counts,explode=[.2,.1,.1],startangle=90,autopct='%.2f%%',labels=list(shipment_counts.index),radius=10)
plt.axis('equal')
plt.title('Mode of shipment',fontdict={'fontsize':20,'fontweight':'bold'})
plt.show()

In [None]:
# Orders per shipment mode, split by the delivery target.
plt.figure(figsize=(12,10))
# x= is given by keyword because seaborn >= 0.12 accepts only `data` positionally.
sns.countplot(x=data.Mode_of_Shipment,hue=label,palette='Dark2_r')
plt.show()

In [None]:
# Replace the shipment mode with the integer codes of an ordered categorical
# whose categories are listed as Ship, Flight, Road.
data.Mode_of_Shipment=pd.Categorical(data.Mode_of_Shipment,categories=['Ship','Flight','Road'],ordered=True).codes

## Importance Product

In [None]:
# Share of orders per product-importance level.
# Labels are taken from the value_counts() index instead of a fixed list, so
# wedges cannot be mislabelled if the frequency order changes.
importance_counts=data.Product_importance.value_counts()
plt.pie(importance_counts,explode=[.5,.1,.4],startangle=90,autopct='%.2f%%',labels=list(importance_counts.index),radius=10,colors=['#ff00ff','#00ff00','yellow'])
plt.axis('equal')
plt.title('Importance of product',fontdict={'fontsize':20,'fontweight':'bold'})
plt.show()

In [None]:
# Orders per product-importance level, split by the delivery target.
plt.figure(figsize=(12,10))
# x= is given by keyword because seaborn >= 0.12 accepts only `data` positionally.
sns.countplot(x=data.Product_importance,hue=label,palette='Dark2')
plt.show()

In [None]:
# Encode importance as ordered integer codes: low=0, medium=1, high=2.
# Values are lower-cased before encoding so that the top level is matched
# regardless of its capitalisation ('high' or 'High'). pd.Categorical gives
# code -1 to any value that is not in `categories`, so a case mismatch would
# otherwise silently turn every high-importance row into -1.
data.Product_importance=pd.Categorical(data.Product_importance.str.lower(),categories=['low','medium','high'],ordered=True).codes

## Gender

In [None]:
# Share of orders per customer gender.
# Wedge labels are derived from the value_counts() index (mapped to readable
# names) so they stay correct whichever gender is more frequent.
gender_counts=data.Gender.value_counts()
gender_names={'F':'female','M':'male'}
plt.pie(gender_counts,explode=[.1,.3],startangle=90,autopct='%.2f%%',labels=[gender_names.get(code,code) for code in gender_counts.index],radius=10,colors=['blue','pink'])
plt.axis('equal')
plt.title('Gender',fontdict={'fontsize':22,'fontweight':'bold'})
plt.show()

In [None]:
# Show how many rows fall under each Gender value.
data.Gender.value_counts()

In [None]:
# Orders per gender, split by the delivery target.
plt.figure(figsize=(10,8))
# x= is given by keyword because seaborn >= 0.12 accepts only `data` positionally.
sns.countplot(x=data.Gender,hue=label,palette='Paired')
plt.show()

In [None]:
# Replace the gender letter with the integer codes of an ordered categorical
# whose categories are listed as M, F.
data.Gender=pd.Categorical(data.Gender,categories=['M','F'],ordered=True).codes

## Multivariate analysis

In [None]:

# Pairwise plots across the columns of the feature frame.
sns.pairplot(data)
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlations between feature columns.
plt.figure(figsize=(12,10))
sns.heatmap(data.corr(),annot=True,vmin=-1)
# plt.show() renders the figure; plt.plot() with no arguments draws nothing
# and only returns an empty list.
plt.show()

## Splitting of data

In [None]:
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set; random_state=42 fixes the split.
train_x,test_x,train_y,test_y=train_test_split(data,label,test_size=.2,random_state=42)

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic regression with tol=.01, fitted on the training split.
# NOTE(review): warnings are silenced globally earlier in this notebook, so a
# convergence warning from this fit would not be shown -- confirm it converges.
model1=LogisticRegression(tol=.01)
model1.fit(train_x,train_y)

In [None]:
# Score the logistic-regression model on the training split...
train_pred = model1.predict(train_x)
print(
    'Classification Report of train_data \n',
    classification_report(train_y, train_pred),
)

# ...and on the held-out test split.
test_pred = model1.predict(test_x)
print(
    'Classification Report of test_data \n',
    classification_report(test_y, test_pred),
)

## DecisionTreeClassifier

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Seed the tree so that the grid search (and the selected best_params_) is
# reproducible between runs, consistent with the seeded train/test split and
# the seeded random forest used elsewhere in this notebook.
DTC=DecisionTreeClassifier(random_state=42)
# Tune tree depth (4..12) and split criterion via grid search.
model2=GridSearchCV(DTC,param_grid={'max_depth':range(4,13),'criterion':['gini','entropy']})
model2.fit(train_x,train_y)

In [None]:
# Show the parameter combination selected by the decision-tree grid search.
model2.best_params_

In [None]:
# Score the tuned decision tree on the training split...
train_pred = model2.predict(train_x)
print(
    'Classification Report of train_data \n',
    classification_report(train_y, train_pred),
)

# ...and on the held-out test split.
test_pred = model2.predict(test_x)
print(
    'Classification Report of test_data \n',
    classification_report(test_y, test_pred),
)

## Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Seeded random forest, tuned over tree depth (4..12) and split criterion.
RFC = RandomForestClassifier(random_state=42)
model3 = GridSearchCV(
    estimator=RFC,
    param_grid=dict(max_depth=range(4, 13), criterion=['gini', 'entropy']),
)
model3.fit(train_x, train_y)

In [None]:
# Show the parameter combination selected by the random-forest grid search.
model3.best_params_

In [None]:
# Score the tuned random forest on the training split...
train_pred = model3.predict(train_x)
print(
    'Classification Report of train_data \n',
    classification_report(train_y, train_pred),
)

# ...and on the held-out test split.
test_pred = model3.predict(test_x)
print(
    'Classification Report of test_data \n',
    classification_report(test_y, test_pred),
)

## AdaBoostClassifier

In [None]:
from sklearn.ensemble import AdaBoostClassifier
# AdaBoost with 100 estimators; seeded so that repeated runs give the same
# model, consistent with the seeded split and random forest in this notebook.
model4=AdaBoostClassifier(n_estimators=100,random_state=42)
model4.fit(train_x,train_y)

In [None]:
# Score the AdaBoost model on the training split...
train_pred = model4.predict(train_x)
print(
    'Classification Report of train_data \n',
    classification_report(train_y, train_pred),
)

# ...and on the held-out test split.
test_pred = model4.predict(test_x)
print(
    'Classification Report of test_data \n',
    classification_report(test_y, test_pred),
)

## KNeighborsClassifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier, tuned over k = 5..11 via grid search.
KNN = KNeighborsClassifier()
model5 = GridSearchCV(KNN, param_grid=dict(n_neighbors=range(5, 12)))
model5.fit(train_x, train_y)

In [None]:
# Show the neighbour count selected by the KNN grid search.
model5.best_params_

In [None]:
# Score the tuned KNN model on the training split...
train_pred = model5.predict(train_x)
print(
    'Classification Report of train_data \n',
    classification_report(train_y, train_pred),
)

# ...and on the held-out test split.
test_pred = model5.predict(test_x)
print(
    'Classification Report of test_data \n',
    classification_report(test_y, test_pred),
)

### Conclusion: Every algorithm works reasonably well. With the highest test accuracy of about 69%, AdaBoost and Random Forest are the preferred models for the provided dataset.