# Customer Propensity Model

## Importing Libraries

In [None]:
import pandas
import seaborn as sns
import matplotlib.pylab as plt

## Loading the Dataset

In [None]:
# Read the training sample from disk into a DataFrame.
train = pandas.read_csv(filepath_or_buffer='dataset/training_sample.csv')

## Data Cleaning and Segregation

### Exploring the Dataset

In [None]:
# Show the (rows, columns) size of the frame, then the dtype of each column.
for overview in (train.shape, train.dtypes):
    print(overview)

In [None]:
# Summary statistics of the training frame.
print(train.describe())
# info() writes its report straight to stdout and returns None, so it is
# called bare; wrapping it in print() would append a stray "None" line.
train.info()

In [None]:
# Preview the first five rows (displayed as the cell result).
train.head(n=5)

### Examining Field Correlations

In [None]:
# Correlate the numeric columns only: pandas >= 2.0 raises on non-numeric
# columns instead of silently skipping them as older versions did.
correlation = train.select_dtypes(include=['number']).corr()
plt.figure(figsize=(16, 14))
# vmax caps the colour scale at 0.6; center=0 anchors the colormap at zero
# correlation.
sns.heatmap(correlation, vmax=0.6, center=0, square=True, linewidths=2, cmap='Blues')
# Write the image to disk before displaying the figure.
plt.savefig('heatmap.png')
plt.show()

In [None]:
# Rank every numeric field by its correlation with the 'ordered' column,
# largest value first. Restricting to numeric columns keeps corr() working on
# pandas >= 2.0, which raises on non-numeric columns.
numeric_fields = train.select_dtypes(include=['number'])
numeric_fields.corr()['ordered'].sort_values(ascending=False)

### Defining Predictors

In [None]:
# Model inputs: every numeric column except the label ('ordered') and
# 'device_mobile'.
# NOTE(review): the reason for excluding 'device_mobile' is not visible here.
numeric_columns = train.select_dtypes(include=['number'])
predictors = numeric_columns.drop(columns=['ordered', 'device_mobile'])

In [None]:
# Report how many predictor columns remain, followed by their names.
predictor_names = predictors.columns
print(predictor_names.size)
print(predictor_names)

### Defining Labels

In [None]:
# The label to predict is the 'ordered' column of the training frame.
targets = train['ordered']
print(targets)

## Training the Model

In [None]:
from sklearn.model_selection import train_test_split
import sklearn.metrics

In [None]:
# Hold out 30% of the rows for evaluation. The fixed seed keeps the split, and
# therefore every metric computed from it, identical from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    predictors, targets, test_size=0.3, random_state=42
)
print(x_train.shape, x_test.shape)

In [None]:
from sklearn.naive_bayes import GaussianNB
# Fit a Gaussian naive Bayes model on the training split. fit() returns the
# fitted estimator, so construction and training are chained.
classifier = GaussianNB().fit(x_train, y_train)

# Predicted labels for the held-out rows.
predictions = classifier.predict(x_test)

# Confusion matrix of true versus predicted labels (displayed as the cell result).
sklearn.metrics.confusion_matrix(y_true=y_test, y_pred=predictions)

## Test Accuracy

In [None]:
# Fraction of held-out rows whose predicted label matches the true label.
sklearn.metrics.accuracy_score(y_test, predictions)

## Making Predictions on Test Sample

In [None]:
# Read the sample of prospects to be scored into a DataFrame.
yesterday_prospects = pandas.read_csv(filepath_or_buffer='dataset/testing_sample.csv')

In [None]:
# info() prints its own report and returns None; calling it bare avoids the
# stray "None" line that print(...) would add.
yesterday_prospects.info()

In [None]:
# Keep the customer identifiers aside; they are not model inputs but are
# needed to label the scored rows afterwards.
userIDs = yesterday_prospects['UserID']
# Select exactly the columns the training features were built from, in the
# same order, rather than repeating a hand-maintained drop list that can drift
# from the training definition. A column missing from the file raises KeyError
# here. copy() yields an independent frame so that adding columns to it later
# does not trigger a chained-assignment warning.
yesterday_prospects = yesterday_prospects.loc[:, predictors.columns].copy()

In [None]:
# Confirm the size of the scoring frame as a (rows, columns) tuple.
row_count, column_count = yesterday_prospects.shape
print((row_count, column_count))

In [None]:
# Score on the feature columns only. Dropping any existing 'propensity' column
# first keeps the cell re-runnable: otherwise a second run would hand the
# previous scores to the model as an extra feature it was never fitted on.
scoring_features = yesterday_prospects.drop(columns=['propensity'], errors='ignore')
# predict_proba returns one column per entry of classifier.classes_; index 1
# is the second class (presumably ordered == 1 — confirm the label encoding).
yesterday_prospects['propensity'] = classifier.predict_proba(scoring_features)[:, 1]
print(yesterday_prospects.head(10))

In [None]:
# Put the identifiers back beside the features and scores. concat with axis=1
# places the objects side by side, aligned on their row index. (The former
# bare pandas.DataFrame(userIDs) expression was discarded and had no effect.)
result = pandas.concat([userIDs, yesterday_prospects], axis=1)
print(result.head(20))

In [None]:
# Persist the scored prospects. With the to_csv defaults the row index is
# written as the first, unnamed column.
# NOTE(review): consider index=False if downstream consumers do not need it.
result.to_csv(path_or_buf='customer-propensity-result.csv')