In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
## Customer Lifetime Value Prediction
## Market Basket Analysis

### Customer Lifetime Value Prediction
- Machine Learning Modelling Approach

In [None]:
# Load the transactions and derive the columns used by the later cells:
#   InvoiceDate -> parsed as a datetime (day-month-year hour:minute text)
#   year_month  -> 'YYYY-MM' label of the invoice date
#   TotalPrice  -> line value, UnitPrice * Quantity
retail = (
    pd.read_csv('/kaggle/input/manipal/OnlineRetail.csv')
    .assign(
        InvoiceDate=lambda df: pd.to_datetime(df['InvoiceDate'], format='%d-%m-%Y %H:%M'),
        # Evaluated after InvoiceDate above, so it already sees the parsed datetimes.
        year_month=lambda df: df['InvoiceDate'].dt.strftime('%Y-%m'),
        TotalPrice=lambda df: df['UnitPrice'] * df['Quantity'],
    )
)
retail.shape

In [None]:
# Build `retail_subset`: drop invoices flagged by a 'C' or 'A' prefix (named
# "credit" and "adjust" by the variables below), drop extreme unit prices, and
# drop rows without a customer id.

# Work on a string view of the invoice number so the prefix tests never yield
# NaN (which cannot be used as a boolean mask) when a value is missing or was
# parsed as a number.
invoice_no = retail['InvoiceNo'].astype(str)
is_credit = invoice_no.str.startswith('C')
is_adjust = invoice_no.str.startswith('A')

# Each flagged invoice is collected twice: with its prefix and with the prefix
# stripped, so an invoice carrying the bare number is removed as well.
credit_invoices = invoice_no[is_credit].unique()
credit_invoices_without_c = invoice_no[is_credit].str[1:].unique()

adjust_invoices = invoice_no[is_adjust].unique()
adjust_invoices_without_a = invoice_no[is_adjust].str[1:].unique()

remove_invoices = np.hstack([credit_invoices, credit_invoices_without_c,
                             adjust_invoices, adjust_invoices_without_a])
retail_subset = retail[~invoice_no.isin(remove_invoices)]

# IQR whiskers of the unit price. They are computed here but the filter below
# uses the fixed cap instead of `uw`.
q1 = retail_subset['UnitPrice'].quantile(0.25)
q3 = retail_subset['UnitPrice'].quantile(0.75)
iqr = q3 - q1
lw = q1 - 1.5 * iqr
uw = q3 + 1.5 * iqr

# Fixed upper bound on the unit price; rows above it are discarded.
UNIT_PRICE_CAP = 3000
retail_subset = retail_subset[retail_subset['UnitPrice'] <= UNIT_PRICE_CAP]
# Rows without a customer id cannot be attributed to any customer.
retail_subset = retail_subset.dropna(subset=['CustomerID'])

In [None]:
# Preview the three columns that feed the customer-by-month pivot.
preview_cols = ['TotalPrice', 'year_month', 'CustomerID']
retail_subset.loc[:, preview_cols].head()

In [None]:
# One row per customer, one column per 'YYYY-MM' month, each cell holding the
# summed TotalPrice for that customer and month.
monthly_spend = retail_subset.pivot_table(
    values='TotalPrice',
    index='CustomerID',
    columns='year_month',
    aggfunc='sum',
)
# Customer/month combinations with no rows come back as NaN; count them as 0.
retail_pivot = monthly_spend.fillna(0)
# 'cltv' is the row total across all month columns.
retail_pivot['cltv'] = retail_pivot.sum(axis=1)
retail_pivot.head()

In [None]:
# Feature columns: the month labels from December 2010 through June 2011.
input_cols = ["2010-12"] + [f"2011-{month:02d}" for month in range(1, 7)]
# Target column, kept as a one-element list.
target_col = ['cltv']

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Hold out 20% of the customers for evaluation; the fixed random_state keeps
# the split the same on every run.
train_x, test_x, train_y, test_y = train_test_split(
    retail_pivot[input_cols],
    retail_pivot[target_col],
    test_size=0.2,
    random_state=1,
)

# Fit a linear regression from the monthly spend columns to 'cltv' and predict
# for the held-out customers.
model = LinearRegression().fit(train_x, train_y)
test_y_pred = model.predict(test_x)

# RMSE is taken as the square root of the MSE instead of passing
# `squared=False`: that keyword was deprecated in scikit-learn 1.4 and removed
# in 1.6, where it raises a TypeError.
print('RMSE:', np.sqrt(mean_squared_error(test_y, test_y_pred)))
print('R2:', r2_score(test_y, test_y_pred))

### Market Basket Analysis

In [None]:
from mlxtend.frequent_patterns import apriori, association_rules

In [None]:
# Toy example: three baskets of three items each, one row per (basket, item).
purchases = pd.DataFrame({
    'Basket': [1, 1, 1, 2, 2, 2, 3, 3, 3],
    'Item': ['Milk', 'Bread', 'Cereal', 'Milk', 'Bread', 'Jam', 'Egg', 'Bread', 'Butter'],
    'Quantity': [1, 1, 1, 1, 1, 1, 1, 1, 1],
})

# Basket-by-item matrix of summed quantities.
purchases_pivot = purchases.pivot_table(index='Basket', columns='Item',
                                        values='Quantity', aggfunc='sum')
# Turn the matrix into True/False "item is in basket" flags. fillna(0) has to
# come first because NaN would otherwise be cast to True. mlxtend's apriori
# expects boolean input and emits a DeprecationWarning for other dtypes.
purchases_pivot = purchases_pivot.fillna(0).astype(bool)
purchases_pivot

In [None]:
# Frequent itemsets of the toy baskets. With only three baskets, the tiny
# support threshold keeps every itemset that occurs at least once.
support = apriori(
    purchases_pivot,
    use_colnames=True,  # report itemsets by item name rather than column index
    min_support=0.0001,
)
support

In [None]:
# Derive association rules from the frequent itemsets, using the library's
# default metric and threshold, and show the columns of interest.
rule_cols = ['antecedents', 'consequents', 'support', 'lift']
rules = association_rules(support)
rules.loc[:, rule_cols]

In [None]:
# Invoice-by-product matrix: summed Quantity of each Description per invoice,
# with 0 where the product does not appear on the invoice.
summary = retail_subset.pivot_table(index='InvoiceNo',
                                    columns='Description',
                                    values='Quantity',
                                    aggfunc='sum').fillna(0)
# Reduce quantities to "product is on the invoice" flags and keep them boolean:
# this is the dtype mlxtend's apriori expects (other dtypes trigger a
# DeprecationWarning), and it takes one byte per cell instead of the eight an
# integer matrix would need.
summary = summary.astype(bool)

In [None]:
# Frequent itemsets over the real invoices (rebinds `support` from the toy example).
# NOTE(review): min_support=0.3 keeps only itemsets present in at least 30% of
# invoices - confirm this is not too strict for this data set.
support = apriori(
    summary,
    use_colnames=True,  # report itemsets by product description
    min_support=0.3,
)
support