In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import glob
import mysql.connector
from mysql.connector import Error

## FvB

In [3]:
# Execute the cohort pipeline script so that the names it defines become
# available in this notebook.
# NOTE(review): df_cleaning_source() and first_order(), called further down,
# are not defined anywhere in this notebook - presumably they come from this
# script; confirm.
%run ./cohorts_pipeline_fvb_v3.py

In [5]:
# Load the raw backend order export: a semicolon-separated file that uses
# '.' as the decimal mark.
df_backend = pd.read_csv(
    './Data/Orders_Backend.csv',
    sep=';',
    decimal='.',
)

In [6]:
# Preview the first five raw rows (Dutch column headers, one row per order).
# NOTE(review): the rendered preview contains customer names and e-mail
# addresses - clear this output before sharing or committing the notebook.
df_backend.head(n=5)

Unnamed: 0,Ordernummer,Orderdatum,Klant voornaam,Klant achternaam,E-mail,Aflevermethode,Totaalprijs,Kanaal,Locale,Unnamed: 9,Status
0,DE01367852,2021-01-11 08:44:54,Alexander,Meiners,alexander_meiners@web.de,Thuisbezorgd,313.92,Floris van Bommel webshop Duitsland,de_DE,Nog te alloceren,Nog te alloceren
1,DE01367851,2021-01-11 08:29:07,Olaf,Pfeifer,pfeifer@gigaron.de,Thuisbezorgd,269.85,Floris van Bommel webshop Duitsland,de_DE,Levering bezig,Levering bezig
2,DE01367850,2021-01-11 07:51:48,Mark,van Ommeren,markvanommeren@icloud.com,Thuisbezorgd,269.95,Floris van Bommel webshop Duitsland,de_DE,Levering bezig,Levering bezig
3,DE01367748,2021-01-11 04:50:29,Hubert,Kinzler,hkinzler@web.de,Thuisbezorgd,167.96,Floris van Bommel webshop Duitsland,de_DE,Levering bezig,Levering bezig
4,DE01367659,2021-01-11 00:46:23,Borche,Siljanoski,Borce.siljanoski@gmail.com,Thuisbezorgd,139.96,Floris van Bommel webshop Duitsland,de_DE,Levering bezig,Levering bezig


In [7]:
# Normalise the raw export with df_cleaning_source (defined outside this
# notebook). Judging by the previews before and after this cell, it renames
# the Dutch headers to English ones and keeps only the order reference,
# creation date, customer name, e-mail, revenue and market columns.
# NOTE(review): df_backend is rebound to the cleaned frame, so re-running this
# cell passes already-cleaned data back into the helper - confirm it tolerates
# that, or restart from the read_csv cell.
df_backend = df_cleaning_source(df_backend)

In [8]:
# Preview the cleaned frame: English column names, reduced column set.
# NOTE(review): output still shows customer names and e-mail addresses.
df_backend.head(n=5)

Unnamed: 0,Merchant_Reference,Creation_Date,First_Name,Last_Name,Email,Revenue,Market
0,DE01367852,2021-01-11 08:44:54,Alexander,Meiners,alexander_meiners@web.de,313.92,de_DE
1,DE01367851,2021-01-11 08:29:07,Olaf,Pfeifer,pfeifer@gigaron.de,269.85,de_DE
2,DE01367850,2021-01-11 07:51:48,Mark,van Ommeren,markvanommeren@icloud.com,269.95,de_DE
3,DE01367748,2021-01-11 04:50:29,Hubert,Kinzler,hkinzler@web.de,167.96,de_DE
4,DE01367659,2021-01-11 00:46:23,Borche,Siljanoski,Borce.siljanoski@gmail.com,139.96,de_DE


In [9]:
# Enrich every order with first-order information via first_order (defined
# outside this notebook). Judging by the preview that follows, it adds
# Creation_Date_YM, First_Order, First_Order_YM and Customer_Type
# ('New' / 'Returning') and moves Email to the first column.
# NOTE(review): df_backend is rebound again, so this cell is not idempotent on
# its own - confirm first_order can be re-applied safely.
df_backend = first_order(df_backend)

In [10]:
# Preview the enriched frame: each order now carries its year-month, the
# customer's first order timestamp/year-month and a New/Returning flag.
# NOTE(review): output still shows customer names and e-mail addresses.
df_backend.head(n=5)

Unnamed: 0,Email,Merchant_Reference,Creation_Date,First_Name,Last_Name,Revenue,Market,Creation_Date_YM,First_Order,First_Order_YM,Customer_Type
0,alexander_meiners@web.de,DE01367852,2021-01-11 08:44:54,Alexander,Meiners,313.92,de_DE,2021-01,2017-01-10 15:46:44,2017-01,Returning
1,pfeifer@gigaron.de,DE01367851,2021-01-11 08:29:07,Olaf,Pfeifer,269.85,de_DE,2021-01,2016-05-30 17:01:13,2016-05,Returning
2,markvanommeren@icloud.com,DE01367850,2021-01-11 07:51:48,Mark,van Ommeren,269.95,de_DE,2021-01,2021-01-11 07:51:48,2021-01,New
3,hkinzler@web.de,DE01367748,2021-01-11 04:50:29,Hubert,Kinzler,167.96,de_DE,2021-01,2021-01-03 12:14:08,2021-01,Returning
4,borce.siljanoski@gmail.com,DE01367659,2021-01-11 00:46:23,Borche,Siljanoski,139.96,de_DE,2021-01,2020-08-26 11:37:59,2020-08,Returning


In [11]:
# Reference date for the recency calculation: the most recent order
# timestamp present in the data.
max_date = df_backend['Creation_Date'].max()


In [12]:
# Build the per-customer RFM table (one row per e-mail address):
#   Creation_Date      -> recency: whole days between the customer's latest
#                         order and max_date
#   Merchant_Reference -> frequency: number of order rows
#   Revenue            -> monetary value: mean revenue per order row
# The dict order matters: the columns are renamed by position afterwards.
rfm = df_backend.groupby('Email').agg(
    {
        'Creation_Date': lambda dates: (max_date - dates.max()).days,
        'Merchant_Reference': len,
        'Revenue': lambda revenue: revenue.mean(),
    }
)

In [13]:
# Move the 'Email' group key out of the index into a regular column, then
# give the four columns their RFM names (assigned by position).
rfm = rfm.reset_index()
rfm.columns = ['email', 'recency', 'frequency', 'monetary_value']

In [14]:
# Purchase frequency: total number of orders divided by the number of
# customers (rfm has one row per customer).
purchase_frequency = int(rfm['frequency'].sum()) / len(rfm)

In [15]:
# Repeat rate: share of customers with more than one order.
repeat_rate = int((rfm['frequency'] > 1).sum()) / len(rfm)

In [16]:
# Churn rate, defined here as the complement of the repeat rate, i.e. the
# share of customers with exactly one order.
churn_rate = 1.0 - repeat_rate

In [17]:
# Display the three customer-base KPIs side by side.
(purchase_frequency, repeat_rate, churn_rate)

(1.9131350246681667, 0.368639431909667, 0.6313605680903329)

In [18]:
# Customer lifetime value per customer:
#   mean order value x purchase frequency / churn rate.
# purchase_frequency and churn_rate are scalars computed over the whole
# customer base, so CLV is the customer's monetary_value times a constant.
rfm['CLV'] = rfm['monetary_value'].mul(purchase_frequency).div(churn_rate)

In [19]:
# Split customers on recency: 'active' ordered within the last 180 days
# (relative to max_date), 'inactive' ordered longer ago than that.
active = rfm.loc[rfm['recency'] <= 180, 'email'].unique()
inactive = rfm.loc[rfm['recency'] > 180, 'email'].unique()

In [20]:
# Pull every order row that belongs to an active / inactive customer.
active_list = df_backend.loc[df_backend['Email'].isin(active)]
inactive_list = df_backend.loc[df_backend['Email'].isin(inactive)]

In [21]:
# Sanity check: (rows, columns) of the two order subsets.
(active_list.shape, inactive_list.shape)

((25491, 11), (67187, 11))

In [22]:
# Write both order subsets to CSV without the row index.
# NOTE(review): the export date (13012021) is hard-coded in the file names.
active_list.to_csv(
    path_or_buf='./Outputs/FvB_Active_13012021.csv',
    index=False,
)
inactive_list.to_csv(
    path_or_buf='./Outputs/FvB_Inactive_13012021.csv',
    index=False,
)

In [23]:
# Calendar year of each order, stored as an integer column.
df_backend['Year'] = df_backend['Creation_Date'].dt.year.astype(int)

In [24]:
# Calendar quarter (1-4) of each order, stored as an integer column.
df_backend['Quarter'] = df_backend['Creation_Date'].dt.quarter.astype(int)

In [25]:
def _monthly_transactions(orders, market):
    """Summarise orders and revenue per month and customer type for one market.

    Parameters
    ----------
    orders : pandas.DataFrame
        Order rows with 'Market', 'Creation_Date_YM', 'Customer_Type',
        'Merchant_Reference' and 'Revenue' columns.
    market : str
        Value of the 'Market' column to keep, e.g. 'de_DE'.

    Returns
    -------
    pandas.DataFrame
        One row per 'Creation_Date_YM' (kept as a regular column). The value
        columns are two-level: ('All_Orders', <Customer_Type>) holds the
        number of distinct order references and ('All_Revenue',
        <Customer_Type>) the summed revenue. Month / customer-type
        combinations without any order are filled with 0.
    """
    monthly = (
        orders[orders['Market'] == market]
        .groupby(['Creation_Date_YM', 'Customer_Type'])
        .agg({'Merchant_Reference': 'nunique', 'Revenue': 'sum'})
        .unstack(1)   # Customer_Type moves from the row index to the columns
        .fillna(0)
        .reset_index()
    )
    # The aggregated revenue column is called 'Revenue'. The previous mapping
    # used the key 'Value', which matches nothing and is silently ignored by
    # rename(), so 'All_Revenue' was never applied.
    return monthly.rename(
        columns={'Merchant_Reference': 'All_Orders', 'Revenue': 'All_Revenue'}
    )


# Empty placeholders; nothing in the visible notebook fills or reads them.
# Kept so any later cell that references these names keeps working.
trans_summary_de = pd.DataFrame()
trans_summary_at = pd.DataFrame()

# Same summary for the German and the Austrian webshop.
trans_month_de = _monthly_transactions(df_backend, 'de_DE')
trans_month_at = _monthly_transactions(df_backend, 'de_AT')

In [26]:
# Use the year-month as the row index so that only the order / revenue value
# columns remain as columns.
# NOTE(review): rebinding the same name makes this cell fail on a second run
# (the column is gone after the first); re-run from the summary cell instead.
trans_month_de = trans_month_de.set_index('Creation_Date_YM')

In [27]:
# Drop the outer column level (droplevel() defaults to level 0), leaving only
# the Customer_Type labels. These now repeat once for orders and once for
# revenue, so the columns are renamed in the next cell.
trans_month_de.columns = trans_month_de.columns.droplevel()

In [28]:
# Name the four remaining columns by position: orders first, then revenue,
# each split into New and Returning customers.
# NOTE(review): relies on exactly two customer types being present, in the
# order New, Returning - a month range with only one type would raise a
# length mismatch here.
trans_month_de.columns = ['New_Orders','Ret_Orders','New_Rev','Ret_Rev']

In [29]:
# Average order value (revenue / number of orders) for new and returning
# customers.
# A month without any new (or returning) orders carries a 0 order count
# (missing combinations were filled with 0 upstream). Dividing by that 0
# produces NaN or inf, which makes the following astype(int) raise. Zero
# counts are therefore masked before dividing and the AOV is reported as 0.
trans_month_de['AOV_New'] = (
    trans_month_de['New_Rev'] / trans_month_de['New_Orders'].replace(0, np.nan)
).fillna(0)
trans_month_de['AOV_Ret'] = (
    trans_month_de['Ret_Rev'] / trans_month_de['Ret_Orders'].replace(0, np.nan)
).fillna(0)

In [30]:
# Cast every column (order counts, revenue, AOV) to integers before the table
# is displayed and exported below.
# astype(int) truncates the fractional part rather than rounding, and raises
# if any value is NaN or infinite.
trans_month_de = trans_month_de.astype(int)

In [31]:
# Copy the monthly table to the system clipboard for pasting elsewhere.
# NOTE(review): this is a manual convenience step that needs a clipboard
# backend on the machine running the kernel - presumably it fails on a
# headless server; confirm it should stay in a Run-All notebook.
trans_month_de.to_clipboard()

In [32]:
# Display the final monthly table for the German webshop: order counts,
# revenue and average order value, split by new vs. returning customers.
trans_month_de

Unnamed: 0_level_0,New_Orders,Ret_Orders,New_Rev,Ret_Rev,AOV_New,AOV_Ret
Creation_Date_YM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-01,48,4,20231,1579,421,394
2016-02,254,41,89204,12404,351,302
2016-03,367,78,120661,22638,328,290
2016-04,387,114,121669,30574,314,268
2016-05,352,96,114144,28325,324,295
...,...,...,...,...,...,...
2020-09,656,847,207007,272540,315,321
2020-10,943,998,308536,348214,327,348
2020-11,1095,1202,325332,359377,297,298
2020-12,1204,1292,287158,315556,238,244


## Dumping Data

In [31]:
from gspread_pandas import Spread, Client

In [32]:
# Open the Google spreadsheet named 'Data Dump - PC + FvB MT' through
# gspread_pandas.
# NOTE(review): no credentials are passed here - presumably gspread_pandas
# picks them up from its own local configuration; confirm before running on
# another machine.
fvb_mt_sheet = Spread('Data Dump - PC + FvB MT')

In [33]:
# Push the monthly German transaction table (including its year-month index)
# to the 'FvB' worksheet, starting at cell A1.
# NOTE(review): replace=True presumably clears the existing worksheet content
# before writing - confirm nothing else lives on that sheet.
fvb_mt_sheet.df_to_sheet(trans_month_de, index=True, sheet='FvB', start='A1', replace=True)