<a href="https://colab.research.google.com/github/radrams/olist_predict_clv_segments/blob/master/LTV_Visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import random

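# NOTE: the pygments `go` imported on the next line is immediately shadowed by plotly.graph_objs `go` below.
# The seeds set further down (np.random.seed / random.seed) make the random numbers predictable, so the same set of numbers appears on every run.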
from pygments.lexers import go
import plotly.offline as pyoff
import plotly.graph_objs as go

# Seed Python's and NumPy's global generators so every run draws the same numbers
random.seed(42)
np.random.seed(42)

# Silence every warning in the notebook output
warnings.filterwarnings('ignore')

# Matplotlib defaults: slightly bigger, higher-resolution figures
plt.rcParams.update({
    'figure.figsize': (7, 4.5),
    'figure.dpi': 140,
})

# Seaborn plotting parameters: "ticks" style, "poster" context at half font scale
sns.set(style="ticks")
sns.set_context(
    "poster",
    font_scale=0.5,
    rc={"grid.linewidth": 5},
)

# Read the data: orders, customers, order payments and order items (in that order)
df1, df2, df3, df4 = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/olist_orders_dataset.csv',
        './data/olist_customers_dataset.csv',
        './data/olist_order_payments_dataset.csv',
        './data/olist_order_items_dataset.csv',
    )
)

# Orders: keep the customer id and purchase timestamp, keyed by order_id.
# No de-duplication is applied to the orders here.
orders = df1[['order_id', 'customer_id', 'order_purchase_timestamp']].set_index('order_id')

# Order items: keep only the price, keyed by order_id. Repeated order_id rows
# are kept on purpose, because every item row is needed to total the price.
order_items = df4[['order_id', 'price']].set_index('order_id')

# Join each item row to its order through the shared order_id key
orders_date_price = orders.merge(order_items, on='order_id')

# Convert order_purchase_timestamp from string to datetime
orders_date_price['PurchaseTimeStamp'] = pd.to_datetime(orders_date_price['order_purchase_timestamp'])

# Integer period keys for reporting and visualization: YYYY, YYYYMM and YYYYMMDD.
# The vectorised .dt accessor computes them column-wise instead of calling a
# Python lambda once per row.
purchase_ts = orders_date_price['PurchaseTimeStamp'].dt
orders_date_price['PurchaseYear'] = purchase_ts.year
orders_date_price['PurchaseYearMonth'] = 100 * purchase_ts.year + purchase_ts.month
orders_date_price['PurchaseYearMonthDay'] = (
    10000 * purchase_ts.year + 100 * purchase_ts.month + purchase_ts.day
)

# Revenue of a row is the item price; total it per month and per day
orders_date_price['Revenue'] = orders_date_price['price']
orders_revenue = orders_date_price.groupby(['PurchaseYearMonth'])['Revenue'].sum().reset_index()
orders_revenue_day = orders_date_price.groupby(['PurchaseYearMonthDay'])['Revenue'].sum().reset_index()

# Monthwise Revenue
# Line chart (Plotly Scatter trace): purchase month on x, summed revenue on y
plot_data = [
    go.Scatter(
        x=orders_revenue['PurchaseYearMonth'],
        y=orders_revenue['Revenue'],
    ),
]
plot_layout = go.Layout(
    title='Monthwise Revenue',
    xaxis=dict(type="category", tickangle=35, title="Period"),
    yaxis=dict(title="Revenue in R$"),
)
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.iplot(fig)


  import pandas.util.testing as tm


In [2]:
# Daywise Revenue
# Line chart (Plotly Scatter trace): purchase day on x, summed revenue on y
plot_data = [
    go.Scatter(
        x=orders_revenue_day['PurchaseYearMonthDay'],
        y=orders_revenue_day['Revenue'],
    ),
]
plot_layout = go.Layout(
    title='Daywise Revenue',
    xaxis=dict(type="category", tickangle=270, title="Period"),
    yaxis=dict(title="Revenue in R$"),
)
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.iplot(fig)

In [3]:

# To get active customers
# Lookup from customer_id to customer_unique_id
customers = df2[['customer_id', 'customer_unique_id']].set_index('customer_id')

# Orders keyed by customer_id, keeping the order id and purchase timestamp
cust_orders = df1[['order_id', 'customer_id', 'order_purchase_timestamp']].set_index('customer_id')

# Attach the unique customer id to every order through the shared customer_id key
customers_orders = pd.merge(cust_orders, customers, on='customer_id')

customers_orders['PurchaseTimeStamp'] = pd.to_datetime(customers_orders['order_purchase_timestamp'])
customers_orders['UniqueCustomerID'] = customers_orders['customer_unique_id']

# Integer period keys (YYYY and YYYYMM), computed column-wise with the .dt
# accessor instead of a Python lambda per row
cust_purchase_ts = customers_orders['PurchaseTimeStamp'].dt
customers_orders['PurchaseYear'] = cust_purchase_ts.year
customers_orders['PurchaseYearMonth'] = 100 * cust_purchase_ts.year + cust_purchase_ts.month

# One row per (customer, year, month). A customer who orders in several periods
# must count as active in each of them, so duplicates are dropped across all
# three columns. De-duplicating on UniqueCustomerID alone kept only the first
# row of every customer, which undercounted repeat buyers, and doing it with
# inplace=True mutated a slice of customers_orders.
cols = ['UniqueCustomerID', 'PurchaseYear', 'PurchaseYearMonth']
active_customers = customers_orders[cols].drop_duplicates()

# Distinct customers seen in each month and in each year
monthly_active_customers = active_customers.groupby(['PurchaseYearMonth'])['UniqueCustomerID'].nunique().reset_index()
yearly_active_customers = active_customers.groupby(['PurchaseYear'])['UniqueCustomerID'].nunique().reset_index()

# Monthly active customers
# Bar chart: purchase month on x, number of distinct customers on y
plot_data = [
    go.Bar(
        x=monthly_active_customers['PurchaseYearMonth'],
        y=monthly_active_customers['UniqueCustomerID'],
    ),
]
plot_layout = go.Layout(
    title='Monthly Active Customers',
    xaxis=dict(type="category", tickangle=-45, title="Period"),
    yaxis=dict(title="No. of Customers"),
)
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.iplot(fig)

In [4]:

# Yearly active customers
# Bar chart: purchase year on x, number of distinct customers on y, with the
# count printed above each bar
plot_data = [go.Bar(
                        x=yearly_active_customers['PurchaseYear'],
                        y=yearly_active_customers['UniqueCustomerID'],
                        # scalar width applies to every bar, however many years there are
                        width=0.3,
                        marker_color=['darkblue', 'darkcyan', 'crimson'],
                        text=yearly_active_customers['UniqueCustomerID'],
                        textposition='outside'
            )
            ]

plot_layout = go.Layout(
                        xaxis={"type": "category",  "tickangle": -45, 'title': "Period"},
                        title='Yearly Active Customers',
                        yaxis={'title': "No. of Customers"},
                        )

fig = go.Figure(data=plot_data, layout=plot_layout)
# Fixed 800x800 canvas with no gap between bar slots
fig.update_layout(
    autosize=False,
    width=800,
    height=800,
    bargap=0,
)
# The figure is rendered once by iplot; the former bare `fig.show` (no call
# parentheses) did nothing and has been removed.
pyoff.iplot(fig)


In [5]:

# Monthly Order Count
# Distinct order ids per purchase month and per purchase year
cols = ['order_id', 'PurchaseYear', 'PurchaseYearMonth']
total_orders = customers_orders.loc[:, cols]
monthly_orders = (
    total_orders
    .groupby('PurchaseYearMonth')['order_id']
    .nunique()
    .reset_index()
)
yearly_orders = (
    total_orders
    .groupby('PurchaseYear')['order_id']
    .nunique()
    .reset_index()
)

# Monthly orders
# Bar chart: purchase month on x, number of distinct orders on y
plot_data = [
    go.Bar(
        x=monthly_orders['PurchaseYearMonth'],
        y=monthly_orders['order_id'],
    ),
]
plot_layout = go.Layout(
    title='Monthly Orders',
    xaxis=dict(type="category", tickangle=-45, title="Month"),
    yaxis=dict(title="No. of Orders"),
)
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.iplot(fig)

# Yearly orders
# Bar chart: purchase year on x, number of distinct orders on y, with the
# count printed above each bar
plot_data = [go.Bar(
                        x=yearly_orders['PurchaseYear'],
                        y=yearly_orders['order_id'],
                        # scalar width applies to every bar, however many years there are
                        width=0.3,
                        marker_color=['darkblue', 'darkcyan', 'crimson'],
                        text=yearly_orders['order_id'],
                        textposition='outside'
            )
            ]

# Axis titles added so the figure is labelled like the other charts
plot_layout = go.Layout(
                        xaxis={"type": "category",  "tickangle": -45, 'title': "Year"},
                        title='Yearly Orders',
                        yaxis={'title': "No. of Orders"},
                        )

fig = go.Figure(data=plot_data, layout=plot_layout)
# Fixed 800x800 canvas with no gap between bar slots
fig.update_layout(
    autosize=False,
    width=800,
    height=800,
    bargap=0,
)
# The figure is rendered once by iplot; the former bare `fig.show` (no call
# parentheses) did nothing and has been removed.
pyoff.iplot(fig)