### DataSet
   * Brazilian E-commerce Dataset (https://www.kaggle.com/olistbr/brazilian-ecommerce)
   * This analysis uses three tables: customers, orders and order_payments

### Operations performed
   * Loading tables into dataframes customers, orders, order_payments
   * Basic data analysis 
   * Finding missing values
   * Merging tables 
        * customers and orders into customers_merged
        * customers_merged and order_payments into customers_pays
        
### Answering Business Questions
   * Frequency: Finding number of orders placed by each customer
   * Monetary value: Finding total amount paid by each customer 
   * Recency: Finding the number of days between each customer's most recent order and the reference date (2018-12-31)
    

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Loading Data

# Customers

In [4]:
# Load the customers table (one row per customer_id).
# NOTE(review): customer_zip_code_prefix is parsed as an integer here (the preview
# shows values such as 9790 and 1151), so any leading zeros are lost — pass
# dtype=str for that column if the prefix is ever used as a key; confirm.
customers = pd.read_csv('dataset/olist_customers_dataset.csv')

In [5]:
customers.head()

Unnamed: 0,customer_id,customer_unique_id,customer_zip_code_prefix,customer_city,customer_state
0,06b8999e2fba1a1fbc88172c00ba8bc7,861eff4711a542e4b93843c6dd7febb0,14409,franca,SP
1,18955e83d337fd6b2def6b18a428ac77,290c77bc529b7ac935b93aa66c333dc3,9790,sao bernardo do campo,SP
2,4e7b3e00288586ebd08712fdd0374a03,060e732b5b29e8181a18229c7b0b2b5e,1151,sao paulo,SP
3,b2b6027bc5c5109e529d4dc6358b12c3,259dac757896d24d7702b9acbbff3f3c,8775,mogi das cruzes,SP
4,4f2d8ab171c80ec8364f7c12e35b23ad,345ecd01c38d18a9036ed96c73b8d066,13056,campinas,SP


In [6]:
# Row 0, column 1 in a single positional lookup. The chained form `iloc[0][1]`
# first builds a label-indexed row Series and then relies on an integer key being
# treated as a position, which pandas has deprecated.
customers.iloc[0, 1]

'861eff4711a542e4b93843c6dd7febb0'

In [7]:
# Single label-based lookup (row label 0, column 'customer_id') instead of
# chaining two indexers through an intermediate row Series.
customers.loc[0, 'customer_id']

'06b8999e2fba1a1fbc88172c00ba8bc7'

In [8]:
customers.columns

Index(['customer_id', 'customer_unique_id', 'customer_zip_code_prefix',
       'customer_city', 'customer_state'],
      dtype='object')

In [9]:
customers['customer_id'].iloc[0]

'06b8999e2fba1a1fbc88172c00ba8bc7'

In [10]:
customers.iloc[0:1,0:1]

Unnamed: 0,customer_id
0,06b8999e2fba1a1fbc88172c00ba8bc7


In [11]:
customers.shape

(99441, 5)

In [12]:
customers['customer_unique_id'].nunique()

96096

In [13]:
# Number of non-null customer_id values. `count` must be called; without the
# parentheses the cell only displays the bound method object.
customers.customer_id.count()

99441

In [14]:
customers.isna().any()

customer_id                 False
customer_unique_id          False
customer_zip_code_prefix    False
customer_city               False
customer_state              False
dtype: bool

# Orders

In [15]:
# Load the orders table.
# NOTE(review): no parse_dates is given, so the timestamp columns are loaded as
# plain strings; later cells convert them with pd.to_datetime where needed.
orders = pd.read_csv('dataset/olist_orders_dataset.csv')

In [16]:
orders.columns

Index(['order_id', 'customer_id', 'order_status', 'order_purchase_timestamp',
       'order_approved_at', 'order_delivered_carrier_date',
       'order_delivered_customer_date', 'order_estimated_delivery_date'],
      dtype='object')

In [17]:
orders.head()

Unnamed: 0,order_id,customer_id,order_status,order_purchase_timestamp,order_approved_at,order_delivered_carrier_date,order_delivered_customer_date,order_estimated_delivery_date
0,e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,2017-10-02 11:07:15,2017-10-04 19:55:00,2017-10-10 21:25:13,2017-10-18 00:00:00
1,53cdb2fc8bc7dce0b6741e2150273451,b0830fb4747a6c6d20dea0b8c802d7ef,delivered,2018-07-24 20:41:37,2018-07-26 03:24:27,2018-07-26 14:31:00,2018-08-07 15:27:45,2018-08-13 00:00:00
2,47770eb9100c2d0c44946d9cf07ec65d,41ce2a54c0b03bf3443c3d931a367089,delivered,2018-08-08 08:38:49,2018-08-08 08:55:23,2018-08-08 13:50:00,2018-08-17 18:06:29,2018-09-04 00:00:00
3,949d5b44dbf5de918fe9c16f97b45f8a,f88197465ea7920adcdbec7375364d82,delivered,2017-11-18 19:28:06,2017-11-18 19:45:59,2017-11-22 13:39:59,2017-12-02 00:28:42,2017-12-15 00:00:00
4,ad21c59c0840e6cb83a9ceb5573f8159,8ab97904e6daea8866dbdbc4fb7aad2c,delivered,2018-02-13 21:18:39,2018-02-13 22:20:29,2018-02-14 19:46:34,2018-02-16 18:17:02,2018-02-26 00:00:00


In [18]:
orders.shape

(99441, 8)

In [19]:
orders['order_id'].nunique()

99441

In [20]:
orders.isna().any()

order_id                         False
customer_id                      False
order_status                     False
order_purchase_timestamp         False
order_approved_at                 True
order_delivered_carrier_date      True
order_delivered_customer_date     True
order_estimated_delivery_date    False
dtype: bool

# Order Payment

In [21]:
# Load the order payments table. It holds one row per payment record rather than
# per order: the cells below show 103,886 rows covering 99,440 distinct order_id
# values, so a single order can appear more than once.
order_values = pd.read_csv('dataset/olist_order_payments_dataset.csv')

In [22]:
order_values.columns

Index(['order_id', 'payment_sequential', 'payment_type',
       'payment_installments', 'payment_value'],
      dtype='object')

In [23]:
order_values.head()

Unnamed: 0,order_id,payment_sequential,payment_type,payment_installments,payment_value
0,b81ef226f3fe1789b1e8b2acac839d17,1,credit_card,8,99.33
1,a9810da82917af2d9aefd1278f1dcfa0,1,credit_card,1,24.39
2,25e8ea4e93396b6fa0d3dd708e76c1bd,1,credit_card,1,65.71
3,ba78997921bbcdc1373bb41e913ab953,1,credit_card,8,107.78
4,42fdf880ba16b47b59251dd489d4441a,1,credit_card,2,128.45


In [24]:
order_values.shape

(103886, 5)

In [34]:
order_values.order_id.nunique()

99440

In [36]:
#number of observations in each category of payment_type column
order_values['payment_type'].value_counts()

credit_card    76795
boleto         19784
voucher         5775
debit_card      1529
not_defined        3
Name: payment_type, dtype: int64

In [37]:
order_values.isna().any()

order_id                False
payment_sequential      False
payment_type            False
payment_installments    False
payment_value           False
dtype: bool

## Merging/Join Datasets

In [25]:
# Attach each order to its customer record via the shared customer_id key.
customers_merged = customers.merge(orders, on='customer_id')

In [26]:
customers_merged.head()

Unnamed: 0,customer_id,customer_unique_id,customer_zip_code_prefix,customer_city,customer_state,order_id,order_status,order_purchase_timestamp,order_approved_at,order_delivered_carrier_date,order_delivered_customer_date,order_estimated_delivery_date
0,06b8999e2fba1a1fbc88172c00ba8bc7,861eff4711a542e4b93843c6dd7febb0,14409,franca,SP,00e7ee1b050b8499577073aeb2a297a1,delivered,2017-05-16 15:05:35,2017-05-16 15:22:12,2017-05-23 10:47:57,2017-05-25 10:35:35,2017-06-05 00:00:00
1,18955e83d337fd6b2def6b18a428ac77,290c77bc529b7ac935b93aa66c333dc3,9790,sao bernardo do campo,SP,29150127e6685892b6eab3eec79f59c7,delivered,2018-01-12 20:48:24,2018-01-12 20:58:32,2018-01-15 17:14:59,2018-01-29 12:41:19,2018-02-06 00:00:00
2,4e7b3e00288586ebd08712fdd0374a03,060e732b5b29e8181a18229c7b0b2b5e,1151,sao paulo,SP,b2059ed67ce144a36e2aa97d2c9e9ad2,delivered,2018-05-19 16:07:45,2018-05-20 16:19:10,2018-06-11 14:31:00,2018-06-14 17:58:51,2018-06-13 00:00:00
3,b2b6027bc5c5109e529d4dc6358b12c3,259dac757896d24d7702b9acbbff3f3c,8775,mogi das cruzes,SP,951670f92359f4fe4a63112aa7306eba,delivered,2018-03-13 16:06:38,2018-03-13 17:29:19,2018-03-27 23:22:42,2018-03-28 16:04:25,2018-04-10 00:00:00
4,4f2d8ab171c80ec8364f7c12e35b23ad,345ecd01c38d18a9036ed96c73b8d066,13056,campinas,SP,6b7d50bd145f6fc7f33cebabd7e49d0f,delivered,2018-07-29 09:51:30,2018-07-29 10:10:09,2018-07-30 15:16:00,2018-08-09 20:55:48,2018-08-15 00:00:00


In [55]:
customers_merged.shape

(99441, 12)

In [56]:
# Add payment rows to every order. The default inner join keeps only orders that
# have at least one payment record, and repeats an order once per payment row.
customers_pays = customers_merged.merge(order_values, on='order_id')

In [57]:
customers_pays.head()

Unnamed: 0,customer_id,customer_unique_id,customer_zip_code_prefix,customer_city,customer_state,order_id,order_status,order_purchase_timestamp,order_approved_at,order_delivered_carrier_date,order_delivered_customer_date,order_estimated_delivery_date,payment_sequential,payment_type,payment_installments,payment_value
0,06b8999e2fba1a1fbc88172c00ba8bc7,861eff4711a542e4b93843c6dd7febb0,14409,franca,SP,00e7ee1b050b8499577073aeb2a297a1,delivered,2017-05-16 15:05:35,2017-05-16 15:22:12,2017-05-23 10:47:57,2017-05-25 10:35:35,2017-06-05 00:00:00,1,credit_card,2,146.87
1,18955e83d337fd6b2def6b18a428ac77,290c77bc529b7ac935b93aa66c333dc3,9790,sao bernardo do campo,SP,29150127e6685892b6eab3eec79f59c7,delivered,2018-01-12 20:48:24,2018-01-12 20:58:32,2018-01-15 17:14:59,2018-01-29 12:41:19,2018-02-06 00:00:00,1,credit_card,8,335.48
2,4e7b3e00288586ebd08712fdd0374a03,060e732b5b29e8181a18229c7b0b2b5e,1151,sao paulo,SP,b2059ed67ce144a36e2aa97d2c9e9ad2,delivered,2018-05-19 16:07:45,2018-05-20 16:19:10,2018-06-11 14:31:00,2018-06-14 17:58:51,2018-06-13 00:00:00,1,credit_card,7,157.73
3,b2b6027bc5c5109e529d4dc6358b12c3,259dac757896d24d7702b9acbbff3f3c,8775,mogi das cruzes,SP,951670f92359f4fe4a63112aa7306eba,delivered,2018-03-13 16:06:38,2018-03-13 17:29:19,2018-03-27 23:22:42,2018-03-28 16:04:25,2018-04-10 00:00:00,1,credit_card,1,173.3
4,4f2d8ab171c80ec8364f7c12e35b23ad,345ecd01c38d18a9036ed96c73b8d066,13056,campinas,SP,6b7d50bd145f6fc7f33cebabd7e49d0f,delivered,2018-07-29 09:51:30,2018-07-29 10:10:09,2018-07-30 15:16:00,2018-08-09 20:55:48,2018-08-15 00:00:00,1,credit_card,8,252.25


## Answering business questions
* Frequency
* Monetary value
* Recency

In [30]:
#Creating empty dataframe
final_df = pd.DataFrame()

In [31]:
final_df

In [33]:
# Number of non-null customer_unique_id values (one per payment row). `count`
# must be called; without the parentheses the cell only shows the bound method.
customers_pays['customer_unique_id'].count()

103886

In [34]:
# Preview the first few ids only; converting the whole column to a list prints
# all 103,886 values into the notebook.
customers_pays['customer_unique_id'].head(10).tolist()

['861eff4711a542e4b93843c6dd7febb0',
 '290c77bc529b7ac935b93aa66c333dc3',
 '060e732b5b29e8181a18229c7b0b2b5e',
 '259dac757896d24d7702b9acbbff3f3c',
 '345ecd01c38d18a9036ed96c73b8d066',
 '4c93744516667ad3b8f1fb645a3116a4',
 'addec96d2e059c80c30fe6871d30d177',
 '57b2a98a409812fe9618067b6b8ebe4f',
 '1175e95fb47ddff9de6b2b06188f7e0d',
 '9afe194fb833f79e300e37e580171f22']
 

In [35]:
# Row count of the id column, duplicates included.
len(customers_pays['customer_unique_id'])

103886

In [36]:
# Number of distinct customers. Displaying the full set would print every id in
# arbitrary order; the count is what the following cells rely on.
customers_pays['customer_unique_id'].nunique()

96095
 

## Getting unique customerIds and storing

In [None]:
# Getting unique customer ids and storing them
# Solution 1: de-duplicate with a set, then sort into a list. A raw set has no
# defined order and recent pandas versions refuse to build a column from one.
final_df['customer_unique_id'] = sorted(set(customers_pays['customer_unique_id']))

In [38]:
# Solution 2: take the distinct ids straight from pandas, in order of first appearance.
final_df['customer_unique_id'] = customers_pays['customer_unique_id'].drop_duplicates().to_numpy()

In [39]:
final_df['customer_unique_id']

0        861eff4711a542e4b93843c6dd7febb0
1        290c77bc529b7ac935b93aa66c333dc3
2        060e732b5b29e8181a18229c7b0b2b5e
3        259dac757896d24d7702b9acbbff3f3c
4        345ecd01c38d18a9036ed96c73b8d066
                       ...               
96090    1a29b476fee25c95fbafc67c5ac95cf8
96091    d52a67c98be1cf6a5c84435bd38d095d
96092    e9f50caf99f032f0bf3c55141f019d99
96093    73c2643a0a458b49f58cea58833b192e
96094    84732c5050c01db9b23e19ba39899398
Name: customer_unique_id, Length: 96095, dtype: object

## Monetary value: Finding total amount paid by each customer

In [40]:
# Monetary value: total payment_value per customer, returned as a flat two-column frame.
monetary_value = customers_pays.groupby('customer_unique_id', as_index=False)['payment_value'].sum()

In [41]:
# Rebinds monetary_value: the same per-customer totals, but kept as a one-column
# frame whose index is customer_unique_id (there is no reset_index here).
monetary_value=customers_pays.groupby('customer_unique_id')[['payment_value']].sum()

In [42]:
# Per-customer sum of payment_value, shown as a flat table.
customers_pays.groupby('customer_unique_id', as_index=False)['payment_value'].sum()

Unnamed: 0,customer_unique_id,payment_value
0,0000366f3b9a7992bf8c76cfdf3221e2,141.90
1,0000b849f77a49e4a4ce2b2a4ca5be3f,27.19
2,0000f46a3911fa3c0805444483337064,86.22
3,0000f6ccb0745a6a4b88665a16c9f078,43.62
4,0004aac84e0df4da2b147fca70cf8255,196.89
...,...,...
96090,fffcf5a5ff07b0908bd4e2dbc735a684,2067.42
96091,fffea47cd6d3cc0a88bd621562a9d061,84.58
96092,ffff371b4d645b6ecea244b27531430a,112.46
96093,ffff5962728ec6157033ef9805bacc48,133.69


In [43]:
final_df['monetary'] = monetary_value['payment_value']
final_df

Unnamed: 0,customer_unique_id,monetary
0,861eff4711a542e4b93843c6dd7febb0,
1,290c77bc529b7ac935b93aa66c333dc3,
2,060e732b5b29e8181a18229c7b0b2b5e,
3,259dac757896d24d7702b9acbbff3f3c,
4,345ecd01c38d18a9036ed96c73b8d066,
...,...,...
96090,1a29b476fee25c95fbafc67c5ac95cf8,
96091,d52a67c98be1cf6a5c84435bd38d095d,
96092,e9f50caf99f032f0bf3c55141f019d99,
96093,73c2643a0a458b49f58cea58833b192e,


In [44]:
# `on` pairs the customer_unique_id column of final_df with the index level of
# the same name in monetary_value; only customers present on both sides are kept.
df = final_df.merge(monetary_value, on='customer_unique_id')
df

Unnamed: 0,customer_unique_id,monetary,payment_value
0,861eff4711a542e4b93843c6dd7febb0,,146.87
1,290c77bc529b7ac935b93aa66c333dc3,,335.48
2,060e732b5b29e8181a18229c7b0b2b5e,,157.73
3,259dac757896d24d7702b9acbbff3f3c,,173.30
4,345ecd01c38d18a9036ed96c73b8d066,,252.25
...,...,...,...
96090,1a29b476fee25c95fbafc67c5ac95cf8,,88.78
96091,d52a67c98be1cf6a5c84435bd38d095d,,129.06
96092,e9f50caf99f032f0bf3c55141f019d99,,56.04
96093,73c2643a0a458b49f58cea58833b192e,,711.07


In [45]:
# Independent copy of final_df as it stands at this point.
# NOTE(review): `f` is not used again in the visible part of the notebook — confirm it is still needed.
f = final_df.copy()

In [59]:
customers_pays

Unnamed: 0,customer_id,customer_unique_id,customer_zip_code_prefix,customer_city,customer_state,order_id,order_status,order_purchase_timestamp,order_approved_at,order_delivered_carrier_date,order_delivered_customer_date,order_estimated_delivery_date,payment_sequential,payment_type,payment_installments,payment_value
0,06b8999e2fba1a1fbc88172c00ba8bc7,861eff4711a542e4b93843c6dd7febb0,14409,franca,SP,00e7ee1b050b8499577073aeb2a297a1,delivered,2017-05-16 15:05:35,2017-05-16 15:22:12,2017-05-23 10:47:57,2017-05-25 10:35:35,2017-06-05 00:00:00,1,credit_card,2,146.87
1,18955e83d337fd6b2def6b18a428ac77,290c77bc529b7ac935b93aa66c333dc3,9790,sao bernardo do campo,SP,29150127e6685892b6eab3eec79f59c7,delivered,2018-01-12 20:48:24,2018-01-12 20:58:32,2018-01-15 17:14:59,2018-01-29 12:41:19,2018-02-06 00:00:00,1,credit_card,8,335.48
2,4e7b3e00288586ebd08712fdd0374a03,060e732b5b29e8181a18229c7b0b2b5e,1151,sao paulo,SP,b2059ed67ce144a36e2aa97d2c9e9ad2,delivered,2018-05-19 16:07:45,2018-05-20 16:19:10,2018-06-11 14:31:00,2018-06-14 17:58:51,2018-06-13 00:00:00,1,credit_card,7,157.73
3,b2b6027bc5c5109e529d4dc6358b12c3,259dac757896d24d7702b9acbbff3f3c,8775,mogi das cruzes,SP,951670f92359f4fe4a63112aa7306eba,delivered,2018-03-13 16:06:38,2018-03-13 17:29:19,2018-03-27 23:22:42,2018-03-28 16:04:25,2018-04-10 00:00:00,1,credit_card,1,173.30
4,4f2d8ab171c80ec8364f7c12e35b23ad,345ecd01c38d18a9036ed96c73b8d066,13056,campinas,SP,6b7d50bd145f6fc7f33cebabd7e49d0f,delivered,2018-07-29 09:51:30,2018-07-29 10:10:09,2018-07-30 15:16:00,2018-08-09 20:55:48,2018-08-15 00:00:00,1,credit_card,8,252.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103881,17ddf5dd5d51696bb3d7c6291687be6f,1a29b476fee25c95fbafc67c5ac95cf8,3937,sao paulo,SP,6760e20addcf0121e9d58f2f1ff14298,delivered,2018-04-07 15:48:17,2018-04-07 16:08:45,2018-04-11 02:08:36,2018-04-13 20:06:37,2018-04-25 00:00:00,1,credit_card,6,88.78
103882,e7b71a9017aa05c9a7fd292d714858e8,d52a67c98be1cf6a5c84435bd38d095d,6764,taboao da serra,SP,9ec0c8947d973db4f4e8dcf1fbfa8f1b,delivered,2018-04-04 08:20:22,2018-04-04 08:35:12,2018-04-05 18:42:35,2018-04-11 18:54:45,2018-04-20 00:00:00,1,credit_card,3,129.06
103883,5e28dfe12db7fb50a4b2f691faecea5e,e9f50caf99f032f0bf3c55141f019d99,60115,fortaleza,CE,fed4434add09a6f332ea398efd656a5c,delivered,2018-04-08 20:11:50,2018-04-08 20:30:03,2018-04-09 17:52:17,2018-05-09 19:03:15,2018-05-02 00:00:00,1,credit_card,5,56.04
103884,56b18e2166679b8a959d72dd06da27f9,73c2643a0a458b49f58cea58833b192e,92120,canoas,RS,e31ec91cea1ecf97797787471f98a8c2,delivered,2017-11-03 21:08:33,2017-11-03 21:31:20,2017-11-06 18:24:41,2017-11-16 19:58:39,2017-12-05 00:00:00,1,credit_card,2,711.07


## Frequency: Finding number of orders placed by each customer

In [47]:
# Frequency: number of orders placed by each customer.
# customers_pays holds one row per payment, so counting rows would count an order
# once for every payment made on it. Count distinct order ids instead.
freq = customers_pays.groupby('customer_unique_id')['order_id'].nunique().reset_index()

In [48]:
freq

Unnamed: 0,customer_unique_id,order_id
0,0000366f3b9a7992bf8c76cfdf3221e2,1
1,0000b849f77a49e4a4ce2b2a4ca5be3f,1
2,0000f46a3911fa3c0805444483337064,1
3,0000f6ccb0745a6a4b88665a16c9f078,1
4,0004aac84e0df4da2b147fca70cf8255,1
...,...,...
96090,fffcf5a5ff07b0908bd4e2dbc735a684,1
96091,fffea47cd6d3cc0a88bd621562a9d061,1
96092,ffff371b4d645b6ecea244b27531430a,1
96093,ffff5962728ec6157033ef9805bacc48,1


In [49]:
# Append the per-customer order count (column order_id) to df.
# This reassigns df, so running the cell twice merges freq in a second time.
df = df.merge(freq, on='customer_unique_id')

In [50]:
df

Unnamed: 0,customer_unique_id,monetary,payment_value,order_id
0,861eff4711a542e4b93843c6dd7febb0,,146.87,1
1,290c77bc529b7ac935b93aa66c333dc3,,335.48,1
2,060e732b5b29e8181a18229c7b0b2b5e,,157.73,1
3,259dac757896d24d7702b9acbbff3f3c,,173.30,1
4,345ecd01c38d18a9036ed96c73b8d066,,252.25,1
...,...,...,...,...
96090,1a29b476fee25c95fbafc67c5ac95cf8,,88.78,1
96091,d52a67c98be1cf6a5c84435bd38d095d,,129.06,1
96092,e9f50caf99f032f0bf3c55141f019d99,,56.04,1
96093,73c2643a0a458b49f58cea58833b192e,,711.07,1


## Recency
## Number of days between each customer's latest order and the reference date 2018-12-31

In [None]:
# Earlier per-customer draft, kept for reference (superseded by the cells below):
# def get_recency(x):
#     data = customers_pays[customers_pays['customer_unique_id'] == x]
#     date = max(data.order_purchase_timestamp)
#     length = pd.to_datetime('2018-12-31') - pd.to_datetime(date)
#     # how many days ago this customer last bought something
#     return length.days

In [65]:
def get_recency(x, reference_date='2018-12-31'):
    """Return the number of whole days between a purchase time and a reference date.

    Parameters
    ----------
    x : str or datetime-like
        Purchase timestamp; any scalar that ``pd.to_datetime`` can parse.
    reference_date : str or datetime-like, default '2018-12-31'
        Date the recency is measured against. The default keeps the cut-off
        this notebook uses, so existing single-argument calls are unaffected.

    Returns
    -------
    int
        The ``.days`` component of ``reference_date - x``; the time-of-day
        remainder of the difference is not included.
    """
    elapsed = pd.to_datetime(reference_date) - pd.to_datetime(x)
    return elapsed.days

In [63]:
# Spot check on the first row: time elapsed between its purchase and the 2018-12-31 cut-off.
pd.to_datetime('2018-12-31') - pd.to_datetime(customers_pays['order_purchase_timestamp'].iloc[0])

Timedelta('593 days 08:54:25')

In [61]:
# Latest order_purchase_timestamp per customer, as a flat two-column frame.
last_time = customers_pays.groupby('customer_unique_id', as_index=False)['order_purchase_timestamp'].max()

In [62]:
last_time

Unnamed: 0,customer_unique_id,order_purchase_timestamp
0,0000366f3b9a7992bf8c76cfdf3221e2,2018-05-10 10:56:27
1,0000b849f77a49e4a4ce2b2a4ca5be3f,2018-05-07 11:11:27
2,0000f46a3911fa3c0805444483337064,2017-03-10 21:05:03
3,0000f6ccb0745a6a4b88665a16c9f078,2017-10-12 20:29:41
4,0004aac84e0df4da2b147fca70cf8255,2017-11-14 19:45:42
...,...,...
96090,fffcf5a5ff07b0908bd4e2dbc735a684,2017-06-08 21:00:36
96091,fffea47cd6d3cc0a88bd621562a9d061,2017-12-10 20:07:56
96092,ffff371b4d645b6ecea244b27531430a,2017-02-07 15:49:16
96093,ffff5962728ec6157033ef9805bacc48,2018-05-02 15:17:41


In [66]:
# Recency in whole days from each customer's last order to the 2018-12-31 cut-off.
# Computed on the whole column at once instead of one Python call per row;
# the resulting values are the same.
last_time['recency'] = (
    pd.Timestamp('2018-12-31') - pd.to_datetime(last_time['order_purchase_timestamp'])
).dt.days

In [67]:
last_time

Unnamed: 0,customer_unique_id,order_purchase_timestamp,recency
0,0000366f3b9a7992bf8c76cfdf3221e2,2018-05-10 10:56:27,234
1,0000b849f77a49e4a4ce2b2a4ca5be3f,2018-05-07 11:11:27,237
2,0000f46a3911fa3c0805444483337064,2017-03-10 21:05:03,660
3,0000f6ccb0745a6a4b88665a16c9f078,2017-10-12 20:29:41,444
4,0004aac84e0df4da2b147fca70cf8255,2017-11-14 19:45:42,411
...,...,...,...
96090,fffcf5a5ff07b0908bd4e2dbc735a684,2017-06-08 21:00:36,570
96091,fffea47cd6d3cc0a88bd621562a9d061,2017-12-10 20:07:56,385
96092,ffff371b4d645b6ecea244b27531430a,2017-02-07 15:49:16,691
96093,ffff5962728ec6157033ef9805bacc48,2018-05-02 15:17:41,242
