In [1]:
import sys, getopt
import pandas as pd
import numpy as np
import datetime as dt
import time

from IPython.display import FileLink, FileLinks

def rfm(input_file, input_date):
    """Build a per-user recency/frequency/monetary table from a transactions CSV.

    Args:
        input_file: Path (or anything ``pd.read_csv`` accepts) of a CSV that has
            the columns ``UserID``, ``TransactionID``, ``TransactionDate``
            (formatted like ``'%d-%b-%y'``) and ``Revenue``.
        input_date: Reference date as a ``'%d/%m/%y'`` string; recency is the
            number of days between this date and a user's latest transaction.

    Returns:
        A DataFrame indexed by ``UserID`` with the columns ``recency``,
        ``frequency``, ``monetary``, ``R_Quartile``, ``F_Quartile``,
        ``M_Quartile`` and ``RFMClass`` (the three scores concatenated as text).
    """
    transactions = pd.read_csv(input_file)
    reference_date = dt.datetime.strptime(input_date, '%d/%m/%y')

    transactions['TransactionDate'] = transactions['TransactionDate'].apply(
        lambda raw: dt.datetime.strptime(raw, '%d-%b-%y'))
    # Revenue values that cannot be parsed as numbers become NaN.
    transactions['Revenue'] = pd.to_numeric(transactions['Revenue'], errors='coerce')

    def days_since_last(dates):
        # Whole days from the user's most recent transaction to the reference date.
        return (reference_date - dates.max()).days

    aggregations = {
        'TransactionDate': days_since_last,
        'TransactionID': 'count',
        'Revenue': 'sum',
    }
    table = transactions.groupby('UserID').agg(aggregations)
    table['TransactionDate'] = table['TransactionDate'].astype(int)
    table = table.rename(columns={
        'TransactionDate': 'recency',
        'TransactionID': 'frequency',
        'Revenue': 'monetary',
    })

    # Cut-offs at the 20/40/60/80 % points of each metric, as
    # {column name: {quantile: value}}; computed before any score column exists.
    quantiles = table.quantile(q=[0.20, 0.40, 0.60, 0.80]).to_dict()

    # The score columns keep their "Quartile" names although five bins are used.
    scoring_plan = (
        ('recency', 'R_Quartile', RClass),
        ('frequency', 'F_Quartile', FMClass),
        ('monetary', 'M_Quartile', FMClass),
    )
    for metric, score_column, classifier in scoring_plan:
        table[score_column] = table[metric].apply(classifier, args=(metric, quantiles))

    table['RFMClass'] = (
        table['R_Quartile'].map(str)
        + table['F_Quartile'].map(str)
        + table['M_Quartile'].map(str)
    )
    return table
    
    
def RClass(x,p,d):
    """Return a score from 1 to 5 for ``x`` against the cut-offs stored in ``d[p]``.

    Args:
        x: The value to score.
        p: Key selecting the cut-off mapping inside ``d``.
        d: Mapping of key -> {0.20, 0.40, 0.60, 0.80 -> cut-off value}.

    Returns:
        1 if ``x`` is at or below the 0.20 cut-off, 2 for the 0.40 cut-off,
        3 for 0.60, 4 for 0.80, and 5 when ``x`` is not ``<=`` any cut-off.
    """
    cutoffs = d[p]
    # Walk the cut-offs in ascending order; the first one that bounds x wins.
    for score, level in enumerate((0.20, 0.40, 0.60, 0.80), start=1):
        if x <= cutoffs[level]:
            return score
    return 5
     
# Arguments (x = value, p = column name: 'recency', 'frequency' or 'monetary', d = quantiles dict: {column name: {quantile: cut-off}})
def FMClass(x,p,d):
    """Return a score from 5 down to 1 for ``x`` against the cut-offs in ``d[p]``.

    Args:
        x: The value to score.
        p: Key selecting the cut-off mapping inside ``d``.
        d: Mapping of key -> {0.20, 0.40, 0.60, 0.80 -> cut-off value}.

    Returns:
        5 if ``x`` is at or below the 0.20 cut-off, 4 for the 0.40 cut-off,
        3 for 0.60, 2 for 0.80, and 1 when ``x`` is not ``<=`` any cut-off.
    """
    cutoffs = d[p]
    # Ascending cut-offs paired with descending scores; first match wins.
    ladder = ((0.20, 5), (0.40, 4), (0.60, 3), (0.80, 2))
    for level, score in ladder:
        if x <= cutoffs[level]:
            return score
    return 1


# Defaults used when no command-line overrides are supplied.
input_file = 'TRANS.csv' # 'rfm_input_file.csv'
output_file = 'rfm.csv'
# Today's date in UTC, in the '%d/%m/%y' form that rfm() parses.
# datetime.utcnow() is deprecated since Python 3.12; a timezone-aware "now" in
# UTC formats to the same date string.
start_date = dt.datetime.now(dt.timezone.utc).strftime('%d/%m/%y')

if __name__ == "__main__":
    # Overrides are read from argv[2], argv[3] and argv[4]; argv[1] is not used.
    # NOTE(review): the intended command-line layout is not visible here — confirm
    # before changing these indices.
    if len(sys.argv) >= 4:
        input_file = sys.argv[2]
        start_date = sys.argv[3]
    if len(sys.argv) >= 5:
        output_file = sys.argv[4]

# Compute the RFM table, echo it, and write it out as a comma-separated file.
print('input_file: %s; output_file: %s; start_date: %s\n' % (input_file, output_file, start_date))
RFM_Table = rfm(input_file, start_date)
print(RFM_Table)
RFM_Table.to_csv(output_file, sep = ',')

# Kept as the last top-level expression so a notebook displays its result.
FileLink(output_file)


input_file: TRANS.csv; output_file: rfm.csv; start_date: 28/03/18

        recency  frequency  monetary  R_Quartile  F_Quartile  M_Quartile  \
UserID                                                                     
10609      1816          3    413.92           4           2           1   
10922      1480          1    184.26           1           5           3   
11300      1854          1     91.02           4           5           4   
11400      1696          5    851.73           3           1           1   
11487      1629          1    147.22           3           5           4   
11762      1874          1     32.31           4           5           5   
11971      1936          2    229.78           5           3           3   
12047      1623          1     74.06           3           5           5   
12067      1507          1     63.89           1           5           5   
12114      1844          1     84.44           4           5           5   
12170      1802      