# Sorting and tagging customers depending on their LTV

## Sorting

In [1]:
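## Exchange sort, descending (compares xs[i] with every later element rather than adjacent pairs, so it is not a true bubble sort)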

def my_sort(xs, f=lambda x: x):
    """Sort ``xs`` in place, largest ``f(item)`` first, and return it.

    Args:
        xs: Mutable, indexable sequence; its elements are rearranged in place.
        f: Key function applied to each element before comparing. Defaults to
            the identity, i.e. the elements are compared directly.

    Returns:
        The same ``xs`` object, now in descending key order.

    Note:
        Every slot is compared against all later slots, so the number of
        comparisons grows quadratically with ``len(xs)``. Elements with equal
        keys may end up in a different relative order than they started in.
    """
    size = len(xs)
    for slot in range(size - 1):
        # After this pass, xs[slot] holds the largest key among xs[slot:].
        for probe in range(slot, size):
            if f(xs[slot]) < f(xs[probe]):
                xs[slot], xs[probe] = xs[probe], xs[slot]
    return xs

In [2]:
my_sort([-1,2,5,-5,6])

[6, 5, 2, -1, -5]

In [3]:
## Descending sort for dictionaries (by value by default), built on my_sort
def my_sort_dict(xs, f=lambda x:x[1]):
    """Return a new dict holding the entries of ``xs`` as ordered by ``my_sort``.

    Args:
        xs: Dictionary to reorder. It is left unchanged, because the sort runs
            on a fresh list of its ``(key, value)`` pairs.
        f: Key function that receives each ``(key, value)`` pair. The default
            ranks entries by value.

    Returns:
        A dict whose insertion order is the order produced by ``my_sort``.
    """
    pairs = list(xs.items())
    ordered_pairs = my_sort(pairs, f)
    return dict(ordered_pairs)

In [4]:
# Small hand-made fixture for trying my_sort_dict before using the real data.
test_dict = {
    "Customer1": 5000,
    "Customer2": 2500,
    "Customer3": 8000,
}

In [5]:
my_sort_dict(test_dict)

{'Customer3': 8000, 'Customer1': 5000, 'Customer2': 2500}

## Apply to our dataset

In [6]:
import pandas as pd
import sqlite3

In [7]:
# Open the SQLite database file that the queries below read from. The path is
# relative, so it is resolved against the kernel's working directory.
# NOTE: sqlite3.connect creates an empty database when the file is missing, so
# a wrong path only surfaces later, when the first query runs.
conn = sqlite3.connect("churn.db")

In [8]:
# Map each CustomerID to its TotalCharges, which this notebook uses as the LTV
# figure. Selecting the single column and calling Series.to_dict() builds the
# mapping directly, instead of transposing the frame into one very wide row
# and pulling that row out of a list of records.
LTV = (
    pd.read_sql("select CustomerID, TotalCharges from churn_all", conn)
    .set_index("CustomerID")["TotalCharges"]
    .to_dict()
)

In [9]:
LTV

{'3668-QPYBK': 108.15,
 '9237-HQITU': 151.65,
 '9305-CDSKC': 820.5,
 '7892-POOKP': 3046.05,
 '0280-XJGEX': 5036.3,
 '4190-MFLUW': 528.35,
 '8779-QRDMV': 39.65,
 '1066-JKSGK': 20.15,
 '6467-CHFZW': 4749.15,
 '8665-UTDHZ': 30.2,
 '8773-HHUOZ': 1093.1,
 '6047-YHPVI': 316.9,
 '5380-WJKOV': 3549.25,
 '8168-UQWWF': 1105.4,
 '7760-OYPDY': 144.15,
 '9420-LOJKX': 1426.4,
 '7495-OOKFY': 633.3,
 '1658-BYGOY': 1752.55,
 '5698-BQJOH': 857.25,
 '5919-TMRGD': 79.35,
 '9191-MYQKX': 496.9,
 '8637-XJIVR': 927.35,
 '0278-YXOOG': 113.85,
 '4598-XLKNJ': 2514.5,
 '3192-NQECA': 7611.85,
 '0486-HECZI': 5238.9,
 '4846-WHAFZ': 2868.15,
 '5299-RULOA': 1064.65,
 '0404-SWRVG': 229.55,
 '4412-YLTKF': 2135.5,
 '6207-WIOLX': 1502.65,
 '3091-FYHKI': 35.45,
 '2372-HWUHI': 81.25,
 '0390-DCFDQ': 70.45,
 '4080-OGPJL': 563.65,
 '2135-RXIHG': 45.65,
 '3874-EQOEP': 655.5,
 '0867-MKZVY': 1592.35,
 '3376-BMGFE': 273.0,
 '3445-HXXGF': 2651.2,
 '1875-QIVME': 242.8,
 '0691-JVSYA': 5000.2,
 '2656-FMOKZ': 1145.7,
 '2070-FNEXE': 503

In [10]:
my_sort_dict(LTV)

{'2889-FPWRM': 8684.8,
 '7569-NMZYQ': 8672.45,
 '9739-JLPQJ': 8670.1,
 '9788-HNGUT': 8594.4,
 '8879-XUAHX': 8564.75,
 '9924-JPRMC': 8547.15,
 '0675-NCDYU': 8543.25,
 '6650-BWFRT': 8529.5,
 '0164-APGRB': 8496.7,
 '1488-PBLJN': 8477.7,
 '8984-HPEMB': 8477.6,
 '6007-TCTST': 8476.5,
 '4376-KFVRS': 8468.2,
 '0017-IUDMW': 8456.75,
 '5451-YHYPW': 8443.7,
 '6904-JLBGY': 8436.25,
 '8263-QMNTJ': 8425.3,
 '8015-IHCGW': 8425.15,
 '5914-XRFQB': 8424.9,
 '8454-AATJP': 8405.0,
 '1480-BKXGA': 8404.9,
 '8606-CIQUL': 8399.15,
 '7359-WWYJV': 8375.05,
 '2380-DAMQP': 8349.7,
 '0906-QVPMS': 8349.45,
 '3258-ZKPAI': 8337.45,
 '1779-PWPMG': 8333.95,
 '8513-OLYGY': 8332.15,
 '5135-GRQJV': 8331.95,
 '2388-LAESQ': 8317.95,
 '9298-WGMRW': 8312.75,
 '2121-JAFOM': 8312.4,
 '8975-SKGRX': 8310.55,
 '0186-CAERR': 8309.55,
 '3810-DVDQQ': 8308.9,
 '8207-DMRVL': 8306.05,
 '2223-GDSHL': 8297.5,
 '3892-NXAZG': 8289.2,
 '8628-MFKAX': 8277.05,
 '3396-DKDEL': 8250.0,
 '6859-QNXIQ': 8248.5,
 '7929-SKFGK': 8244.3,
 '9351-HXDMR':

## Tagging our customers

In [11]:
def create_segment(df, LTV_col, target_col, LTV_value, upper_segment="High", lower_segment="Low"):
    """Tag every row of ``df`` with a segment label based on an LTV threshold.

    Args:
        df: DataFrame to tag. It is modified in place: ``target_col`` is added
            (or overwritten) on this very object.
        LTV_col: Name of the column holding the value to compare.
        target_col: Name of the column that receives the labels.
        LTV_value: Threshold. Rows strictly above it get ``upper_segment``;
            all other rows get ``lower_segment``.
        upper_segment: Label for rows above the threshold.
        lower_segment: Label for the remaining rows.

    Returns:
        The same ``df`` object that was passed in, now carrying ``target_col``.
    """
    def label_for(amount):
        # Strict comparison: a value equal to the threshold is "lower".
        if amount > LTV_value:
            return upper_segment
        return lower_segment

    df[target_col] = df[LTV_col].map(label_for)
    return df

In [12]:
# Load every column of churn_all into a DataFrame for the tagging step.
df = pd.read_sql("select * from churn_all", conn)

In [13]:
# Segment threshold: the 80th percentile of TotalCharges. Customers whose
# TotalCharges is strictly above it are tagged "High" below; the rest "Low".
LTV_Value = df.TotalCharges.quantile(0.80)

In [14]:
LTV_Value

4475.41

In [15]:
# create_segment writes the LTV_Segment column into df itself and returns that
# same object, so churn_tagged and df are two names for one DataFrame.
churn_tagged = create_segment(df, "TotalCharges", "LTV_Segment", LTV_Value)

In [16]:
churn_tagged.head()

Unnamed: 0,CustomerID,Gender,SeniorCitizen,Partner,Dependents,State,Latitude,Longitude,ZipCode,PhoneService,...,StreamingTV,StreamingMovies,Tenure,Contract,PaymentMethod,PaperlessBilling,MonthlyCharges,TotalCharges,Churn,LTV_Segment
0,3668-QPYBK,Male,No,No,No,California,33.964131,-118.272783,90003,Yes,...,No,No,2,Month-to-month,Mailed check,Yes,53.85,108.15,Yes,Low
1,9237-HQITU,Female,No,No,Yes,California,34.059281,-118.30742,90005,Yes,...,No,No,2,Month-to-month,Electronic check,Yes,70.7,151.65,Yes,Low
2,9305-CDSKC,Female,No,No,Yes,California,34.048013,-118.293953,90006,Yes,...,Yes,Yes,8,Month-to-month,Electronic check,Yes,99.65,820.5,Yes,Low
3,7892-POOKP,Female,No,Yes,Yes,California,34.062125,-118.315709,90010,Yes,...,Yes,Yes,28,Month-to-month,Electronic check,Yes,104.8,3046.05,Yes,Low
4,0280-XJGEX,Male,No,No,Yes,California,34.039224,-118.266293,90015,Yes,...,Yes,Yes,49,Month-to-month,Bank transfer (automatic),Yes,103.7,5036.3,Yes,High


In [17]:
churn_tagged["LTV_Segment"].value_counts()

Low     5625
High    1407
Name: LTV_Segment, dtype: int64

## Creating the LTV_Analysis class

In [18]:
class LTV_Analysis:
    """Helpers for ranking customers by lifetime value (LTV) and tagging segments.

    The class keeps no state; it only groups the sorting and segmentation
    helpers so they can be used together from one object.
    """

    def __init__(self):
        # Nothing to initialise: every method works purely on its arguments.
        pass

    ## Exchange sort (descending)

    def my_sort(self, xs, f=lambda x: x):
        """Sort ``xs`` in place, largest ``f(item)`` first, and return it.

        Args:
            xs: Mutable, indexable sequence; its elements are rearranged in
                place.
            f: Key function applied to each element before comparing.
                Defaults to the identity.

        Returns:
            The same ``xs`` object, now in descending key order.

        Note:
            Every slot is compared against all later slots, so the number of
            comparisons grows quadratically with ``len(xs)``. Elements with
            equal keys may be reordered relative to each other.
        """
        for i in range(len(xs) - 1):
            # After this pass, xs[i] holds the largest key among xs[i:].
            for j in range(i, len(xs)):
                if f(xs[i]) < f(xs[j]):
                    xs[i], xs[j] = xs[j], xs[i]
        return xs

    ## Descending sort for dictionaries

    def my_sort_dict(self, xs, f=lambda x:x[1]):
        """Return a new dict holding the entries of ``xs`` in sorted order.

        Args:
            xs: Dictionary to reorder. It is left unchanged, because the sort
                runs on a fresh list of its ``(key, value)`` pairs.
            f: Key function that receives each ``(key, value)`` pair. The
                default ranks entries by value, largest first.

        Returns:
            A dict whose insertion order is the descending order of ``f``.
        """
        pairs = list(xs.items())
        # Call the method on this instance, not a module-level my_sort, so the
        # class works on its own without any function defined outside it.
        return dict(self.my_sort(pairs, f))

    ## Segmentation function

    def create_segment(self, df, LTV_col, target_col, LTV_value, upper_segment="High", lower_segment="Low"):
        """Tag every row of ``df`` with a segment label based on a threshold.

        Args:
            df: DataFrame to tag. It is modified in place: ``target_col`` is
                added (or overwritten) on this very object.
            LTV_col: Name of the column holding the value to compare.
            target_col: Name of the column that receives the labels.
            LTV_value: Threshold. Rows strictly above it get
                ``upper_segment``; all other rows get ``lower_segment``.
            upper_segment: Label for rows above the threshold.
            lower_segment: Label for the remaining rows.

        Returns:
            The same ``df`` object that was passed in.
        """
        df[target_col] = df[LTV_col].map(lambda x: upper_segment if x > LTV_value else lower_segment)
        return df

In [19]:
l = LTV_Analysis()

In [None]:
l.my_sort_dict(LTV)

In [None]:
l.create_segment(df, "TotalCharges", "LTV_Segment", LTV_Value)