# Sorting and tagging customers by their lifetime value (LTV)

## Sorting

In [1]:
## Bubble sort

def my_sort(xs, f=lambda x: x):
    for i in range(len(xs) - 1):
        for j in range(i, len(xs)):
            if f(xs[i]) < f(xs[j]):
                xs[i], xs[j] = xs[j], xs[i]
    return xs

In [2]:
my_sort([3000, 4500, 3200, 5000, 3500])

[5000, 4500, 3500, 3200, 3000]

In [3]:
## Bubble sort for dictionaries
def my_sort_dict(xs, f=lambda x:x[1]):
    xs = [(k,v) for k,v in xs.items()]
    return {k: v for (k,v) in my_sort(xs,f)}

In [4]:
test_dict = {"Customer1": 5000,
 "Customer2": 2500,
 "Customer3": 8000}

In [5]:
my_sort_dict(test_dict)

{'Customer3': 8000, 'Customer1': 5000, 'Customer2': 2500}

## Apply to our dataset

In [6]:
import pandas as pd
import sqlite3

In [7]:
# Open the SQLite database file that the queries below read from.
conn = sqlite3.connect("churn.db")

In [8]:
# Build a {CustomerID: TotalCharges} dictionary; TotalCharges is the figure
# this notebook uses as each customer's LTV. Selecting the column and calling
# Series.to_dict() gives the mapping directly, without transposing the frame
# into a single very wide row first.
LTV = (
    pd.read_sql("select CustomerID, TotalCharges from churn_all", conn)
    .set_index("CustomerID")["TotalCharges"]
    .to_dict()
)

In [None]:
# View the first 5 items in the dictionary LTV

In [23]:
dict(list(LTV.items())[0:5])

{'3668-QPYBK': 108.15,
 '9237-HQITU': 151.65,
 '9305-CDSKC': 820.5,
 '7892-POOKP': 3046.05,
 '0280-XJGEX': 5036.3}

In [None]:
my_sort_dict(LTV)

In [None]:
# View the first 5 items in the sorted dictionary

In [26]:
dict(list(my_sort_dict(LTV).items())[0:5])

{'2889-FPWRM': 8684.8,
 '7569-NMZYQ': 8672.45,
 '9739-JLPQJ': 8670.1,
 '9788-HNGUT': 8594.4,
 '8879-XUAHX': 8564.75}

## Tagging our customers

In [11]:
def create_segment(df, LTV_col, target_col, LTV_value, upper_segment="High", lower_segment="Low"):
    """Label every row of ``df`` as upper or lower segment by a threshold.

    Args:
        df: DataFrame to tag. It is modified in place: ``target_col`` is
            added (or overwritten) on this very object.
        LTV_col: Name of the column holding the value to compare.
        target_col: Name of the column that receives the segment labels.
        LTV_value: Threshold; values strictly greater than it get
            ``upper_segment``, all others get ``lower_segment``.
        upper_segment: Label for rows above the threshold.
        lower_segment: Label for the remaining rows.

    Returns:
        The same ``df`` object that was passed in, now carrying ``target_col``.
    """
    def label_for(amount):
        # Strict comparison: a value equal to the threshold is "lower".
        if amount > LTV_value:
            return upper_segment
        return lower_segment

    labels = df[LTV_col].map(label_for)
    df[target_col] = labels
    return df

In [12]:
# Load every column of the churn_all table into a DataFrame for tagging.
df = pd.read_sql("select * from churn_all", conn)

In [13]:
# Segment threshold: the 80th percentile of TotalCharges. It is passed to
# create_segment below as the cut-off between the two segments.
LTV_Value = df.TotalCharges.quantile(0.80)

In [14]:
LTV_Value

4475.41

In [15]:
# create_segment writes the LTV_Segment column onto df itself and returns that
# same frame, so churn_tagged and df refer to one object after this cell.
churn_tagged = create_segment(df, "TotalCharges", "LTV_Segment", LTV_Value)

In [16]:
churn_tagged.head()

Unnamed: 0,CustomerID,Gender,SeniorCitizen,Partner,Dependents,State,Latitude,Longitude,ZipCode,PhoneService,...,StreamingTV,StreamingMovies,Tenure,Contract,PaymentMethod,PaperlessBilling,MonthlyCharges,TotalCharges,Churn,LTV_Segment
0,3668-QPYBK,Male,No,No,No,California,33.964131,-118.272783,90003,Yes,...,No,No,2,Month-to-month,Mailed check,Yes,53.85,108.15,Yes,Low
1,9237-HQITU,Female,No,No,Yes,California,34.059281,-118.30742,90005,Yes,...,No,No,2,Month-to-month,Electronic check,Yes,70.7,151.65,Yes,Low
2,9305-CDSKC,Female,No,No,Yes,California,34.048013,-118.293953,90006,Yes,...,Yes,Yes,8,Month-to-month,Electronic check,Yes,99.65,820.5,Yes,Low
3,7892-POOKP,Female,No,Yes,Yes,California,34.062125,-118.315709,90010,Yes,...,Yes,Yes,28,Month-to-month,Electronic check,Yes,104.8,3046.05,Yes,Low
4,0280-XJGEX,Male,No,No,Yes,California,34.039224,-118.266293,90015,Yes,...,Yes,Yes,49,Month-to-month,Bank transfer (automatic),Yes,103.7,5036.3,Yes,High


In [17]:
churn_tagged["LTV_Segment"].value_counts()

Low     5625
High    1407
Name: LTV_Segment, dtype: int64

## Creating the LTV_Analysis class

In [18]:
class LTV_Analysis:
    """Helpers for ranking customers by value and tagging them into segments.

    The class keeps no state of its own; it only groups the three helpers
    below so they can be used through a single object.
    """

    ## Descending sort (in place)

    def my_sort(self, xs, f=lambda x: x):
        """Sort ``xs`` in place from largest to smallest key and return it.

        Args:
            xs: Mutable sequence to reorder. Its contents are overwritten
                with the sorted order.
            f: One-argument key function; elements are ranked by
               ``f(element)``. Defaults to the identity.

        Returns:
            The same ``xs`` object, largest key first. Elements with equal
            keys keep their original relative order.
        """
        # Built-in sort instead of a hand-written O(n^2) exchange loop;
        # slice assignment keeps the "mutates and returns xs" contract.
        xs[:] = sorted(xs, key=f, reverse=True)
        return xs

    ## Descending sort for dictionaries

    def my_sort_dict(self, xs, f=lambda x:x[1]):
        """Return a new dict with the items of ``xs`` ordered largest first.

        Args:
            xs: Mapping whose ``(key, value)`` pairs are to be reordered.
            f: Key function applied to each ``(key, value)`` tuple. The
               default ranks the pairs by their value.

        Returns:
            A new dict in descending key-function order; ``xs`` is not
            modified.
        """
        # Call the method on self: relying on a module-level my_sort made
        # the class fail with NameError when used on its own.
        return dict(self.my_sort(list(xs.items()), f))

    ## Segmentation function

    def create_segment(self, df, LTV_col, target_col, LTV_value, upper_segment="High", lower_segment="Low"):
        """Label every row of ``df`` as upper or lower segment by a threshold.

        Args:
            df: DataFrame to tag. It is modified in place: ``target_col`` is
                added (or overwritten) on this very object.
            LTV_col: Name of the column holding the value to compare.
            target_col: Name of the column that receives the labels.
            LTV_value: Threshold; values strictly greater than it get
                ``upper_segment``, all others get ``lower_segment``.
            upper_segment: Label for rows above the threshold.
            lower_segment: Label for the remaining rows.

        Returns:
            The same ``df`` object that was passed in.
        """
        df[target_col] = df[LTV_col].map(lambda x: upper_segment if x > LTV_value else lower_segment)
        return df

In [27]:
l = LTV_Analysis()

In [None]:
l.my_sort_dict(LTV)

In [None]:
# View the first 5 items in the sorted dictionary

In [33]:
sorting = l.my_sort_dict(LTV)

In [34]:
dict(list(sorting.items())[0:5])

{'2889-FPWRM': 8684.8,
 '7569-NMZYQ': 8672.45,
 '9739-JLPQJ': 8670.1,
 '9788-HNGUT': 8594.4,
 '8879-XUAHX': 8564.75}

#### Tagging Customers

In [None]:
l.create_segment(df, "TotalCharges", "LTV_Segment", LTV_Value)

In [None]:
# View the first 10 rows of the tagged dataframe

In [30]:
# The method tags df in place and returns it, so analysis is df itself
# (already carrying LTV_Segment from the earlier create_segment call).
analysis= l.create_segment(df, "TotalCharges", "LTV_Segment", LTV_Value)

In [32]:
analysis.head(10)

Unnamed: 0,CustomerID,Gender,SeniorCitizen,Partner,Dependents,State,Latitude,Longitude,ZipCode,PhoneService,...,StreamingTV,StreamingMovies,Tenure,Contract,PaymentMethod,PaperlessBilling,MonthlyCharges,TotalCharges,Churn,LTV_Segment
0,3668-QPYBK,Male,No,No,No,California,33.964131,-118.272783,90003,Yes,...,No,No,2,Month-to-month,Mailed check,Yes,53.85,108.15,Yes,Low
1,9237-HQITU,Female,No,No,Yes,California,34.059281,-118.30742,90005,Yes,...,No,No,2,Month-to-month,Electronic check,Yes,70.7,151.65,Yes,Low
2,9305-CDSKC,Female,No,No,Yes,California,34.048013,-118.293953,90006,Yes,...,Yes,Yes,8,Month-to-month,Electronic check,Yes,99.65,820.5,Yes,Low
3,7892-POOKP,Female,No,Yes,Yes,California,34.062125,-118.315709,90010,Yes,...,Yes,Yes,28,Month-to-month,Electronic check,Yes,104.8,3046.05,Yes,Low
4,0280-XJGEX,Male,No,No,Yes,California,34.039224,-118.266293,90015,Yes,...,Yes,Yes,49,Month-to-month,Bank transfer (automatic),Yes,103.7,5036.3,Yes,High
5,4190-MFLUW,Female,No,Yes,No,California,34.066367,-118.309868,90020,Yes,...,No,No,10,Month-to-month,Credit card (automatic),No,55.2,528.35,Yes,Low
6,8779-QRDMV,Male,Yes,No,No,California,34.02381,-118.156582,90022,No,...,No,Yes,1,Month-to-month,Electronic check,Yes,39.65,39.65,Yes,Low
7,1066-JKSGK,Male,No,No,No,California,34.066303,-118.435479,90024,Yes,...,No internet service,No internet service,1,Month-to-month,Mailed check,No,20.15,20.15,Yes,Low
8,6467-CHFZW,Male,No,Yes,Yes,California,34.099869,-118.326843,90028,Yes,...,Yes,Yes,47,Month-to-month,Electronic check,Yes,99.35,4749.15,Yes,High
9,8665-UTDHZ,Male,No,Yes,No,California,34.089953,-118.294824,90029,No,...,No,No,1,Month-to-month,Electronic check,No,30.2,30.2,Yes,Low
