In [73]:
import dask.dataframe as dd
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from joblib import load

# ///////////////////////////////////////////////
# Returns every client's churn probabilities: class 0 means no churn, class 1 means churn
def getChurnProbabilities(random_forest, x):
    """Return the class-probability matrix the model predicts for ``x``.

    Parameters
    ----------
    random_forest : object
        Any fitted estimator exposing ``predict_proba``.
    x : object
        Feature rows, forwarded to ``predict_proba`` unchanged.

    Returns
    -------
    object
        Exactly what ``random_forest.predict_proba(x)`` returns. Elsewhere in
        this file, column 1 of each row is read as the churn probability.
    """
    churn_probabilities = random_forest.predict_proba(x)
    return churn_probabilities

def showProbabilities(low, mid, high, proba_matrix, x):
    """Split clients into four churn-risk groups by their churn probability.

    Row ``i`` of ``proba_matrix`` is paired with row ``i`` of ``x`` *by
    position*. Pairing by index label is unsafe: when ``x`` carries duplicate
    labels, a label lookup returns several rows, which would then be flattened
    into one corrupted record.

    Parameters
    ----------
    low, mid, high : float
        Upper bounds (exclusive) of the "permanent", "low" and "mid" groups.
        Anything at or above ``high`` lands in the "high" group. The bounds
        are expected in ascending order; this is not validated.
    proba_matrix : sequence of sequences
        One row per client; element ``[1]`` is used as the churn probability.
    x : pandas.DataFrame
        Client features, one row per row of ``proba_matrix``.

    Returns
    -------
    tuple of four lists
        ``(clients_permanent, clients_low, clients_mid, clients_high)``. Each
        list holds one numpy array per client: the client's feature values
        followed by its churn probability.

    Raises
    ------
    IndexError
        If ``proba_matrix`` has more rows than ``x``.
    """
    clients_permanent = []
    clients_low = []
    clients_mid = []
    clients_high = []

    for position, client in enumerate(proba_matrix):
        churn_chance = client[1]
        # Positional lookup keeps exactly one feature row per client even when
        # the index labels of x are not unique.
        client_info = np.append(x.iloc[position].values, churn_chance)

        # Store the client in the first profile whose upper bound it is below.
        if churn_chance < low:
            clients_permanent.append(client_info)
        elif churn_chance < mid:
            clients_low.append(client_info)
        elif churn_chance < high:
            clients_mid.append(client_info)
        else:
            clients_high.append(client_info)

    return clients_permanent, clients_low, clients_mid, clients_high

# Measures a churn group's relevance in billing terms: returns the sum of BILL_AMOUNT over the group's clients
def getClassificationSum(churn_group, x):
    """Return the total ``BILL_AMOUNT`` of the clients in ``churn_group``.

    Parameters
    ----------
    churn_group : sequence of row records
        One record per client: the values of the columns of ``x`` followed by
        one extra churn-probability value.
    x : pandas.DataFrame
        Only its column labels are used, to name the fields of each record.

    Returns
    -------
    The sum of the ``BILL_AMOUNT`` column built from ``churn_group``.
    """
    # Column labels of x, with the first one relabelled and the extra
    # probability field appended at the end.
    column_labels = x.columns.to_list()
    column_labels[0] = "CUSTOMER_ID"
    column_labels.append('CHURN_PERCENTAGE')

    group_frame = pd.DataFrame(churn_group, columns=column_labels)
    return group_frame['BILL_AMOUNT'].sum()

# -----------------------------------------------
def make_clusters(file):
    """Placeholder: ignores ``file`` and always returns 1."""
    placeholder_result = 1
    return placeholder_result

# cluster: directory that contains the cluster's cluster.csv file
# cs1, cs2, cs3: ascending churn-probability thresholds that delimit the churn segments
def make_perfiles(cluster, cs1, cs2, cs3):
    """Segment one cluster by churn probability and write one CSV per segment.

    Reads ``<cluster>/cluster.csv``, drops its ``TARGET`` column, scores every
    row with the model stored in ``random_forest_churn.joblib`` and splits the
    rows with ``showProbabilities``. The four resulting groups are written to
    ``<cluster>/0.csv`` .. ``<cluster>/3.csv`` in the order returned by
    ``showProbabilities`` (probability below ``cs1``, below ``cs2``, below
    ``cs3``, and the rest).

    Parameters
    ----------
    cluster : str
        Directory that contains ``cluster.csv``; the segment files are written
        into the same directory.
    cs1, cs2, cs3 : float
        Churn-probability thresholds, forwarded as ``low``, ``mid`` and
        ``high`` to ``showProbabilities``.

    Returns
    -------
    None
    """
    # Read the cluster.
    cluster_csv = cluster + "/cluster.csv"
    print(cluster_csv)
    features = (
        dd.read_csv(cluster_csv)
        .drop(columns=['TARGET'])
        .compute()
        # dask numbers rows from 0 again in every partition, so a file read in
        # several partitions ends up with repeated index labels. Rebuild a
        # unique positional index before any per-row lookup happens.
        .reset_index(drop=True)
    )

    # Get the churn probability of every element of the cluster.
    # NOTE(review): joblib.load unpickles the file and can execute arbitrary
    # code from it; only load model files from a trusted source.
    random_forest = load('random_forest_churn.joblib')
    proba_matrix = getChurnProbabilities(random_forest, features)

    # Split the elements of the cluster into churn segments.
    segments = showProbabilities(cs1, cs2, cs3, proba_matrix, features)

    # Output header: feature columns (first one relabelled) plus the
    # probability value appended to every record.
    names = features.columns.to_list()
    names[0] = "CUSTOMER_ID"
    names.append('CHURN_PERCENTAGE')

    for segment_number, segment in enumerate(segments):
        segment_frame = pd.DataFrame(segment, columns=names)
        segment_frame.to_csv(cluster + "/" + str(segment_number) + ".csv")


In [78]:
# Segment the cluster stored in this directory using churn thresholds 0.2 / 0.5 / 0.7.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
make_perfiles(
    cluster="/home/alt9193/Documents/IA/DeepLearningBackend/",
    cs1=0.2,
    cs2=0.5,
    cs3=0.7,
)

/home/alt9193/Documents/IA/DeepLearningBackend//cluster.csv


In [79]:
def make_perfiles_info(cluster):
    """Summarise the four segment files ``0.csv`` .. ``3.csv`` found in ``cluster``.

    Parameters
    ----------
    cluster : str
        Directory that holds the segment CSV files.

    Returns
    -------
    list of dict
        One dict per segment file, in file order, with the keys ``"amount"``
        (row count), ``"bill amount"`` (sum of ``BILL_AMOUNT``), ``"lines"``
        (sums of ``PREPAID_LINES``, ``POSTPAID_LINES`` and ``OTHER_LINES``,
        each as a ``{"type", "amount"}`` dict) and ``"revenues"`` (sum of
        ``PARTY_REV``).
    """
    def _column_total(frame, column):
        # Sum one column of the lazy dask frame and materialise the result.
        return frame[column].sum().compute()

    summaries = []
    for segment_number in range(4):
        segment = dd.read_csv(cluster + "/" + str(segment_number) + ".csv")

        row_count = segment.shape[0].compute()
        bill_total = _column_total(segment, 'BILL_AMOUNT')
        line_totals = [
            {"type" : line_type, "amount" : _column_total(segment, line_type)}
            for line_type in ("PREPAID_LINES", "POSTPAID_LINES", "OTHER_LINES")
        ]
        revenue_total = _column_total(segment, 'PARTY_REV')

        summaries.append({
            "amount" : row_count,
            "bill amount" : bill_total,
            "lines" : line_totals,
            "revenues" : revenue_total,
        })

    return summaries

In [80]:
# Show the per-segment summary for the segment files in this directory.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
make_perfiles_info(cluster="/home/alt9193/Documents/IA/DeepLearningBackend/")

[{'amount': 4,
  'bill amount': 1812.6212345821666,
  'lines': [{'type': 'PREPAID_LINES', 'amount': 13.0},
   {'type': 'POSTPAID_LINES', 'amount': 17.0},
   {'type': 'OTHER_LINES', 'amount': 8.0}],
  'revenues': 16402.338333333333},
 {'amount': 0,
  'bill amount': 0,
  'lines': [{'type': 'PREPAID_LINES', 'amount': 0},
   {'type': 'POSTPAID_LINES', 'amount': 0},
   {'type': 'OTHER_LINES', 'amount': 0}],
  'revenues': 0},
 {'amount': 0,
  'bill amount': 0,
  'lines': [{'type': 'PREPAID_LINES', 'amount': 0},
   {'type': 'POSTPAID_LINES', 'amount': 0},
   {'type': 'OTHER_LINES', 'amount': 0}],
  'revenues': 0},
 {'amount': 0,
  'bill amount': 0,
  'lines': [{'type': 'PREPAID_LINES', 'amount': 0},
   {'type': 'POSTPAID_LINES', 'amount': 0},
   {'type': 'OTHER_LINES', 'amount': 0}],
  'revenues': 0}]