In [1]:
import pyodbc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from functools import reduce
import win32com.client
import os
from os import chdir, getcwd
import time
from factor_analyzer import FactorAnalyzer

import sweetviz as sv

import dask.dataframe as dd
import dask.array as da

from sklearn.neighbors import LocalOutlierFactor
from numpy import quantile, where, random
from sklearn.ensemble import IsolationForest

import pickle

import warnings
warnings.filterwarnings(action='once')


from tqdm import tqdm #libreria para ver el tiempo que demora el proceso
pd.options.display.max_columns = 50

# Definición de fucniones y fuente de datos

## Fuente de datos
Carga de la fuente de datos de 10M de líneas de 1 - 3 meses, al cual se le aplico el promedio.

In [2]:
ddf = dd.read_csv('./data/in_actividad_celular_media_arit_dic_feb2022.csv',sep='|')

## Función de Deteccion de Outliers

In [5]:
def deteccion_outlier_producto(ddf, clf , indicadores_positivos, indicadores_negativos=[]):
    df_2 = ddf[['SUBSCRIBER_ID','ID_SUBPRODUCTO_1_3']]

    for x in indicadores_positivos:
        df_2[x] = (ddf[x] - ddf[x].min(axis=0)).divide(ddf[x].max(axis=0) - ddf[x].min(axis=0))

    for y in indicadores_negativos:
        df_2[y] = (ddf[y].max(axis=0)-ddf[y]) / (ddf[y].max(axis=0) - ddf[y].min(axis=0))

    to_columns = df_2.columns[1:]

    pred_ppa = clf.fit_predict(df_2.loc[df_2['ID_SUBPRODUCTO_1_3'] == 0,to_columns])

    # Se cambia a pandas dataframe PPA
    df_final_ppa = df_2[df_2['ID_SUBPRODUCTO_1_3'] == 0].compute()

    df_final_ppa['anomaly'] = pred_ppa

    outliers_ppa=df_final_ppa.loc[df_final_ppa['anomaly']==-1]
    #outlier_ppa_index=list(outliers_ppa.index)

    sin_outliers_ppa=df_final_ppa.loc[df_final_ppa['anomaly']==1]
    #sin_outliers_ppa_index=list(sin_outliers_ppa.index)

    sin_outliers_ppa.drop(columns='anomaly', inplace=True)
    outliers_ppa.drop(columns='anomaly', inplace=True)

    # Se cambia a pandas dataframe POS
    pred_pos = clf.fit_predict(df_2.loc[df_2['ID_SUBPRODUCTO_1_3'] == 1,to_columns])

    df_final_pos = df_2[df_2['ID_SUBPRODUCTO_1_3'] == 1].compute()

    df_final_pos['anomaly'] = pred_pos

    outliers_pos=df_final_pos.loc[df_final_pos['anomaly']==-1]
    #outlier_pos_index=list(outliers_pos.index)

    sin_outliers_pos=df_final_pos.loc[df_final_pos['anomaly']==1]
    #sin_outliers_pos_index=list(sin_outliers_pos.index)

    sin_outliers_pos.drop(columns='anomaly', inplace=True)
    outliers_pos.drop(columns='anomaly', inplace=True)

    return outliers_ppa, sin_outliers_ppa,outliers_pos, sin_outliers_pos

## Analisis Factorial

#### Cargas Factoriales

In [6]:
def analisis_factorial(sin_outliers, outliers, columnas_eliminar):
    fa = FactorAnalyzer()
    fa.analyze(sin_outliers.drop(columns=columnas_eliminar)
            ,sin_outliers.drop(columns=columnas_eliminar).shape[1], rotation=None)
    # Eigenvalues
    ev, v = fa.get_eigenvalues()

    fa = FactorAnalyzer()
    fa.analyze(sin_outliers.drop(columns=columnas_eliminar)
            , len(ev[ev['Original_Eigenvalues']>=1]), rotation="varimax")
    # Factores de carga
    loadings_carga = fa.loadings
    # Cuadrados de las cargas factoriales
    cuadrado_loadings_carga = loadings_carga**2
    print("------Cuadrados de las cargas factoriales------")
    print(cuadrado_loadings_carga)
    # Varianza total: Sumatoria de las varianzas de cada factor
    varianza_por_factor = (cuadrado_loadings_carga).sum(axis=0).tolist()
    print("Varianza por factor:", varianza_por_factor)
    varianza_total = (cuadrado_loadings_carga).sum(axis=0).sum()
    print("Varianza total:", varianza_total)
    # Varianza Explicada sobre Varianza Total por factor
    VE_VT = varianza_por_factor/varianza_total
    print("Varianza por factor:",VE_VT)
    # Cargas factoriales normalizadas (contribución a la varianza de cada variable)
    cargas_factoriales_normalizadas = (cuadrado_loadings_carga)/varianza_por_factor
    print("------Cargas factoriales normalizadas------")
    print(cargas_factoriales_normalizadas)
    # Identificación de pertenencia de cada variable a factor
    max_loadings_carga_indexobj = (cuadrado_loadings_carga/varianza_por_factor).idxmax(axis=1)
    print("------Identificación de pertenencia de cada variable a factor------")
    print(max_loadings_carga_indexobj)
    print("Longitud de arreglo",cuadrado_loadings_carga.shape[1:2][0])
    # Pesos de cada variable
    lista = [np.nan]*cargas_factoriales_normalizadas.shape[0]

    for x,i,j in tqdm( zip( cargas_factoriales_normalizadas.index,max_loadings_carga_indexobj.tolist(),range(0,cargas_factoriales_normalizadas.shape[0]) ) ):
        lista[j]=cargas_factoriales_normalizadas.loc[x,i]
    print("------Pesos de cada variable------")
    print(lista)
    pesos = lista[:cargas_factoriales_normalizadas.shape[0]]
    
    VE_VT_map = pd.DataFrame(data=VE_VT.reshape(1,cuadrado_loadings_carga.shape[1:2][0]), index=[0], columns=cuadrado_loadings_carga.columns)
    lista_ve_vt = [np.nan]*cargas_factoriales_normalizadas.shape[0]

    for i,j in zip(max_loadings_carga_indexobj.tolist(),range(0,cargas_factoriales_normalizadas.shape[0])):
        lista_ve_vt[j]=VE_VT_map[i][0]

    #print(lista_ve_vt)
    factor_para_pesos = lista_ve_vt[:cargas_factoriales_normalizadas.shape[0]]

    pesos_finales = np.array(pesos)*np.array(factor_para_pesos)

    # Pesos normalizados
    pesos_normalizados = pesos_finales/sum(pesos_finales)

    # Multiplicación de cada observación por variable por el peso ponderado correspondiente

    ## concatenar la base de outliers y sin outliers para aplicarle los pesos.
    df_concatenado = sin_outliers.append(outliers)

    df_concatenado_2 = df_concatenado[columnas_eliminar]

    for x in indicadores_positivos:
        df_concatenado_2[x] = (df_concatenado[x] - df_concatenado[x].min(axis=0)).divide(df_concatenado[x].max(axis=0) - df_concatenado[x].min(axis=0))

    for y in indicadores_negativos:
        df_concatenado_2[y] = (df_concatenado[y].max(axis=0)-df_concatenado[y]) / (df_concatenado[y].max(axis=0) - df_concatenado[y].min(axis=0))
    
    matriz_ponderada = df_concatenado_2.drop(columns=columnas_eliminar)*pesos_normalizados

    return df_concatenado_2,matriz_ponderada

## Indicador

In [7]:
def generar_indicador(df_concatenado, matriz_ponderada, ddf, subproducto):
    indicador_grupo2 = pd.concat([df_concatenado,matriz_ponderada.sum(axis=1)],axis=1)

    indicador_grupo2.rename(columns={0:'Indicador'},inplace=True)

    indicador_grupo2.sort_values(by=['Indicador'], ascending=False).reset_index().drop(columns='index')

    score_final = pd.concat([ indicador_grupo2, pd.cut(indicador_grupo2.rename(columns={'Indicador':'Intervalo Indicador'})['Intervalo Indicador'], bins=1000),
                         pd.cut( indicador_grupo2.rename(columns={'Indicador':'Score'})['Score'], bins=1000, labels=np.arange(1000) ) ],axis=1)

    df2_pd = ddf[ ddf['ID_SUBPRODUCTO_1_3'] == subproducto ].compute()
    df2_pd = df2_pd.round(2)

    score_final_datos = score_final.sort_values(by=['Indicador'], ascending=False).reset_index().drop(columns='index')[['SUBSCRIBER_ID','Indicador','Intervalo Indicador','Score']].merge(df2_pd,on=['SUBSCRIBER_ID'])

    score_final_datos['Score'] = score_final_datos['Score'].astype(str)
    score_final_datos['Intervalo Indicador'] = score_final_datos['Intervalo Indicador'].astype(str)

    return score_final_datos


# Deteccion de outliers

In [8]:
indicadores_positivos = [
       'DIAS_USO_VOZ_1_3', 'DIAS_USO_VOZ_DATOS_1_3', 'TRAFICO_VOZ_1_3', 
       'TRAFICO_DATOS_1_3', 'TRAFICO_SMS_1_3', 
       'CANT_CONTACTOS_LLAMADOS_1_3', 'CANT_LLAMADAS_ENTRANTES_1_3', 'CANT_LLAMADAS_SALIENTES_1_3',
       'ANTIGUEDAD_1_3', 'PROMEDIO_LLAMADAS_ENTRANTES_1_3', 'PROMEDIO_LLAMADAS_SALIENTES_1_3'
                        ]

indicadores_negativos = [
                        'DIAS_RECENCIA_1_3'
                        ]

In [9]:
clf=IsolationForest(n_estimators=100, max_samples='auto', contamination='auto', \
                        max_features=1.0, bootstrap=False, n_jobs=-1, random_state=42, verbose=0)

In [10]:
outliers_ppa, sin_outliers_ppa,\
    outliers_pos, sin_outliers_pos = deteccion_outlier_producto(ddf, clf , indicadores_positivos, indicadores_negativos)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sin_outliers_ppa.drop(columns='anomaly', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outliers_ppa.drop(columns='anomaly', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sin_outliers_pos.drop(columns='anomaly', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outlier

# Actividad Celular - Multivariable

In [None]:
## Se definen variables que se utilizaran en las ejecuciones de PPA y POS
columnas_eliminar = ['SUBSCRIBER_ID','ID_SUBPRODUCTO_1_3','DIAS_RECENCIA_1_3']

## Ejecución de PPA

In [82]:
df_concatenado_2_ppa, matriz_ponderada_ppa = analisis_factorial(sin_outliers_ppa, outliers_ppa,columnas_eliminar)

  return lib.map_infer(x.astype(object)._values, func, ignore_na=ignore_na)
  smc = 1 - 1 / sp.diag(corr_inv)
  model = sp.dot(loadings, loadings.T)
  error = sp.sum(residual)
  sp.fill_diagonal(corr_mtx, 1 - solution)
  loadings = sp.dot(vectors, sp.diag(sp.sqrt(np.maximum(values, 0))))
  loadings = sp.dot(vectors, sp.diag(sp.sqrt(np.maximum(values, 0))))
  return lib.map_infer(x.astype(object)._values, func, ignore_na=ignore_na)
  smc = 1 - 1 / sp.diag(corr_inv)
  model = sp.dot(loadings, loadings.T)
  error = sp.sum(residual)
  sp.fill_diagonal(corr_mtx, 1 - solution)
  loadings = sp.dot(vectors, sp.diag(sp.sqrt(np.maximum(values, 0))))
  loadings = sp.dot(vectors, sp.diag(sp.sqrt(np.maximum(values, 0))))


------Cuadrados de las cargas factoriales------
                                  Factor1   Factor2   Factor3
DIAS_USO_VOZ_1_3                 0.650770  0.008256  0.220592
DIAS_USO_VOZ_DATOS_1_3           0.185248  0.005755  0.509344
TRAFICO_VOZ_1_3                  0.436640  0.000048  0.085895
TRAFICO_DATOS_1_3                0.125862  0.003443  0.299492
TRAFICO_SMS_1_3                  0.000954  0.000007  0.002061
CANT_CONTACTOS_LLAMADOS_1_3      0.375558  0.076361  0.087210
CANT_LLAMADAS_ENTRANTES_1_3      0.079276  0.387818  0.017362
CANT_LLAMADAS_SALIENTES_1_3      0.865329  0.000477  0.086536
ANTIGUEDAD_1_3                   0.004877  0.073486  0.012725
PROMEDIO_LLAMADAS_ENTRANTES_1_3  0.000039  0.122737  0.000758
PROMEDIO_LLAMADAS_SALIENTES_1_3  0.009759  0.220803  0.001817
Varianza por factor: [2.734311774324861, 0.8991916021802504, 1.3237924296205708]
Varianza total: 4.957295806125682
Varianza por factor: [0.55157325 0.18138752 0.26703922]
------Cargas factoriales normalizadas

11it [00:00, ?it/s]

------Pesos de cada variable------
[0.23800125842074593, 0.3847609371083352, 0.15968928122224743, 0.22623791634055018, 0.0015572204667779993, 0.1373502357400354, 0.4312960395015167, 0.31647064554780857, 0.08172491075491793, 0.13649700195035927, 0.24555727043067008]



  df_concatenado = sin_outliers.append(outliers)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_concatenado_2[x] = (df_concatenado[x] - df_concatenado[x].min(axis=0)).divide(df_concatenado[x].max(axis=0) - df_concatenado[x].min(axis=0))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_concatenado_2[x] = (df_concatenado[x] - df_concatenado[x].min(axis=0)).divide(df_concatenado[x].max(axis=0) - df_concatenado[x].min(axis=0))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the 

In [83]:
score_final_datos_ppa = generar_indicador(df_concatenado_2_ppa, matriz_ponderada_ppa, ddf, 0)

## Ejecución de POS

In [84]:
df_concatenado_2_pos, matriz_ponderada_pos = analisis_factorial(sin_outliers_pos, outliers_pos, columnas_eliminar)

  return lib.map_infer(x.astype(object)._values, func, ignore_na=ignore_na)
  smc = 1 - 1 / sp.diag(corr_inv)
  model = sp.dot(loadings, loadings.T)
  error = sp.sum(residual)
  sp.fill_diagonal(corr_mtx, 1 - solution)
  loadings = sp.dot(vectors, sp.diag(sp.sqrt(np.maximum(values, 0))))
  loadings = sp.dot(vectors, sp.diag(sp.sqrt(np.maximum(values, 0))))
  return lib.map_infer(x.astype(object)._values, func, ignore_na=ignore_na)
  smc = 1 - 1 / sp.diag(corr_inv)
  model = sp.dot(loadings, loadings.T)
  error = sp.sum(residual)
  sp.fill_diagonal(corr_mtx, 1 - solution)
  loadings = sp.dot(vectors, sp.diag(sp.sqrt(np.maximum(values, 0))))
  loadings = sp.dot(vectors, sp.diag(sp.sqrt(np.maximum(values, 0))))


------Cuadrados de las cargas factoriales------
                                  Factor1   Factor2   Factor3   Factor4
DIAS_USO_VOZ_1_3                 0.098433  0.004589  0.787532  0.000097
DIAS_USO_VOZ_DATOS_1_3           0.010823  0.056238  0.510714  0.000139
TRAFICO_VOZ_1_3                  0.624650  0.022053  0.019893  0.052739
TRAFICO_DATOS_1_3                0.002738  0.002150  0.149620  0.000897
TRAFICO_SMS_1_3                  0.000003  0.000116  0.010613  0.000036
CANT_CONTACTOS_LLAMADOS_1_3      0.229521  0.018302  0.264105  0.032348
CANT_LLAMADAS_ENTRANTES_1_3      0.608970  0.078913  0.013624  0.194757
CANT_LLAMADAS_SALIENTES_1_3      0.891944  0.004195  0.029842  0.000728
ANTIGUEDAD_1_3                   0.001897  0.025245  0.002568  0.002054
PROMEDIO_LLAMADAS_ENTRANTES_1_3  0.060255  0.597381  0.000003  0.106901
PROMEDIO_LLAMADAS_SALIENTES_1_3  0.000218  0.713389  0.010036  0.006140
Varianza por factor: [2.5294514424971304, 1.5225699706694882, 1.7985488729114212, 0.3968

11it [00:00, ?it/s]

------Pesos de cada variable------
[0.4378706330473652, 0.2839586693585108, 0.2469507226887532, 0.08318902150266823, 0.005900919628347813, 0.1468433368842006, 0.49077625169986805, 0.3526236605644597, 0.016580368917657094, 0.39235059376667936, 0.46854256463981314]



  df_concatenado = sin_outliers.append(outliers)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_concatenado_2[x] = (df_concatenado[x] - df_concatenado[x].min(axis=0)).divide(df_concatenado[x].max(axis=0) - df_concatenado[x].min(axis=0))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_concatenado_2[x] = (df_concatenado[x] - df_concatenado[x].min(axis=0)).divide(df_concatenado[x].max(axis=0) - df_concatenado[x].min(axis=0))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the 

In [85]:
score_final_datos_pos = generar_indicador(df_concatenado_2_pos, matriz_ponderada_pos, ddf, 1)

## Union entre ppa y postpago

In [86]:
score_final_datos = pd.concat([score_final_datos_ppa,score_final_datos_pos])

In [87]:
nombre_grupo = 'Score multivariable'

In [88]:
score_final_datos.to_csv('./data/Actividad Celular '+nombre_grupo+' 3M.csv',sep='|',decimal=',')



# Actividad Celular - Factor 1

In [11]:
## Se definen variables que se utilizaran en las ejecuciones de PPA y POS
columnas_eliminar = ['SUBSCRIBER_ID','ID_SUBPRODUCTO_1_3','DIAS_RECENCIA_1_3',
                    'DIAS_USO_VOZ_DATOS_1_3', 'TRAFICO_DATOS_1_3', 'TRAFICO_SMS_1_3']

nombre_factor = 'Factor 1'

## Ejecución de PPA

In [12]:
df_concatenado_2_ppa, matriz_ponderada_ppa = analisis_factorial(sin_outliers_ppa, outliers_ppa,columnas_eliminar)

  return lib.map_infer(x.astype(object)._values, func, ignore_na=ignore_na)
  smc = 1 - 1 / sp.diag(corr_inv)
  model = sp.dot(loadings, loadings.T)
  error = sp.sum(residual)
  sp.fill_diagonal(corr_mtx, 1 - solution)
  loadings = sp.dot(vectors, sp.diag(sp.sqrt(np.maximum(values, 0))))
  loadings = sp.dot(vectors, sp.diag(sp.sqrt(np.maximum(values, 0))))
  return lib.map_infer(x.astype(object)._values, func, ignore_na=ignore_na)
  smc = 1 - 1 / sp.diag(corr_inv)
  model = sp.dot(loadings, loadings.T)
  error = sp.sum(residual)
  sp.fill_diagonal(corr_mtx, 1 - solution)
  loadings = sp.dot(vectors, sp.diag(sp.sqrt(np.maximum(values, 0))))
  loadings = sp.dot(vectors, sp.diag(sp.sqrt(np.maximum(values, 0))))


------Cuadrados de las cargas factoriales------
                                  Factor1   Factor2
DIAS_USO_VOZ_1_3                 0.844837  0.005855
TRAFICO_VOZ_1_3                  0.526631  0.000064
CANT_CONTACTOS_LLAMADOS_1_3      0.468072  0.068820
CANT_LLAMADAS_ENTRANTES_1_3      0.104281  0.383869
CANT_LLAMADAS_SALIENTES_1_3      0.946289  0.000004
ANTIGUEDAD_1_3                   0.000799  0.065464
PROMEDIO_LLAMADAS_ENTRANTES_1_3  0.000574  0.122626
PROMEDIO_LLAMADAS_SALIENTES_1_3  0.003919  0.223870
Varianza por factor: [2.8954020623919474, 0.870571959346485]
Varianza total: 3.7659740217384323
Varianza por factor: [0.76883219 0.23116781]
------Cargas factoriales normalizadas------
                                  Factor1   Factor2
DIAS_USO_VOZ_1_3                 0.291786  0.006725
TRAFICO_VOZ_1_3                  0.181885  0.000073
CANT_CONTACTOS_LLAMADOS_1_3      0.161660  0.079052
CANT_LLAMADAS_ENTRANTES_1_3      0.036016  0.440938
CANT_LLAMADAS_SALIENTES_1_3      0.3268

8it [00:00, ?it/s]

------Pesos de cada variable------
[0.29178563962871773, 0.18188524332501352, 0.16166045397535078, 0.440938397049005, 0.3268248118667697, 0.0751964556545634, 0.14085656491585805, 0.25715304272592204]



  df_concatenado = sin_outliers.append(outliers)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_concatenado_2[x] = (df_concatenado[x] - df_concatenado[x].min(axis=0)).divide(df_concatenado[x].max(axis=0) - df_concatenado[x].min(axis=0))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_concatenado_2[x] = (df_concatenado[x] - df_concatenado[x].min(axis=0)).divide(df_concatenado[x].max(axis=0) - df_concatenado[x].min(axis=0))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the 

In [13]:
score_final_datos_ppa = generar_indicador(df_concatenado_2_ppa, matriz_ponderada_ppa, ddf, 0)

## Ejecución de POS

In [14]:
df_concatenado_2_pos, matriz_ponderada_pos = analisis_factorial(sin_outliers_pos, outliers_pos, columnas_eliminar)

  return lib.map_infer(x.astype(object)._values, func, ignore_na=ignore_na)
  smc = 1 - 1 / sp.diag(corr_inv)
  model = sp.dot(loadings, loadings.T)
  error = sp.sum(residual)
  sp.fill_diagonal(corr_mtx, 1 - solution)
  loadings = sp.dot(vectors, sp.diag(sp.sqrt(np.maximum(values, 0))))
  loadings = sp.dot(vectors, sp.diag(sp.sqrt(np.maximum(values, 0))))
  return lib.map_infer(x.astype(object)._values, func, ignore_na=ignore_na)
  smc = 1 - 1 / sp.diag(corr_inv)
  model = sp.dot(loadings, loadings.T)
  error = sp.sum(residual)
  sp.fill_diagonal(corr_mtx, 1 - solution)
  loadings = sp.dot(vectors, sp.diag(sp.sqrt(np.maximum(values, 0))))
  loadings = sp.dot(vectors, sp.diag(sp.sqrt(np.maximum(values, 0))))


------Cuadrados de las cargas factoriales------
                                  Factor1   Factor2
DIAS_USO_VOZ_1_3                 0.348055  0.000017
TRAFICO_VOZ_1_3                  0.512633  0.051211
CANT_CONTACTOS_LLAMADOS_1_3      0.470875  0.010063
CANT_LLAMADAS_ENTRANTES_1_3      0.545675  0.088047
CANT_LLAMADAS_SALIENTES_1_3      0.829929  0.023361
ANTIGUEDAD_1_3                   0.005644  0.016520
PROMEDIO_LLAMADAS_ENTRANTES_1_3  0.037271  0.425636
PROMEDIO_LLAMADAS_SALIENTES_1_3  0.006875  0.984950
Varianza por factor: [2.7569565371242413, 1.59980552037619]
Varianza total: 4.356762057500431
Varianza por factor: [0.63279943 0.36720057]
------Cargas factoriales normalizadas------
                                  Factor1   Factor2
DIAS_USO_VOZ_1_3                 0.126246  0.000010
TRAFICO_VOZ_1_3                  0.185941  0.032011
CANT_CONTACTOS_LLAMADOS_1_3      0.170795  0.006290
CANT_LLAMADAS_ENTRANTES_1_3      0.197927  0.055036
CANT_LLAMADAS_SALIENTES_1_3      0.301031

8it [00:00, 8025.46it/s]

------Pesos de cada variable------
[0.1262460843597112, 0.1859414828522037, 0.1707950711038244, 0.19792663407023042, 0.301031026868394, 0.0103263661732792, 0.2660550523759951, 0.6156687019257366]



  df_concatenado = sin_outliers.append(outliers)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_concatenado_2[x] = (df_concatenado[x] - df_concatenado[x].min(axis=0)).divide(df_concatenado[x].max(axis=0) - df_concatenado[x].min(axis=0))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_concatenado_2[x] = (df_concatenado[x] - df_concatenado[x].min(axis=0)).divide(df_concatenado[x].max(axis=0) - df_concatenado[x].min(axis=0))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the 

In [15]:
score_final_datos_pos = generar_indicador(df_concatenado_2_pos, matriz_ponderada_pos, ddf, 1)

## Union entre ppa y postpago

In [16]:
score_final_datos = pd.concat([score_final_datos_ppa,score_final_datos_pos])

In [17]:
score_final_datos.to_csv('./data/Actividad Celular '+nombre_factor+' 3M.csv',sep='|',decimal=',')

In [None]:
# 