In [None]:
%%bash
# Install the packages this notebook relies on: DuckDB, the JupySQL %sql magic,
# the DuckDB SQLAlchemy dialect, pandas and the Google Cloud Storage client.
# NOTE(review): no versions are pinned, so a fresh run may pick up newer releases.
pip install duckdb
pip install jupysql
pip install duckdb-engine
pip install pandas
pip install google-cloud-storage


In [None]:
import duckdb
import pandas as pd

from google.cloud import storage
from google.colab import auth
auth.authenticate_user()

from sqlalchemy import create_engine, text
from sqlalchemy.pool import reset_commit
import gc
import sys

%load_ext sql
%config SqlMagic.autopandas = True
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

%sql duckdb:////content/dmeyf.db

%sql INSTALL httpfs;
%sql LOAD httpfs;
%sql SET s3_endpoint='storage.googleapis.com';
%sql SET s3_access_key_id='GOOGG3YM7GWPR2PZDRFYO4Y6';
%sql SET s3_secret_access_key='xxx';

In [None]:
%%sql
-- Load the raw dataset from the bucket into the DuckDB table competencia_03.
-- The cell magic has to be the first line of the cell, so this comment lives below it.
create or replace table competencia_03 as
select
    *
from read_csv_auto('s3://buk-tlsssr-dmeyf/datasets/competencia_03_crudo.csv.gz')

In [None]:
%%sql
-- Build datos_temp from the raw table competencia_03, adding a sequential month
-- index (mes), the month index found 1, 2 and 3 rows ahead for the same client,
-- and the target label clase_ternaria.
-- The source is competencia_03 (not datos_temp itself) so the cell runs on a
-- fresh kernel and gives the same result when it is re-run.
create or replace table datos_temp AS
WITH base AS (
    SELECT numero_de_cliente, foto_mes,
    -- sequential month index, 201901 is 1 and 202001 is 13
    (foto_mes//100-2019)*12+foto_mes%100 AS mes
    FROM competencia_03),
limite AS (
    SELECT max(mes) AS mes_max FROM base),
aux AS (
    SELECT numero_de_cliente, foto_mes, mes,
    -- -1 flags rows too close to the end of the data to know the outcome,
    -- 0 (the lead default) means the client has no row that far ahead.
    -- The border test uses mes instead of foto_mes so it stays correct when
    -- the last months straddle a change of year.
    IF(mes >= mes_max,
    -1, lead(mes,1,0) OVER (PARTITION BY numero_de_cliente ORDER BY mes)) AS "mes+1",
    IF(mes >= mes_max-1,
    -1, lead(mes,2,0) OVER (PARTITION BY numero_de_cliente ORDER BY mes)) AS "mes+2",
    IF(mes >= mes_max-2,
    -1, lead(mes,3,0) OVER (PARTITION BY numero_de_cliente ORDER BY mes)) AS "mes+3"
    FROM base CROSS JOIN limite)
SELECT *,
CASE
    -- Border conditions, the outcome is not observable yet
    WHEN ("mes+1"==-1) THEN NULL
    WHEN ("mes+2"==-1) THEN NULL
    WHEN ("mes+3"==-1) THEN NULL
    -- The row k steps ahead is missing or is not the consecutive month
    WHEN ("mes"+1 != "mes+1") THEN 'BAJA+1'
    WHEN ("mes"+2 != "mes+2") THEN 'BAJA+2'
    WHEN ("mes"+3 != "mes+3") THEN 'BAJA+3'
    -- The client is present in each of the next three months
    ELSE 'CONTINUA' END AS "clase_ternaria"
FROM aux
LEFT JOIN competencia_03 c USING (numero_de_cliente, foto_mes);
commit;

In [None]:
def consulta_sql(consulta):
    """Run one SQL statement against the notebook's DuckDB file and commit it.

    A fresh engine is created for the call and is always disposed afterwards,
    including when the statement fails, so a bad query does not leave an open
    handle on the database file.

    Args:
        consulta: SQL text to execute.

    Raises:
        Whatever the database driver raises for the statement; the error is
        propagated after the engine has been released.
    """
    engine = create_engine("duckdb:////content/dmeyf.db")
    try:
        # Leaving the `with` block closes the connection, so no explicit close is needed.
        with engine.connect() as db:
            db.execute(text(consulta))
            db.commit()
    finally:
        engine.dispose()
        gc.collect()
    print(f"Ejecutado: {consulta}")

In [None]:
# Replace NULL with 0 in the Master_/Visa_ columns and in three extra columns
# (the original author notes this is probably unnecessary).
engine = create_engine("duckdb:////content/dmeyf.db")

with engine.connect() as db:
  consulta = "SELECT column_name FROM information_schema.columns WHERE table_name = 'datos_temp';"
  # Each row holds a single value: the column name.
  columnas = [fila[0] for fila in db.execute(text(consulta)).fetchall()]
db.close()
engine.dispose()
gc.collect()

otras = ['mtarjeta_master_descuentos','mtarjeta_visa_descuentos','cmobile_app_trx']

# Card columns first, in table order, then the extra ones: one UPDATE per column.
de_tarjetas = [nombre for nombre in columnas if nombre.startswith("Visa_") or nombre.startswith("Master_")]
for col in de_tarjetas + otras:
    consulta = f"update datos_temp set {col} = 0 where {col} is null;"
    consulta_sql(consulta)

In [None]:
%%sql
-- Cast some columns to INTEGER.
-- The cell magic has to be the first line of the cell, so this comment lives below it.
UPDATE datos_temp SET cmobile_app_trx = CAST(cmobile_app_trx AS INTEGER);
UPDATE datos_temp SET tmobile_app = CAST(tmobile_app AS INTEGER);
ALTER TABLE datos_temp ALTER COLUMN cmobile_app_trx SET DATA TYPE INTEGER;
ALTER TABLE datos_temp ALTER COLUMN tmobile_app SET DATA TYPE INTEGER;
commit;

In [None]:
# Set "broken" variables to NULL: when a column sums to exactly 0 over a whole
# foto_mes, every value of that column in that month is replaced by NULL.
engine = create_engine("duckdb:////content/dmeyf.db")
try:
  with engine.connect() as conn:
    consulta = "SELECT column_name, data_type FROM information_schema.columns WHERE table_name = 'datos_temp';"
    tipos = dict(conn.execute(text(consulta)).fetchall())
finally:
  # Release the database handle even if the metadata query fails.
  engine.dispose()
  gc.collect()
# Materialise the names so the list does not depend on the dict staying unchanged.
columnas = list(tipos)

for col in columnas:
  if col.startswith("ct_0_") or col == "clase_ternaria":
    continue
  # One short-lived engine per column, as before, but always released.
  engine = create_engine("duckdb:////content/dmeyf.db")
  try:
    with engine.connect() as db:
      # Quote the identifier: a name such as mes+1 would otherwise be
      # evaluated as the expression mes + 1 instead of the column.
      consulta = f'SELECT sum("{col}"), foto_mes  FROM datos_temp group by foto_mes;'
      res = db.execute(text(consulta)).fetchall()
      # `total` rather than `sum`, which would shadow the builtin for the rest of the notebook.
      for total, mes in res:
        if total == 0:
          consulta = f"""
                        UPDATE datos_temp set "{col}" = NULL
                        WHERE datos_temp.foto_mes={mes};"""
          db.execute(text(consulta))
          db.commit()
          print(f"actualizado {col} {mes}")
  finally:
    engine.dispose()
    gc.collect()

In [None]:
%%sql
-- Feature engineering: derived totals, one ratio and one balance per row.
-- The cell magic has to be the first line of the cell, so this comment lives below it.
ALTER TABLE datos_temp ADD COLUMN cmobile_app_trx_2 INTEGER;
UPDATE datos_temp SET cmobile_app_trx_2 = CAST(cmobile_app_trx AS INTEGER);
commit;

ALTER TABLE datos_temp ADD COLUMN sum_cseguro INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_ctarjeta INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_mtarjeta FLOAT;
ALTER TABLE datos_temp ADD COLUMN sum_cpagos INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_mpagos FLOAT;
ALTER TABLE datos_temp ADD COLUMN sum_cpayroll INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_mpayroll FLOAT;
ALTER TABLE datos_temp ADD COLUMN sum_mcuenta FLOAT;
ALTER TABLE datos_temp ADD COLUMN sum_transacciones INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_consumo FLOAT;
ALTER TABLE datos_temp ADD COLUMN sum_inversion INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_inversion_dolares FLOAT;
ALTER TABLE datos_temp ADD COLUMN sum_inversion_pesos FLOAT;
ALTER TABLE datos_temp ADD COLUMN sum_descuentos INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_descuentos_m FLOAT;
ALTER TABLE datos_temp ADD COLUMN sum_comisiones INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_comisiones_m FLOAT;
ALTER TABLE datos_temp ADD COLUMN mtransferencias FLOAT;
ALTER TABLE datos_temp ADD COLUMN cajas_balance INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_atm FLOAT;
ALTER TABLE datos_temp ADD COLUMN sum_transacciones_otras INTEGER;
commit;

UPDATE datos_temp SET sum_cseguro = cseguro_vida + cseguro_auto + cseguro_vivienda + cseguro_accidentes_personales;
-- NOTE(review) an earlier UPDATE here wrote the automatic debit count
-- (ctarjeta_visa_debitos_automaticos + ctarjeta_master_debitos_automaticos + ccuenta_debitos_automaticos)
-- into sum_ctarjeta, but the unconditional UPDATE of sum_ctarjeta further down overwrote every row,
-- so it was a dead full-table rewrite and has been removed. The final data is unchanged.
-- If the debit count is wanted as a feature it needs a column of its own.
UPDATE datos_temp SET sum_mtarjeta = mttarjeta_visa_debitos_automaticos + mttarjeta_master_debitos_automaticos + mcuenta_debitos_automaticos;
UPDATE datos_temp SET sum_cpagos = cpagodeservicios + cpagomiscuentas;
UPDATE datos_temp SET sum_mpagos = mpagodeservicios + mpagomiscuentas;
UPDATE datos_temp SET sum_cpayroll = cpayroll_trx + cpayroll2_trx;
UPDATE datos_temp SET sum_mpayroll = mpayroll + mpayroll2;
UPDATE datos_temp SET sum_mcuenta = mcuenta_corriente_adicional + mcuenta_corriente + mcaja_ahorro + mcaja_ahorro_adicional;
UPDATE datos_temp SET sum_ctarjeta = ctarjeta_visa + ctarjeta_master;
UPDATE datos_temp SET sum_transacciones = ctarjeta_visa_transacciones + ctarjeta_master_transacciones;
UPDATE datos_temp SET sum_consumo = mtarjeta_visa_consumo + mtarjeta_master_consumo;
UPDATE datos_temp SET sum_inversion = cplazo_fijo + cinversion1 + cinversion2;
UPDATE datos_temp SET sum_inversion_dolares = mplazo_fijo_dolares + minversion1_dolares;
UPDATE datos_temp SET sum_inversion_pesos = mplazo_fijo_pesos + minversion1_pesos + minversion2;
UPDATE datos_temp SET sum_descuentos = ccajeros_propios_descuentos + ctarjeta_visa_descuentos + ctarjeta_master_descuentos;
UPDATE datos_temp SET sum_descuentos_m = mcajeros_propios_descuentos + mtarjeta_visa_descuentos + mtarjeta_master_descuentos;
UPDATE datos_temp SET sum_comisiones = ccomisiones_mantenimiento + ccomisiones_otras;
UPDATE datos_temp SET sum_comisiones_m = mcomisiones_mantenimiento + mcomisiones_otras;
-- Despite its name this column is the ratio received / sent, not a sum
UPDATE datos_temp SET mtransferencias = mtransferencias_recibidas / mtransferencias_emitidas;
UPDATE datos_temp SET cajas_balance = ccajas_depositos - ccajas_extracciones;
UPDATE datos_temp SET sum_atm = matm + matm_other;
UPDATE datos_temp SET sum_transacciones_otras = ctarjeta_debito_transacciones + ccallcenter_transacciones + chomebanking_transacciones + ccajas_transacciones + catm_trx + catm_trx_other + cmobile_app_trx_2;
-- Any NULL operand makes the total NULL, so those rows are reset to 0
UPDATE datos_temp SET sum_transacciones_otras = 0 WHERE sum_transacciones_otras IS NULL;
commit;

In [None]:
%%sql
-- More feature engineering: Master + Visa totals per row, plus a margin total.
-- The cell magic has to be the first line of the cell, so this comment lives below it.
-- NOTE(review) every column below is INTEGER, unlike the FLOAT used for the amount totals
-- in the previous cell. Confirm that storing these sums as integers is intended.
ALTER TABLE datos_temp ADD COLUMN sum_mlimitecompra  INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_mfinanciacion_limite  INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_msaldototal  INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_msaldopesos  INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_msaldodolares INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_mconsumospesos  INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_mconsumosdolares  INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_madelantopesos  INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_madelantodolares  INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_mpagado  INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_mpagospesos  INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_mpagosdolares  INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_mconsumototal  INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_cconsumos INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_cadelantosefectivo  INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_mpagominimo  INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_delinquency  INTEGER;
ALTER TABLE datos_temp ADD COLUMN sum_margen  INTEGER;
commit;

UPDATE datos_temp SET sum_mlimitecompra = Master_mlimitecompra + Visa_mlimitecompra;
UPDATE datos_temp SET sum_mfinanciacion_limite = Master_mfinanciacion_limite + Visa_mfinanciacion_limite;
UPDATE datos_temp SET sum_msaldototal = Visa_msaldototal + Master_msaldototal;
UPDATE datos_temp SET sum_msaldopesos = Visa_msaldopesos + Master_msaldopesos;
UPDATE datos_temp SET sum_msaldodolares  = Visa_msaldodolares + Master_msaldodolares;
UPDATE datos_temp SET sum_mconsumospesos = Visa_mconsumospesos + Master_mconsumospesos;
UPDATE datos_temp SET sum_mconsumosdolares = Visa_mconsumosdolares + Master_mconsumosdolares;
UPDATE datos_temp SET sum_madelantopesos = Master_madelantopesos + Visa_madelantopesos;
UPDATE datos_temp SET sum_madelantodolares =Master_madelantodolares + Visa_madelantodolares;
UPDATE datos_temp SET sum_mpagado =Master_mpagado +Visa_mpagado;
UPDATE datos_temp SET sum_mpagospesos =Master_mpagospesos +Visa_mpagospesos;
UPDATE datos_temp SET sum_mpagosdolares = Master_mpagosdolares + Visa_mpagosdolares;
UPDATE datos_temp SET sum_mconsumototal = Master_mconsumototal + Visa_mconsumototal;
UPDATE datos_temp SET sum_cconsumos= Master_cconsumos + Visa_cconsumos;
UPDATE datos_temp SET sum_cadelantosefectivo = Master_cadelantosefectivo + Visa_cadelantosefectivo;
UPDATE datos_temp SET sum_mpagominimo = Master_mpagominimo + Visa_mpagominimo;
UPDATE datos_temp SET sum_delinquency = Master_delinquency + Visa_delinquency;
UPDATE datos_temp SET sum_margen = mcomisiones + mactivos_margen + mpasivos_margen;
ALTER TABLE datos_temp DROP COLUMN tmobile_app;
commit;


In [None]:
# funciones para lags
import psutil
import gc

# Location of the DuckDB file shared by the three entry points below.
_DB_URL = "duckdb:////content/dmeyf.db"
# Month indexes present in the dataset (201901 -> 1 ... 202109 -> 33).
_MESES = range(1, 34)


def _columna_existe(db, tabla, col):
    """Return True when table `tabla` has a column named `col` (case-insensitive)."""
    consulta = text(
        "SELECT 1 FROM information_schema.columns "
        "WHERE lower(table_name) = lower(:tabla) "
        "AND lower(column_name) = lower(:col) LIMIT 1"
    )
    return db.execute(consulta, {"tabla": tabla, "col": col}).fetchone() is not None


def _agregar_columnas(db, tabla, nombres, tipo):
    """Add every column in `nombres` (type `tipo`) that is still missing, then commit.

    The commit is required: DuckDB refuses to UPDATE a table that was altered
    earlier in the same transaction.
    """
    for nombre in nombres:
        db.execute(text(f"ALTER TABLE {tabla} ADD COLUMN IF NOT EXISTS {nombre} {tipo};"))
    db.commit()


def _asignar_bins(db, tabla, col, percentiles, meses):
    """Fill `{col}_0` with the per-month quantile bin (1..percentiles) of `{col}`.

    For each month the `percentiles + 1` cut points are read with
    `quantile_disc` and turned into half-open ranges [start, end). The last
    range is closed on the right so rows equal to the monthly maximum land in
    the top bin instead of being left NULL. Months without data get NULL.
    """
    probabilidades = [i / percentiles for i in range(percentiles + 1)]
    ramas = []
    for mes in meses:
        cortes = db.execute(text(
            f"select quantile_disc({col},{probabilidades}) as q from {tabla} where mes={mes}"
        )).fetchone()[0]
        if not cortes:
            ramas.append(f"WHEN mes = {mes} THEN NULL")
            continue
        ultimo = len(cortes) - 2
        for i in range(len(cortes) - 1):
            operador = "<=" if i == ultimo else "<"
            ramas.append(
                f"WHEN mes = {mes} AND {col} >= {cortes[i]} "
                f"AND {col} {operador} {cortes[i + 1]} THEN {i + 1}"
            )
    if not ramas:
        # No months requested: nothing to assign, {col}_0 stays NULL.
        return
    sql_case = "\n".join(ramas)
    db.execute(text(f"UPDATE {tabla} SET {col}_0 = (CASE {sql_case} ELSE NULL END)"))


def _agregar_lags(db, tabla, origen, prefijo, cant):
    """Set `{prefijo}_{k}` to the value of `origen` k rows back, for each k in `cant`.

    "k rows back" is within the client's own history ordered by `mes`
    (LAG over PARTITION BY numero_de_cliente), so a client with missing months
    gets the value from k observed rows earlier, not k calendar months.
    """
    if not cant:
        return
    lags = ",\n".join(
        f"LAG({origen}, {k}) OVER (PARTITION BY numero_de_cliente ORDER BY mes) AS {prefijo}_{k}"
        for k in cant
    )
    asignaciones = ",\n".join(f"{prefijo}_{k} = t.{prefijo}_{k}" for k in cant)
    consulta = f"""
        WITH t AS
        (SELECT
        numero_de_cliente,
        mes,
        {lags}
        FROM {tabla})
        UPDATE {tabla} SET
        {asignaciones}
        FROM t
        WHERE ({tabla}.numero_de_cliente = t.numero_de_cliente AND {tabla}.mes = t.mes);
        """
    db.execute(text(consulta))


def add_historicos_con_bins(tabla, col, cant, percentiles, meses=_MESES):
    """Replace `col` with its per-month quantile bin plus lagged copies of that bin.

    Creates `{col}_0` (the bin of the current row) and `{col}_{k}` for each k in
    `cant` (the bin k rows back for the same client), then drops the raw `col`.
    If `col` is not in the table the function prints "<col> ya creada" and does
    nothing, which makes a second run a no-op.

    Args:
        tabla: table to modify.
        col: numeric column to bin.
        cant: iterable of lag distances.
        percentiles: number of bins (must be >= 1).
        meses: month indexes (`mes` values) to compute cut points for.

    Raises:
        ValueError: if `percentiles` is smaller than 1.
    """
    if percentiles < 1:
        raise ValueError("percentiles must be >= 1")
    cant = list(cant)
    engine = create_engine(_DB_URL)
    try:
        with engine.connect() as db:
            if not _columna_existe(db, tabla, col):
                print(f"{col} ya creada")
                return
            print(f"add historicos con bins: {col}")
            memory(1)
            _agregar_columnas(db, tabla, [f"{col}_0"] + [f"{col}_{x}" for x in cant], "INTEGER")
            _asignar_bins(db, tabla, col, percentiles, meses)
            db.commit()
            _agregar_lags(db, tabla, f"{col}_0", col, cant)
            db.commit()
            db.execute(text(f"ALTER TABLE {tabla} DROP COLUMN {col};"))
            db.commit()
    finally:
        # Always release the database file; uncommitted work is rolled back
        # when the connection closes, so a failure never commits half a step.
        engine.dispose()
        gc.collect()
        memory(8)


def crear_bins(tabla, col, percentiles, meses=_MESES):
    """Create `{col}_0` holding the per-month quantile bin of `col`.

    The raw column is kept. If `col` is not in the table the function prints
    "<col> ya creada" and does nothing.

    Args:
        tabla: table to modify.
        col: numeric column to bin.
        percentiles: number of bins (must be >= 1).
        meses: month indexes (`mes` values) to compute cut points for.

    Raises:
        ValueError: if `percentiles` is smaller than 1.
    """
    if percentiles < 1:
        raise ValueError("percentiles must be >= 1")
    engine = create_engine(_DB_URL)
    try:
        with engine.connect() as db:
            if not _columna_existe(db, tabla, col):
                print(f"{col} ya creada")
                return
            print(f"add bins: {col}")
            memory(1)
            _agregar_columnas(db, tabla, [f"{col}_0"], "INTEGER")
            _asignar_bins(db, tabla, col, percentiles, meses)
            db.commit()
    finally:
        engine.dispose()
        gc.collect()
        memory(8)


def add_historicos(tabla, col, cant, tipo_col):
    """Add lagged copies of `col` and rename `col` itself to `{col}_0`.

    Creates `{col}_{k}` (type `tipo_col`) for each k in `cant`, holding the value
    k rows back for the same client. If `col` is not in the table the function
    prints "<col> ya creada" and does nothing.

    Args:
        tabla: table to modify.
        col: column to lag.
        cant: iterable of lag distances.
        tipo_col: SQL type used for the new lag columns.
    """
    cant = list(cant)
    engine = create_engine(_DB_URL)
    try:
        with engine.connect() as db:
            if not _columna_existe(db, tabla, col):
                print(f"{col} ya creada")
                return
            print(f"add historicos: {col}")
            _agregar_columnas(db, tabla, [f"{col}_{x}" for x in cant], tipo_col)
            _agregar_lags(db, tabla, col, col, cant)
            db.commit()
            db.execute(text(f"ALTER TABLE {tabla} RENAME {col} TO {col}_0;"))
            db.commit()
    finally:
        engine.dispose()
        gc.collect()


def eliminar_columna(tabla, col):
    """Drop column `col` from `tabla` if it exists.

    Existence is probed with a one-row SELECT; when that fails the function
    prints "<col> no existe" and returns without altering the table. The engine
    is disposed on every path, including the early return and errors raised by
    the DROP itself.

    Args:
        tabla: table to modify.
        col: column to drop.
    """
    engine = create_engine("duckdb:////content/dmeyf.db")
    try:
        with engine.connect() as db:
            consulta = f"SELECT {col} FROM {tabla} LIMIT 1;"
            try:
                db.execute(text(consulta)).fetchone()
            except Exception:
                print(f"{col} no existe")
                return
            print(f"Eliminando: {col}")
            db.execute(text(f"ALTER TABLE '{tabla}' DROP COLUMN {col};"))
            db.commit()
    finally:
        # Runs for the early return as well, so the database file is always released.
        engine.dispose()

def memoria_variables(variables_locales, variables_globales):
    """Print the three largest entries of each namespace, biggest first.

    Sizes come from `sys.getsizeof`, so they are shallow: the contents of a
    container are not included in its figure.

    Args:
        variables_locales: mapping of name -> object (e.g. `locals()`).
        variables_globales: mapping of name -> object (e.g. `globals()`).
    """
    def _tres_mayores(espacio):
        # (name, size) pairs ordered by size, largest first; ties keep mapping order.
        pesos = [(nombre, sys.getsizeof(valor)) for nombre, valor in espacio.items()]
        return sorted(pesos, key=lambda par: par[1], reverse=True)[:3]

    for i, (nombre, tamaño) in enumerate(_tres_mayores(variables_locales)):
        print(f"Variable local #{i+1}: '{nombre}' ocupa {tamaño} bytes en memoria")

    for i, (nombre, tamaño) in enumerate(_tres_mayores(variables_globales)):
        print(f"Variable global #{i+1}: '{nombre}' ocupa {tamaño} bytes en memoria")

from psutil._common import bytes2human

def memory(num):
  """Print a checkpoint line with the system's inactive memory.

  Args:
      num: label printed in front of the figure, used to tell checkpoints apart.
  """
  # Only the `inactive` field of psutil's virtual-memory snapshot is reported.
  inactiva = psutil.virtual_memory().inactive
  print(f"{num} - inactive mem: {bytes2human(inactiva)}")

In [None]:
# Build the lag features: first read the current column names and types of the table
from sqlalchemy import create_engine,text
tabla = 'datos_temp'
engine = create_engine("duckdb:////content/dmeyf.db")

with engine.connect() as conn:
  consulta = "SELECT column_name, data_type FROM information_schema.columns WHERE table_name = 'datos_temp';"
  filas = conn.execute(text(consulta)).fetchall()
  # column name -> SQL data type
  columnas = dict(filas)
engine.dispose()

conn.close()
gc.collect()

# Columns passed to add_historicos_con_bins: each one is replaced by its
# per-month quantile bin plus lagged copies of that bin.
anteriores_con_bins = [
    "mcaja_ahorro_dolares",
    "mcuentas_saldo",
    "mautoservicio",
    "mforex_buy",
    "mforex_sell",
    "mextraccion_autoservicio",
    "mcheques_depositados",
    "mcheques_emitidos",
    "mcheques_depositados_rechazados",
    "mcheques_emitidos_rechazados",
    "ccajas_consultas",
    "ccajas_otras",
    #"p_rentabilidad",
    "cajas_balance",
    "mtransferencias",
    "sum_atm",
    "sum_cadelantosefectivo",
    "sum_cconsumos",
    "sum_comisiones",
    "sum_comisiones_m",
    "sum_consumo",
    "sum_cpagos",
    "sum_cseguro",
    "sum_ctarjeta",
    "sum_descuentos",
    "sum_descuentos_m",
    "sum_inversion",
    "sum_inversion_dolares",
    "sum_inversion_pesos",
    "sum_madelantodolares",
    "sum_madelantopesos",
    "sum_margen",
    "sum_mconsumosdolares",
    "sum_mconsumospesos",
    "sum_mconsumototal",
    "sum_mcuenta",
    "sum_mfinanciacion_limite",
    "sum_mlimitecompra",
    "sum_mpagado",
    "sum_mpagominimo",
    "sum_mpagos",
    "sum_mpagosdolares",
    "sum_mpagospesos",
    "sum_mpayroll",
    "sum_msaldodolares",
    "sum_msaldototal",
    "sum_mtarjeta",
    "sum_transacciones",
    "sum_transacciones_otras",
    "sum_msaldopesos",
    "ctrx_quarter",
]

# Columns passed to add_historicos: lagged as-is (no binning) and renamed to <col>_0.
# NOTE(review): "sum_cseguro" and "sum_ctarjeta" also appear in anteriores_con_bins;
# this list is processed first, after which the original column name no longer
# exists, so the binned pass reports them as "ya creada" and skips them.
anteriores_sin_bins = [
    "cproductos",
    "tcuentas",
    "ccuenta_corriente",
    "ccaja_ahorro",
    "cdescubierto_preacordado",
    "ctarjeta_debito",
    "cprestamos_personales",
    "cprestamos_prendarios",
    "cprestamos_hipotecarios",
    "ccaja_seguridad",
    "cforex",
    "ctransferencias_recibidas",
    "ctransferencias_emitidas",
    "cextraccion_autoservicio",
    "ccheques_depositados",
    "ccheques_emitidos",
    "ccheques_depositados_rechazados",
    "ccheques_emitidos_rechazados",
    "Master_status",
    "Visa_status",
    "sum_cseguro",
    "sum_cpayroll",
    "sum_ctarjeta",
    "sum_delinquency"
]


# Plain lags first (only for columns that exist in the table), then binned lags.
for col in anteriores_sin_bins:
  if col not in columnas:
    continue
  # The lag columns reuse the SQL type of the source column.
  add_historicos(tabla, col, [1,3,6,9,12], columnas[col])
  gc.collect()

total_con_bins = len(anteriores_con_bins)
for i, col in enumerate(anteriores_con_bins):
  print(f'{i}/{total_con_bins}')
  add_historicos_con_bins(tabla, col, [1,3,6,9,12], 50)
  gc.collect()

In [None]:
# Columns that are only binned: crear_bins adds <col>_0 with the per-month
# quantile bin and the raw column is dropped afterwards (no lags are created).
solo_bins = ['mactivos_margen',
'Master_madelantodolares',
'Master_madelantopesos',
'Master_mconsumosdolares',
'Master_mconsumospesos',
'Master_mconsumototal',
'Master_mfinanciacion_limite',
'Master_mlimitecompra',
'Master_mpagado',
'Master_mpagominimo',
'Master_mpagosdolares',
'Master_mpagospesos',
'Master_msaldodolares',
'Master_msaldopesos',
'Master_msaldototal',
'matm',
'matm_other',
'mcaja_ahorro',
'mcaja_ahorro_adicional',
'mcajeros_propios_descuentos',
'mcomisiones',
'mcomisiones_mantenimiento',
'mcomisiones_otras',
'mcuenta_corriente',
'mcuenta_corriente_adicional',
'mcuenta_debitos_automaticos',
'minversion1_dolares',
'minversion1_pesos',
'minversion2',
'mpagodeservicios',
'mpagomiscuentas',
'mpasivos_margen',
'mpayroll',
'mpayroll2',
'mplazo_fijo_dolares',
'mplazo_fijo_pesos',
'mprestamos_hipotecarios',
'mprestamos_personales',
'mprestamos_prendarios',
'mrentabilidad',
'mrentabilidad_annual',
'mtarjeta_master_consumo',
'mtarjeta_master_descuentos',
'mtarjeta_visa_consumo',
'mtarjeta_visa_descuentos',
'mtransferencias_emitidas',
'mtransferencias_recibidas',
'mttarjeta_master_debitos_automaticos',
'mttarjeta_visa_debitos_automaticos',
'Visa_madelantodolares',
'Visa_madelantopesos',
'Visa_mconsumosdolares',
'Visa_mconsumospesos',
'Visa_mconsumototal',
'Visa_mfinanciacion_limite',
'Visa_mlimitecompra',
'Visa_mpagado',
'Visa_mpagominimo',
'Visa_mpagosdolares',
'Visa_mpagospesos',
'Visa_msaldodolares',
'Visa_msaldopesos',
'Visa_msaldototal']

tabla = 'datos_temp'

# Bin each column into 50 per-month quantiles, then drop the raw column.
total_solo_bins = len(solo_bins)
for i, col in enumerate(solo_bins):
  print(f'{i}/{total_solo_bins}')
  crear_bins(tabla, col, 50)
  eliminar_columna(tabla, col)
  gc.collect()


In [None]:
%%sql
-- Remove the helper columns (month index and its look-ahead values) before exporting
ALTER TABLE datos_temp DROP COLUMN "mes";
ALTER TABLE datos_temp DROP COLUMN "mes+1";
ALTER TABLE datos_temp DROP COLUMN "mes+2";
ALTER TABLE datos_temp DROP COLUMN "mes+3";
commit;


In [None]:
%%sql
-- Export the finished table as a CSV file with a header row
COPY datos_temp
TO 'competencia_03_4.csv.gz'
(FORMAT CSV, HEADER 1);

In [None]:
# Mount Google Drive in the Colab runtime and copy the exported file into MyDrive.
from google.colab import drive
drive.mount('/content/drive')
!cp competencia_03_4.csv.gz /content/drive/MyDrive