In [None]:
import os

# Switch for the Drive/Sheets authentication section below.
from_drive = True  # same flag you use everywhere

# One-time environment bootstrap. The whole section is skipped when the
# ATLAS_BOOTSTRAPPED environment variable is already "1"; it is set to "1" at
# the end of this branch.
# NOTE(review): in the skipped case this cell creates neither `drive` nor `gc`,
# which later cells use — they must already exist in the kernel.
if os.environ.get("ATLAS_BOOTSTRAPPED") != "1":
    # ---------- GIT ON COLAB ONLY ----------
    # Any exception raised in this try block (including a failed
    # `google.colab` import) is printed and execution continues with the
    # Drive section below.
    try:
        from google.colab import userdata

        # Values fetched through Colab's `userdata`.
        git_token = userdata.get('gitToken')
        # NOTE: `git_user` is not referenced again in this cell.
        git_user = userdata.get('gitUser')
        # NOTE(review): the token is embedded in the remote URL handed to
        # `git clone`; presumably it ends up in the clone's git config —
        # confirm this is acceptable.
        git_url = f'https://{git_token}@github.com/rene-aum/Atlas.git'
        branch_to_pull = 'dev'

        os.chdir('/content')

        # Clone only when there is no `Atlas` directory under /content yet.
        if not os.path.isdir('Atlas'):
            !git clone {git_url}

        # Bring the local checkout to the tip of `branch_to_pull`.
        %cd Atlas
        !git fetch origin {branch_to_pull}
        !git checkout {branch_to_pull}
        !git pull origin {branch_to_pull}

        # Install the pipeline requirements, then make PipelinesConsumo the
        # working directory for the rest of the notebook.
        !pip install -r PipelinesConsumo/src/requirements.txt
        %cd PipelinesConsumo

    except Exception as e:
        print(e)
        print('Running in other environment not colab probably!')

    # ---------- DRIVE + SHEETS ----------
    # NOTE: this section imports `google.colab` outside the try block above,
    # so with `from_drive` set it raises when that package is unavailable.
    if from_drive:
        from pydrive2.auth import GoogleAuth
        from pydrive2.drive import GoogleDrive
        from google.colab import auth
        from oauth2client.client import GoogleCredentials
        import gspread
        from google.auth import default
        from gspread_dataframe import set_with_dataframe
        import gdown

        # Authenticate the Colab user, then build the PyDrive2 client
        # (`drive`) from the application-default credentials.
        auth.authenticate_user()
        gauth = GoogleAuth()
        gauth.credentials = GoogleCredentials.get_application_default()
        drive = GoogleDrive(gauth)

        # gspread client (`gc`) authorised with the google.auth default
        # credentials; later cells pass it to the Sheets helpers.
        creds, _ = default()
        gc = gspread.authorize(creds)

    # Mark the bootstrap as done so re-running this cell takes the else branch.
    os.environ["ATLAS_BOOTSTRAPPED"] = "1"
else:
    print("Bootstrap already done, assuming orchestrator ran it.")


In [None]:

# Standard-library and third-party imports.
import sys
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# import pytz
# from matplotlib.ticker import FuncFormatter
from datetime import datetime, timedelta
import warnings
# NOTE: `sys` is already imported at the top of this cell; the repeat is harmless.
import sys
# Extend the module search path with the parent and grandparent directories
# BEFORE the project imports below run. Presumably this is what lets `utils`,
# `src` and `PipelinesConsumo` resolve from the working directory set by the
# bootstrap cell — confirm. The paths are relative, so they depend on the
# current working directory.
sys.path.append('..')
sys.path.append('../..')
# Project-local helpers.
from utils.utils import (get_dates_dataframe,
                       add_year_week,
                       custom_read,
                       process_columns,
                       remove_accents)
from PipelinesConsumo.src.rawAtlas import RawAtlas
from PipelinesConsumo.src.processedAtlas import ProcessedAtlas
from src.transformed import Transformed
# Drive / Sheets I/O helpers used by the load and upload cells.
from utils.drive_toolbox import(from_drive_to_local,
                             get_last_modification_date_drive,
                             create_sheets_in_drive_folder,
                             update_sheets_in_drive_folder,
                             read_from_google_sheets,
                             list_file_ids_for_drive_folder,
                             create_csv_file_in_drive_folder,
                             write_csv_to_drive,
                             read_csv_from_drive)
# NOTE: `get_dates_dataframe` is already imported from `utils.utils` above.
from utils.utils import get_dates_dataframe
# Drive folder / spreadsheet identifiers shared across notebooks.
from src.constants import (atlas_raw_output_folder_id,
                           atlas_consumo_output_folder_id,
                           consumo_sheets_ids_dict,
                           data_source_folder_id,
                           raw_output_ids,
                           folder_id_bauto_gabo,
                           id_reporte_ventas,
                           id_torre_de_control,
                           )


# NOTE(review): this silences every warning for the rest of the session,
# including pandas deprecation warnings — consider narrowing the filter.
warnings.filterwarnings('ignore')





## **Fuentes que usaremos: Publicaciones, Pedidos y Clientes**


In [None]:

def _read_consumo_sheet(sheet_key):
    """Read the sheet whose id is registered under ``sheet_key`` in ``consumo_sheets_ids_dict``.

    Thin wrapper around ``read_from_google_sheets`` using the notebook's
    ``gc`` client; returns whatever that helper returns (used as a DataFrame
    by the cells below).
    """
    return read_from_google_sheets(gc, consumo_sheets_ids_dict[sheet_key])


# The three base tables of the notebook, read in this order.
publicaciones = _read_consumo_sheet('AcPublicaciones')
pedidos = _read_consumo_sheet('AcPedidos')
clientes = _read_consumo_sheet('AcClientes')



### **Pedidos**


In [None]:

# Shape the orders table for the output sheet: expose the buyer id as `id_am`,
# keep a fixed set of columns and attach the publication date of each VIN.
pedidos = (
    pedidos
    .rename(columns={'id_am_comprador': 'id_am'})
    [[
        'id_am', 'sf_order_id', 'fecha_de_creacion', 'descripcion_vehiculo', 'vin',
        'sf_order_status', 'id_am_vendedor', 'precio_de_publicacion',
    ]]
    # Left join: every order row is kept; `published_at` is empty when the VIN
    # has no row in `publicaciones`.
    .merge(publicaciones[['vin', 'published_at']], on='vin', how='left')
)

# Target spreadsheet and tab for the orders table.
output_usuarios_clientes = '1ajQtmiCE51Xrzbkv2s-lbAmyc_UcBB4v6LEb7PJp1aY'
update_sheets_in_drive_folder(gc, output_usuarios_clientes, 'Pedidos', pedidos)



### **Clientes**


In [None]:

def _last_ten_digits(phone):
    """Reduce a phone Series to its last ten characters as a nullable integer.

    The values are cast to ``Int64``, rendered as text and cut to the final
    ten characters; anything that cannot be parsed back to a number becomes
    ``<NA>``.
    """
    trimmed = phone.astype('Int64').astype(str).str[-10:]
    return pd.to_numeric(trimmed, errors='coerce').astype('Int64')


# Customer fields kept for the output sheet, with the phone normalised.
clientes = (
    clientes[['id_am', 'billing_firstname', 'billing_lastname', 'phone', 'email', 'customer_since']]
    .assign(phone=lambda df: _last_ten_digits(df.phone))
)

# Independent copy of the cleaned customers; later cells join against it.
clientes_mod = clientes.copy()

# Target spreadsheet and tab for the customers table.
# NOTE: this rebinds `output_usuarios_clientes`, which the orders cell pointed
# at a different spreadsheet.
output_usuarios_clientes = '1KAytwJrSseCiaJkU09tIxBJJPKmgN7FV2VlBTQsr_qY'
update_sheets_in_drive_folder(gc, output_usuarios_clientes, 'Clientes', clientes)


In [None]:

### **Sección API's**

def _normalize_label(labels):
    """Apply ``remove_accents`` to every value, then strip whitespace and upper-case."""
    return labels.apply(remove_accents).str.strip().str.upper()


# Load every input of the API funnel. Any failure is reported and recorded in
# `flag_insumos_api` (1 = all inputs loaded, 0 = something failed) instead of
# stopping the notebook; downstream cells are expected to check the flag.
try:
    # Status catalogues: both tabs live in the 'CatalogoSolicitudesAprobacion'
    # spreadsheet inside the catalogues folder.
    folder_id_catalogos = "1TE24Yl4lQ6ZxHJSw_ZDzb6W4XpB-21FL"
    id_catalogos_status = list_file_ids_for_drive_folder(drive, folder_id_catalogos)['CatalogoSolicitudesAprobacion']
    cat_tarea_actual = (
        read_from_google_sheets(gc, id_catalogos_status, sheetname='CatTareaActual')
        .assign(
            tareaactual=lambda cat: _normalize_label(cat.nb_tarea_actual),
            status_automarket=lambda cat: _normalize_label(cat.status_am),
        )
        .drop(columns=['nb_tarea_actual', 'status_am'])
    )
    cat_decision_sistema = (
        read_from_google_sheets(gc, id_catalogos_status, sheetname='CatDecisionSistema')
        .assign(
            decisionsistema=lambda cat: _normalize_label(cat.decisionsistema),
            status_riesgos=lambda cat: _normalize_label(cat.riesgos),
        )
        .drop(columns=['riesgos'])
    )

    # EDAs, consolidated bauto (last status) and customers.
    edas = read_from_google_sheets(gc, consumo_sheets_ids_dict['AcEdas'])
    bauto = read_from_google_sheets(gc, consumo_sheets_ids_dict['AcConsolidadoBautoLastStatus'])
    # NOTE: this re-reads the raw customers sheet and rebinds `clientes`; the
    # cleaned copy made earlier stays available as `clientes_mod`.
    clientes = read_from_google_sheets(gc, consumo_sheets_ids_dict['AcClientes'])

    # Column subset / rename map for lead data. Not referenced in the visible
    # part of the notebook; kept because later cells may rely on them.
    subset_columns = ['id_lead', 'origen_automarket', 'id_comprador', 'id_de_ultimo_pedido', 'folio_bauto',
                      'espacio_automarket', 'asesor_de_ventas', 'fecha_de_asignacion', 'total_apartados',
                      'estatus_de_lead', 'lead_contactado_(visualiza_cc)',
                      'fecha_de_cierre_del_lead', 'motivos_de_cancelacion',
                      'correo_recibido_en_buzon_contingencia', 'documentacion_completa_contingencia',
                      ]
    rename_dict = {'lead_contactado_(visualiza_cc)': 'lead_contactado'}

    # ROD folios: download one .xlsx export from the folder and load it.
    folder_id_folios_rod = '1OW4yxE7h8BCcn0mqhCfk05B4_27Ghvfd'
    files_rod = list_file_ids_for_drive_folder(drive, folder_id_folios_rod)
    files = [name for name in files_rod.keys() if '.xlsx' in name]
    if not files:
        # Explicit message instead of the bare "list index out of range".
        raise FileNotFoundError(f"No .xlsx file found in Drive folder {folder_id_folios_rod}")
    # NOTE (original author): revisit this pick on every run. The first .xlsx
    # returned by the listing is used; no ordering is applied here, so it is
    # the latest export only if the listing helper returns it first.
    latest_rod_id = files_rod.get(files[0])
    print('Fecha archivo folios rod:')
    print(files[0])
    from_drive_to_local(drive, latest_rod_id, 'rod_latest.xlsx')
    rod = (
        pd.read_excel('rod_latest.xlsx')
        .rename(columns={
            'Name': 'intento',
            'MX_ATN_Id_Simulacion__c': 'n_simulacion',
            'MX_ATN_creditId__c': 'folio',
            'MX_ATN_Account__r.Name': 'name',
            'MX_ATN_Account__r.MX_ATN_CommerceId__c': 'id_am',
            'MX_ATN_Account__r.MX_ATN_PrimaryContact__r.Email': 'email',
            'MX_ATN_Account__r.MX_ATN_PrimaryContact__r.MobilePhone': 'phone',
            'MX_ATN_Status__c': 'status',
        })
        .assign(
            CreatedDate=lambda df: pd.to_datetime(df.CreatedDate, format="%d/%m/%Y, %H:%M"),
            # Phone rendered as text after an integer cast (missing values
            # become the string '<NA>').
            phone=lambda df: df.phone.astype('Int64').astype(str),
        )
    )
except Exception as e:
    print('Error en load de insumos para funnel api. No se incluirá en la master table.')
    print(e)
    flag_insumos_api = 0
else:
    flag_insumos_api = 1

# Build the API-origin view of bauto only when every API input was loaded.
if flag_insumos_api == 1:
    bauto_mod_api = (
        bauto
        .assign(
            # 1 when the folio also appears in the EDAs table, else 0.
            flag_eda=lambda df: np.where(df.folio.isin(edas.folio.unique()), 1, 0),
            # Folios found in EDAs are relabelled 'EDA'; the rest keep `origen`.
            origen_real=lambda df: np.where(df.flag_eda == 1, 'EDA', df.origen),
            # Same text normalisation as the catalogues, so the joins below match.
            decisionsistema=lambda df: df.decisionsistema.apply(remove_accents).str.strip().str.upper(),
            tareaactual=lambda df: df.tareaactual.apply(remove_accents).str.strip().str.upper(),
            telefono=lambda df: df.telefono.astype('Int64'),
        )
        [lambda df: df.origen_real == 'API']
        .merge(cat_tarea_actual, on='tareaactual', how='left')
        .merge(cat_decision_sistema, on='decisionsistema', how='left')
        .drop_duplicates()
        .sort_values(by='fecha_creacion', ascending=False)
    )

    # One 0/1 indicator column per `status_automarket` value, appended to the table.
    dummies_status_df = (pd.get_dummies(bauto_mod_api.status_automarket) * 1).add_prefix('status_automarket_')
    bauto_mod_api = pd.concat([bauto_mod_api, dummies_status_df], axis=1)





In [None]:

# Target spreadsheet for the ROD/API folios table.
output_usuarios_apis = '1fF4PXpUvAz0rvmRkrls2xb80Ua5JL73w6ghqLqun6yI'
# `rod` is only (re)built when the API inputs load succeeded. Without this
# guard a failed load ends in a NameError here, or, on a reused kernel, in an
# upload of a stale `rod` from a previous run.
if flag_insumos_api == 1:
    update_sheets_in_drive_folder(gc, output_usuarios_apis, 'APIS', rod)
else:
    print('Insumos de API no disponibles: se omite la actualización de la hoja APIS.')



### **Leads**


In [None]:

# Download the historical-leads export flagged as '_latest' and keep the
# columns needed for the leads sheet.
folder_id_historicoleads = "1zvW-Dxow9gz1Dnbpg_jO7my4wadDvJDW"
id_files_historicoleads = list_file_ids_for_drive_folder(drive, folder_id_historicoleads)
files_historicoleads = [name for name in id_files_historicoleads.keys() if '_latest' in name]
if not files_historicoleads:
    # Fail with an explicit message instead of a bare IndexError on `[0]`.
    raise FileNotFoundError(
        f"No file with '_latest' in its name found in Drive folder {folder_id_historicoleads}"
    )
# The file id comes from the name -> id mapping; the filtered list only holds names.
latest_historicoleads_id = id_files_historicoleads.get(files_historicoleads[0])
from_drive_to_local(drive, latest_historicoleads_id, 'historico_tc_latest.csv')
leads = (
    pd.read_csv('historico_tc_latest.csv')
    .rename(columns={'id comprador': 'id_am', 'id lead': 'id_lead'})
    [[
        'id_am', 'id_lead', 'fecha de asignacion', 'estatus de lead', 'origen automarket',
        'espacio automarket', 'asesor espacio', 'asesor credito',
    ]]
)



In [None]:

# Target spreadsheet and tab for the leads table.
# NOTE: this rebinds `output_usuarios_apis`, which an earlier cell pointed at
# the APIS spreadsheet.
output_usuarios_apis = '1lbkOdSj6prOuAGEFHAvhPo72FYgWGLZEW2d4YTT66oM'
update_sheets_in_drive_folder(gc, output_usuarios_apis, 'Leads', leads)

## Otros folios

In [None]:
# Lookup of folios present in ROD: one row per ROD record that has a folio,
# tagged with `flag_rod = 1` and carrying the ROD customer id as `id_am_rod`.
rodmod = (
    rod[rod.folio.notna()][['folio', 'id_am']]
    .assign(flag_rod=1)
    .rename(columns={'id_am': 'id_am_rod'})
)

In [None]:

# Folios in bauto that are neither in ROD nor in EDAs, attributed to a
# customer through a case-insensitive email match.
otros_folios_final = (
    bauto
    .assign(email=lambda x: x.email.str.lower())
    # Tag folios already present in ROD, then discard them.
    .merge(rodmod, on='folio', how='left')
    [lambda x: x.flag_rod != 1]
    .merge(
        # Customers without an email are removed from the lookup: pandas
        # matches null keys with each other (and blank strings are equal), so
        # a bauto row with no email would otherwise be attached to every
        # customer that has no email either.
        clientes_mod[['id_am', 'email']]
        [lambda x: x.email.notna() & (x.email != '')]
        .assign(email=lambda x: x.email.str.lower()),
        on='email',
        how='left',
    )
    # Keep only folios that were matched to a customer.
    [lambda x: x.id_am.notna()]
    # Drop folios that belong to the EDA funnel.
    [lambda x: ~x.folio.isin(edas.folio.unique())]
    [['id_am', 'email', 'fecha_creacion', 'folio', 'origen']]
    .rename(columns={'origen': 'origen_bauto'})
)

In [None]:
# Target spreadsheet and tab for the "other folios" table.
# NOTE(review): the EDAs upload cell further down uses this same spreadsheet
# id and the same 'Hoja 1' tab. If the helper overwrites the tab, that later
# upload replaces this one — confirm both ids are intended.
id_otros_folios = '1myAkBxjQ0Hmll5JejkZTgvqACA47WHPK99KR5zIpbAU'
update_sheets_in_drive_folder(gc, id_otros_folios, 'Hoja 1', otros_folios_final)

## Edas

In [None]:

# EDA folios attributed to a customer through the phone number.
edas_folios = (
    edas[['folio', 'telefono_celular_del_cliente', 'fecha_ref']]
    .rename(columns={'telefono_celular_del_cliente': 'phone'})
    # Phones that cannot be parsed as numbers become <NA> and are dropped, so
    # the join below never runs on a missing key from the EDA side.
    .assign(phone=lambda eda: pd.to_numeric(eda.phone, errors='coerce').astype('Int64'))
    [lambda eda: eda.phone.notna()]
    .merge(clientes_mod[['id_am', 'phone']], on='phone', how='left')
    # Keep only folios whose phone matched a customer.
    [lambda eda: eda.id_am.notna()]
)


In [None]:
# Target spreadsheet and tab for the EDA folios table.
# NOTE(review): this is the same spreadsheet id and the same 'Hoja 1' tab that
# the "other folios" upload cell writes to. If the helper overwrites the tab,
# this call replaces that earlier upload — looks like a copy-paste of the id;
# confirm the intended destination.
id_folios_edas = '1myAkBxjQ0Hmll5JejkZTgvqACA47WHPK99KR5zIpbAU'
update_sheets_in_drive_folder(gc, id_folios_edas, 'Hoja 1', edas_folios)