# load_file_to_bronze

Este notebook carga el CSV "erp_extract.csv", le añade la marca de tiempo de carga y lo guarda en formato Parquet en la carpeta Bronze del Data Lake.

## Imports

In [10]:
import os
from datetime import datetime, timezone

import pandas as pd

## Parámetros

In [7]:
# Location of the raw ERP extract, relative to the current working directory.
source_file_path = os.path.join(
    os.curdir,
    "Datos",
    "erp_extract.csv",
)

In [4]:
# Bronze-layer target: the directory inside the data lake, the Parquet file
# name, and the full path obtained by joining the two.
bronze_path = os.path.join(os.curdir, "Data_Lake", "Bronze")
bronze_file_name = "erp_bronze.parquet"
bronze_file_path = os.path.join(bronze_path, bronze_file_name)

## Main

## Cargar el csv a un DataFrame

In [8]:
# Read the raw ERP extract into a DataFrame using pandas' default parsing options.
erp_extract_df = pd.read_csv(filepath_or_buffer=source_file_path)

In [9]:
# Preview the first five rows to check that the file was loaded correctly
erp_extract_df.head(n=5)

Unnamed: 0,id_cliente,nombre,email,edad,ciudad,salario,fecha_registro,categoria,telefono,activo,puntos_fidelidad,ultima_compra
0,1,Cliente_1,cliente1@email.com,66.0,Valencia,2235.894632,2016-03-22 22:25:26.825944,B,+34 674293979,False,,2025-04-02 22:25:26.833943
1,2,Cliente_2,cliente2@email.com,,Barcelona,44151.957628,2025-10-08 22:25:26.825944,E,+34 663639417,1,,2024-12-23 22:25:26.833943
2,3,Cliente_3,cliente3@email.com,75.0,Bilbao,35136.416478,2017-05-12 22:25:26.825944,C,+34 652916829,True,,2025-09-25 22:25:26.833943
3,4,Cliente_4,cliente4@email.com,35.0,Bilbao,13325.620084,2021-12-16 22:25:26.825944,C,+34 641535944,True,200.0,2025-02-17 22:25:26.833943
4,5,Cliente_5,cliente5@email.com,38.0,Valencia,42037.063235,2018-11-05 22:25:26.825944,C,+34 602269798,True,200.0,2023-11-18 22:25:26.833943


## Añadir el timestamp de carga

In [11]:
# Stamp every row with the moment of this load, expressed in UTC.
# A bare datetime.now() records the local wall-clock time of whichever machine
# runs the notebook, which is ambiguous across hosts and DST changes.
# tzinfo is stripped afterwards so the column stays a naive datetime, as before;
# the stored values are to be read as UTC.
erp_extract_df['_BronzeTimestamp'] = datetime.now(timezone.utc).replace(tzinfo=None)

In [12]:
# Preview the first five rows again, now including the `_BronzeTimestamp` column
erp_extract_df.head(n=5)

Unnamed: 0,id_cliente,nombre,email,edad,ciudad,salario,fecha_registro,categoria,telefono,activo,puntos_fidelidad,ultima_compra,_BronzeTimestamp
0,1,Cliente_1,cliente1@email.com,66.0,Valencia,2235.894632,2016-03-22 22:25:26.825944,B,+34 674293979,False,,2025-04-02 22:25:26.833943,2025-11-05 09:28:00.303091
1,2,Cliente_2,cliente2@email.com,,Barcelona,44151.957628,2025-10-08 22:25:26.825944,E,+34 663639417,1,,2024-12-23 22:25:26.833943,2025-11-05 09:28:00.303091
2,3,Cliente_3,cliente3@email.com,75.0,Bilbao,35136.416478,2017-05-12 22:25:26.825944,C,+34 652916829,True,,2025-09-25 22:25:26.833943,2025-11-05 09:28:00.303091
3,4,Cliente_4,cliente4@email.com,35.0,Bilbao,13325.620084,2021-12-16 22:25:26.825944,C,+34 641535944,True,200.0,2025-02-17 22:25:26.833943,2025-11-05 09:28:00.303091
4,5,Cliente_5,cliente5@email.com,38.0,Valencia,42037.063235,2018-11-05 22:25:26.825944,C,+34 602269798,True,200.0,2023-11-18 22:25:26.833943,2025-11-05 09:28:00.303091


## Guardar el DataFrame en parquet

In [14]:
# NOTE: Es necesario tener instalado 'fastparquet' para poder guardar en formato parquet
%pip install fastparquet

Collecting fastparquet
  Downloading fastparquet-2024.11.0-cp311-cp311-win_amd64.whl (671 kB)
                                              0.0/671.0 kB ? eta -:--:--
     -------------------------              450.6/671.0 kB 9.4 MB/s eta 0:00:01
     ------------------------------------- 671.0/671.0 kB 10.5 MB/s eta 0:00:00
Collecting cramjam>=2.3 (from fastparquet)
  Downloading cramjam-2.11.0-cp311-cp311-win_amd64.whl (1.7 MB)
                                              0.0/1.7 MB ? eta -:--:--
     ---------------------------------------- 1.7/1.7 MB 106.3 MB/s eta 0:00:00
Collecting fsspec (from fastparquet)
  Downloading fsspec-2025.10.0-py3-none-any.whl (200 kB)
                                              0.0/201.0 kB ? eta -:--:--
     ---------------------------------------- 201.0/201.0 kB ? eta 0:00:00
Installing collected packages: fsspec, cramjam, fastparquet
Successfully installed cramjam-2.11.0 fastparquet-2024.11.0 fsspec-2025.10.0
Note: you may need to restart the ke


[notice] A new release of pip is available: 23.1.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [15]:
# Make sure the Bronze directory exists; nothing happens if it is already there
os.makedirs(name=bronze_path, exist_ok=True)

In [16]:
# Write the frame to the Bronze Parquet file, leaving out the pandas index.
# The engine is named explicitly: pandas' default ("auto") tries pyarrow first
# and only then fastparquet, so the writer used would otherwise depend on which
# libraries happen to be installed. This notebook installs fastparquet.
erp_extract_df.to_parquet(bronze_file_path, engine="fastparquet", index=False)