# PR0301: Ingesta de datos de archivos

## Ingesta CSV

In [20]:
import pandas as pd

df_norte = pd.read_csv("./ventas_norte.csv", sep=";", dtype={"Cantidad_Vendida": int, "Precio_Unit": int, "Nom_Producto": str}, parse_dates=["Fecha_Venta"], index_col="ID_Transaccion")
df_norte["region"] = "Norte"
df_norte

Unnamed: 0_level_0,Fecha_Venta,Nom_Producto,Cantidad_Vendida,Precio_Unit,region
ID_Transaccion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1000,2023-02-21,Laptop,4,423,Norte
1001,2023-01-15,Laptop,2,171,Norte
1002,2023-03-13,Laptop,3,73,Norte
1003,2023-03-02,Teclado,1,139,Norte
1004,2023-01-21,Monitor,4,692,Norte
...,...,...,...,...,...
1095,2023-02-10,Laptop,3,516,Norte
1096,2023-01-29,Monitor,3,321,Norte
1097,2023-01-15,Laptop,4,200,Norte
1098,2023-02-14,Mouse,4,626,Norte


In [22]:
df_norte.info()

<class 'pandas.core.frame.DataFrame'>
Index: 100 entries, 1000 to 1099
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Fecha_Venta       100 non-null    datetime64[ns]
 1   Nom_Producto      100 non-null    object        
 2   Cantidad_Vendida  100 non-null    int64         
 3   Precio_Unit       100 non-null    int64         
 4   region            100 non-null    object        
dtypes: datetime64[ns](1), int64(2), object(2)
memory usage: 4.7+ KB


## Ingesta Excel

In [23]:
df_sur = pd.read_excel("./ventas_sur.xlsx", names=["ID_Transaccion", "Fecha_Venta", "Nom_Producto", "Cantidad_Vendida", "Precio_Unit"], dtype={"Cantidad_Vendida": int, "Precio_Unit": int, "Nom_Producto": str}, parse_dates=["Fecha_Venta"], index_col="ID_Transaccion")
df_sur["region"] = "Sur"
df_sur

Unnamed: 0_level_0,Fecha_Venta,Nom_Producto,Cantidad_Vendida,Precio_Unit,region
ID_Transaccion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000,2023-03-01,Monitor,6,624,Sur
2001,2023-03-04,Laptop,7,941,Sur
2002,2023-03-26,Mouse,3,989,Sur
2003,2023-02-01,Webcam,3,621,Sur
2004,2023-03-28,Mouse,5,437,Sur
2005,2023-02-02,Mouse,6,134,Sur
2006,2023-03-08,Laptop,9,636,Sur
2007,2023-01-18,Teclado,5,922,Sur
2008,2023-01-25,Mouse,1,215,Sur
2009,2023-02-23,Monitor,4,845,Sur


## Ingesta JSON

In [24]:
import json
with open("./ventas_este.json") as f:
    info = json.load(f)
df_este = pd.json_normalize(info)
df_este = df_este[["id_orden", "timestamp", "detalles_producto.nombre", "detalles_producto.specs.cantidad", "detalles_producto.specs.precio"]]
df_este["timestamp"] = df_este["timestamp"].apply(lambda x: x.split(" ")[0])
df_este = df_este.rename(columns={"id_orden": "ID_Transaccion", "timestamp": "Fecha_Venta", "detalles_producto.nombre": "Nom_Producto", "detalles_producto.specs.cantidad": "Cantidad_Vendida", "detalles_producto.specs.precio": "Precio_Unit"})
df_este = df_este.set_index("ID_Transaccion")
df_este["region"] = "Este"
df_este

Unnamed: 0_level_0,Fecha_Venta,Nom_Producto,Cantidad_Vendida,Precio_Unit,region
ID_Transaccion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ORD-3000,2023-03-09,Monitor,2,244,Este
ORD-3001,2023-01-20,Laptop,2,578,Este
ORD-3002,2023-01-01,Mouse,2,339,Este
ORD-3003,2023-02-07,Webcam,2,158,Este
ORD-3004,2023-03-18,Monitor,1,692,Este
...,...,...,...,...,...
ORD-3095,2023-03-28,Webcam,1,857,Este
ORD-3096,2023-01-18,Webcam,2,375,Este
ORD-3097,2023-02-10,Mouse,1,696,Este
ORD-3098,2023-01-25,Mouse,2,618,Este


In [25]:
df_junto = pd.concat([df_norte, df_sur, df_este])
df_junto

Unnamed: 0_level_0,Fecha_Venta,Nom_Producto,Cantidad_Vendida,Precio_Unit,region
ID_Transaccion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1000,2023-02-21 00:00:00,Laptop,4,423,Norte
1001,2023-01-15 00:00:00,Laptop,2,171,Norte
1002,2023-03-13 00:00:00,Laptop,3,73,Norte
1003,2023-03-02 00:00:00,Teclado,1,139,Norte
1004,2023-01-21 00:00:00,Monitor,4,692,Norte
...,...,...,...,...,...
ORD-3095,2023-03-28,Webcam,1,857,Este
ORD-3096,2023-01-18,Webcam,2,375,Este
ORD-3097,2023-02-10,Mouse,1,696,Este
ORD-3098,2023-01-25,Mouse,2,618,Este


In [27]:
df_junto.to_csv(index=False, sep=",", encoding="utf-8", path_or_buf="./ventas_consolidadas.csv")