In [3]:
from dbfread import DBF
import pandas as pd

def read_dbf_to_dataframe(file_path):
    """
    Read a DBF file into a pandas DataFrame.

    The table is loaded fully into memory (``load=True``) and a missing memo
    file is tolerated (``ignore_missing_memofile=True``).

    Parameters:
    file_path (str): The path to the DBF file.

    Returns:
    pd.DataFrame: One row per DBF record and one column per DBF field, in
    field order. A table with no records yields an empty DataFrame that
    still carries the field names as columns.
    """
    dbf_table = DBF(file_path, load=True, ignore_missing_memofile=True)
    # Passing the field names explicitly keeps the schema for empty tables;
    # without it pandas would return a frame with no columns at all.
    return pd.DataFrame(iter(dbf_table), columns=dbf_table.field_names)

In [None]:
# Load articulo.dbf from the project's DBF folder
file_path = "../data/dbf_files/articulo.dbf"
articulo_df = read_dbf_to_dataframe(file_path)

# Preview the first five rows
articulo_df.head(5)

In [None]:
# Column names of the articulo table as a plain Python list
list(articulo_df.columns)

In [None]:
# Column index of the articulo table (names plus dtype of the index)
articulo_df.columns

In [None]:
# Load familias.dbf from the project's DBF folder
file_path = "../data/dbf_files/familias.dbf"
familias_df = read_dbf_to_dataframe(file_path)

# Preview the first five rows
familias_df.head(5)

In [None]:
# Column index of the familias table
familias_df.columns

In [10]:
# Usage
# Raw string: the backslashes in this Windows path must not be parsed as
# escape sequences (non-raw "\C", "\d", "\T" are invalid escapes).
file_path = r"D:\ClasGes6\datos\TIKETS.dbf"
tikets_df = read_dbf_to_dataframe(file_path)

# Display the DataFrame
tikets_df.head()

Unnamed: 0,CLATIK,CLAEMP,CLAEJE,CLATPV,FECHA,CLACLI,CODCLI,NOMCLI,NUMERO,TOTAL,...,ITEVALES,TARJETA,FECHAHORA,COMENSALES,FECHORAFIN,AGRUPAPAGO,CLALIQTPV,PREIMPRESO,IDUSERAPP,ESTBAI
0,1,1,1,1,2021-07-09,1,1,PUBLICO EN GENERAL,1.0,5.5,...,0.0,0.0,2021-07-09 19:20:59.999,0.0,2021-07-09 19:23:05.999,False,0,,,0.0
1,2,1,1,1,2021-07-09,1,1,PUBLICO EN GENERAL,2.0,1.0,...,0.0,0.0,2021-07-09 19:59:00.000,0.0,2021-07-09 19:59:39.000,False,0,,,0.0
2,3,1,1,1,2021-07-09,1,1,PUBLICO EN GENERAL,3.0,6.5,...,0.0,0.0,2021-07-09 20:05:59.999,0.0,2021-07-09 20:10:00.000,False,0,,,0.0
3,4,1,1,1,2021-07-09,1,1,PUBLICO EN GENERAL,4.0,6.5,...,0.0,0.0,2021-07-09 20:10:00.000,0.0,2021-07-09 20:20:29.999,False,0,,,0.0
4,5,1,1,1,2021-07-09,1,1,PUBLICO EN GENERAL,5.0,12.5,...,0.0,0.0,2021-07-09 20:19:59.999,0.0,2021-07-09 20:22:38.000,False,0,,,0.0


In [6]:
# Column index of the TIKETS table
tikets_df.columns

Index(['CLATIK', 'CLAEMP', 'CLAEJE', 'CLATPV', 'FECHA', 'CLACLI', 'CODCLI',
       'NOMCLI', 'NUMERO', 'TOTAL', 'PAGADO', 'CLAVEN', 'CLATJT', 'CONTAB',
       'HORA', 'CLAELEM', 'ABIERTO', 'CLACIE', 'VERSION', 'ENCOBRO',
       'CLAVENPDA', 'CLAFAC', 'CIFCLI', 'DIRCLI', 'LOCCLI', 'PROVCLI',
       'POSCLI', 'TELCLI', 'EMCLI', 'SERIEFAC', 'NUMFAC', 'ENTREGA',
       'CAMBIODEV', 'REFPAGOTJT', 'COMITJT', 'NPEDIDOTJT', 'CLAVALTPV',
       'VALESDTO', 'ITEVALES', 'TARJETA', 'FECHAHORA', 'COMENSALES',
       'FECHORAFIN', 'AGRUPAPAGO', 'CLALIQTPV', 'PREIMPRESO', 'IDUSERAPP',
       'ESTBAI'],
      dtype='object')

In [7]:
# Save to CSV (written to the current working directory, without the index column)
tikets_df.to_csv("datos.csv", index=False)

In [12]:
# Usage
# Raw string: the backslashes in this Windows path must not be parsed as
# escape sequences (non-raw "\C", "\d", "\T" are invalid escapes).
file_path = r"D:\ClasGes6\datos\TIKETSL.dbf"
tiketsl_df = read_dbf_to_dataframe(file_path)

# Display the DataFrame
tiketsl_df.head()

Unnamed: 0,CLATIKL,CLATIK,CLAART,CANTIDAD,CODIGO,LINDESC,DTO,IVA,PRECIO,PRECIOIVA,...,CLAMENUH,IDMENUH,CLAGCOCINA,PRECIOIVA2,PRECIO2,COMBINADO,CLAARTCOMB,PVPIVACOMB,PVPCOMB,PESADA
0,2,1,14,1.0,14,HAWAIANA 26CMS,0.0,10.0,5.0,5.5,...,0,,0,0.0,0.0,False,0,0.0,0.0,False
1,4,2,1,1.0,1,COCA COLA 33CL,0.0,10.0,0.91,1.0,...,0,,0,0.0,0.0,False,0,0.0,0.0,False
2,6,3,24,1.0,24,CUATRO ESTACIONES 26CMS,0.0,10.0,5.91,6.5,...,0,,0,0.0,0.0,False,0,0.0,0.0,False
3,8,4,9,1.0,9,MARGARITA 33CMS,0.0,10.0,5.91,6.5,...,0,,0,0.0,0.0,False,0,0.0,0.0,False
4,11,5,5,3.0,5,KECALAMARES,0.0,10.0,3.18,3.5,...,0,,0,0.0,0.0,False,0,0.0,0.0,False


In [13]:
# Ensure 'FECHA' is a datetime column so the .dt accessor is available
tikets_df['FECHA'] = pd.to_datetime(tikets_df['FECHA'])

# Derive the year first: both aggregations below group on this column,
# so it has to exist before either groupby runs.
tikets_df['Year'] = tikets_df['FECHA'].dt.year

# Sum of 'TOTAL' per year (displayed by the following cell)
total_by_year = tikets_df.groupby('Year')['TOTAL'].sum()

# Count of non-null 'CLATIK' values per year
clatik_by_year = tikets_df.groupby('Year')['CLATIK'].count()

# Display the result
clatik_by_year

Year
2021    2458
2022    5354
2023    5526
2024    5276
2025     915
Name: CLATIK, dtype: int64

In [14]:
# Yearly sum of 'TOTAL' computed in the previous cell
total_by_year

Year
2021    34988.29
2022    77477.35
2023    81186.05
2024    81573.40
2025    16311.60
Name: TOTAL, dtype: float64

In [15]:
# Usage
# Raw string: the backslashes in this Windows path must not be parsed as
# escape sequences (non-raw "\C", "\d", "\E" are invalid escapes).
file_path = r"D:\ClasGes6\datos\ESCAN.dbf"
df = read_dbf_to_dataframe(file_path)

# Display the DataFrame
df.head()

Unnamed: 0,CLAESC,CLAART,FECHA,COMENT,REPRESEN,MARKETING,PORTES,OTROS,FINANCIERO,GGENERALES,MARGEN,PVP,COSTEPROD
0,1,37,2025-03-09,,0.0,0.0,0.0,0.0,0.0,0.0,77.3,10.0,2.27
1,2,9,2025-03-09,,0.0,0.0,0.0,0.0,0.0,0.0,63.469676,7.09,2.59
2,3,13,2025-03-09,,0.0,0.0,0.0,0.0,0.0,0.0,92.80677,7.09,0.51


In [17]:
# Usage
# Raw string: the backslashes in this Windows path must not be parsed as
# escape sequences (non-raw "\C", "\d", "\E" are invalid escapes).
file_path = r"D:\ClasGes6\datos\ESCANL.dbf"
df = read_dbf_to_dataframe(file_path)

# Display the first ten rows
df.head(10)

Unnamed: 0,CLAESCL,CLAESC,TIPO,CLAMAT,CLAPROCE,COSTE,CANTIDAD,MTLN,ALKZ,MTLNUM,CLACOL,CLACOLMAT,TYC
0,1,1,M,164,0,5.05,0.1,False,0.0,0.0,0,0,False
1,2,1,M,165,0,7.03,0.25,False,0.0,0.0,0,0,False
2,3,1,M,179,0,0.0,0.1,False,0.0,0.0,0,0,False
3,4,1,M,186,0,0.0,1.0,False,0.0,0.0,0,0,False
4,5,1,M,182,0,0.0,0.1,False,0.0,0.0,0,0,False
5,6,1,M,180,0,0.0,0.1,False,0.0,0.0,0,0,False
6,7,1,M,188,0,0.0,0.1,False,0.0,0.0,0,0,False
7,8,2,M,164,0,5.05,0.1,False,0.0,0.0,0,0,False
8,9,3,M,164,0,5.05,0.1,False,0.0,0.0,0,0,False
9,10,2,M,165,0,7.03,0.25,False,0.0,0.0,0,0,False


In [None]:
from IPython.display import display

# Display the entire DataFrame without truncation: the row/column display
# limits are lifted only inside this `with` block.
# NOTE(review): `df_selected` is not assigned in any cell visible here; on a
# fresh kernel this raises NameError. Confirm where it is meant to come from.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(df_selected)

In [None]:
# Raw string: the backslashes in this Windows path must not be parsed as
# escape sequences (non-raw "\C", "\d", "\A" are invalid escapes).
file_path = r"D:\ClasGes6\datos\ARTICULO.dbf"
df = read_dbf_to_dataframe(file_path)

# Display the DataFrame ordered by 'CLAART', highest value first.
# Only the last expression of a cell is rendered, so this is the cell's output.
df.sort_values(by='CLAART', ascending=False)



In [None]:
# Save to CSV (without the index column)
# NOTE(review): `df` is whichever table was loaded last; in the visible cells
# that is ARTICULO.dbf, while the file is named "mfunc.csv" - confirm the intended table.
df.to_csv("../data/raw_data/mfunc.csv", index=False)

In [None]:
# (rows, columns) of the currently loaded table
df.shape

In [None]:
# Order rows by 'FECHA' descending, then preview the first five
df_sorted = df.sort_values('FECHA', ascending=False)
df_sorted.head(5)

In [None]:
# Parse 'FECHA' as datetimes
df['FECHA'] = pd.to_datetime(df['FECHA'])

# Summary statistics per 'FECHA' group, largest group key first
(
    df.groupby('FECHA')
    .describe()
    .sort_index(ascending=False)
)

In [None]:
# Largest 'FECHAHORA' value in the currently loaded table
# NOTE(review): in the visible cells `df` was last assigned from ARTICULO.dbf,
# while 'FECHAHORA' is listed among the TIKETS columns - confirm which table is intended.
max_fecha = df['FECHAHORA'].max()
print(max_fecha)