The following notebook is to process some of the data in Convenio Marco vehículos 2021 and 2023 Transactions. Feel free to modify if needed.

In [1]:
import requests as rq
import os
import zipfile
import io
import pandas as pd

In [2]:
root_path =  r"C:\Users\lucas\OneDrive - Yale University\Documents\GitHub\2nd-year-paper"
input_path = os.path.join(root_path, 'raw_data', 'ConvenioMarco', 'transacciones') 
dest_path =  os.path.join(root_path, 'interm_data', 'yearly_data', 'Transacciones') # destination path

months = range(1, 12 + 1)

In [3]:
def replace_columns(df: pd.DataFrame) -> pd.DataFrame:

    """Replace columns names in dataframe
    
    df : pandas dataframe

    """

    column_replace = {'Nro Licitaci?n P?blica' : 'Nro Licitación Pública', 'Nro LicitaciÃ³n PÃºblica': 'Nro Licitación Pública', 'Fecha Env?o OC': 'Fecha Envío OC',\
                   'Especificaci?n del Comprador' : 'Especificación del Comprador', 'TotaL?nea(Neto)' : 'TotaLínea(Neto)', \
                    'Raz?n Social Comprador': 'Razón Social Comprador', 'Direcci?n Unidad Compra' : 'Dirección Unidad Compra', \
                    'Regi?n Unidad de Compra' : 'Región Unidad de Compra', 'Instituci?n' : 'Institución', 'Regi?n del Proveedor' : 'Región del Proveedor', \
                 'Nro Licitacion Publica' : 'Nro Licitación Pública', 'Fecha Envio OC': 'Fecha Envío OC',\
                   'Especificacion del Comprador' : 'Especificación del Comprador', 'TotaLinea(Neto)' : 'TotaLínea(Neto)', \
                    'Razon Social Comprador': 'Razón Social Comprador', 'Direccion Unidad Compra' : 'Dirección Unidad Compra', \
                    'Region Unidad de Compra' : 'Región Unidad de Compra', 'Institucion' : 'Institución', 'Region del Proveedor' : 'Región del Proveedor'}    
    df.rename(columns=column_replace, inplace=True)

    return df

## Extract vehiculos 2013 and 2017

The 2017 code is: 2239-4-LR17
The 2013 code is: 2239-20-LP13

We have to be careful because this FAs also include the subletting, not only the purchase, of vehicles 

In [None]:
destination_path_2017 = os.path.join(dest_path, 'transacciones_cm_2017.csv')
destination_path_2013 = os.path.join(dest_path, 'transacciones_cm_2013.csv')

#create empty dfs
df_transacciones_cm_2013 = pd.DataFrame()
df_transacciones_cm_2017 = pd.DataFrame()

for year in range(2015, 2023):
    for month in months:
        try:
            path = os.path.join(input_path, f'{year}', f'{year}-{month}.csv')
            df = pd.read_csv(path)
            #df = pd.read_csv(path, sep=';', encoding='latin-1')

            df = replace_columns(df)
            rows_2017 = df[df['Nro Licitación Pública'] == '2239-4-LR17']
            rows_2013 = df[df['Nro Licitación Pública'] == '2239-20-LP13']

            if len(rows_2017) != 0:
                df_transacciones_cm_2017 = pd.concat([df_transacciones_cm_2017, rows_2017])
            if len(rows_2013) != 0:
                df_transacciones_cm_2013 = pd.concat([df_transacciones_cm_2013, rows_2013])
            print(f'{year}-{month} done', 'number of 2013 columns:', len(rows_2013), 'number of 2017 columns:', len(rows_2017))
        except:
            print(f'{year}-{month} not found')
            print(path) 
            

df_transacciones_cm_2017
df_transacciones_cm_2017.to_csv(destination_path_2017)
df_transacciones_cm_2013.to_csv(destination_path_2013)

2015-1 done number of 2013 columns: 0 number of 2017 columns: 0


  df = pd.read_csv(path)


2015-2 done number of 2013 columns: 0 number of 2017 columns: 0
2015-3 done number of 2013 columns: 0 number of 2017 columns: 0
2015-4 done number of 2013 columns: 0 number of 2017 columns: 0
2015-5 done number of 2013 columns: 0 number of 2017 columns: 0
2015-6 done number of 2013 columns: 0 number of 2017 columns: 0


  df = pd.read_csv(path)


2015-7 done number of 2013 columns: 0 number of 2017 columns: 0
2015-8 done number of 2013 columns: 0 number of 2017 columns: 0


  df = pd.read_csv(path)


2015-9 done number of 2013 columns: 0 number of 2017 columns: 0
2015-10 done number of 2013 columns: 0 number of 2017 columns: 0
2015-11 done number of 2013 columns: 0 number of 2017 columns: 0
2015-12 done number of 2013 columns: 0 number of 2017 columns: 0
2016-1 done number of 2013 columns: 0 number of 2017 columns: 0
2016-2 done number of 2013 columns: 0 number of 2017 columns: 0
2016-3 done number of 2013 columns: 0 number of 2017 columns: 0
2016-4 done number of 2013 columns: 0 number of 2017 columns: 0
2016-5 done number of 2013 columns: 0 number of 2017 columns: 0
2016-6 done number of 2013 columns: 0 number of 2017 columns: 0
2016-7 done number of 2013 columns: 0 number of 2017 columns: 0
2016-8 done number of 2013 columns: 0 number of 2017 columns: 0
2016-9 done number of 2013 columns: 0 number of 2017 columns: 0
2016-10 done number of 2013 columns: 0 number of 2017 columns: 0
2016-11 done number of 2013 columns: 0 number of 2017 columns: 0
2016-12 done number of 2013 columns

  df = pd.read_csv(path)


2017-4 done number of 2013 columns: 0 number of 2017 columns: 0
2017-5 done number of 2013 columns: 0 number of 2017 columns: 0
2017-6 done number of 2013 columns: 0 number of 2017 columns: 0
2017-7 done number of 2013 columns: 0 number of 2017 columns: 0
2017-8 not found
C:\Users\lucas\OneDrive - Yale University\Documents\GitHub\2nd-year-paper\raw_data\ConvenioMarco\transacciones\2017\2017-8.csv
2017-9 done number of 2013 columns: 0 number of 2017 columns: 0
2017-10 done number of 2013 columns: 0 number of 2017 columns: 0
2017-11 done number of 2013 columns: 0 number of 2017 columns: 0
2017-12 done number of 2013 columns: 0 number of 2017 columns: 0
2018-1 done number of 2013 columns: 0 number of 2017 columns: 0
2018-2 done number of 2013 columns: 0 number of 2017 columns: 63
2018-3 done number of 2013 columns: 0 number of 2017 columns: 215


  df = pd.read_csv(path)


2018-4 done number of 2013 columns: 0 number of 2017 columns: 274


  df = pd.read_csv(path)


2018-5 done number of 2013 columns: 0 number of 2017 columns: 308


  df = pd.read_csv(path)


2018-6 done number of 2013 columns: 0 number of 2017 columns: 323


  df = pd.read_csv(path)


2018-7 done number of 2013 columns: 0 number of 2017 columns: 291


  df = pd.read_csv(path)


2018-8 done number of 2013 columns: 0 number of 2017 columns: 373
2018-9 done number of 2013 columns: 0 number of 2017 columns: 301


  df = pd.read_csv(path)


2018-10 done number of 2013 columns: 0 number of 2017 columns: 484
2018-11 done number of 2013 columns: 0 number of 2017 columns: 459
2018-12 done number of 2013 columns: 0 number of 2017 columns: 483
2019-1 done number of 2013 columns: 0 number of 2017 columns: 298
2019-2 done number of 2013 columns: 0 number of 2017 columns: 323


  df = pd.read_csv(path)


2019-3 done number of 2013 columns: 0 number of 2017 columns: 428


  df = pd.read_csv(path)


2019-4 done number of 2013 columns: 0 number of 2017 columns: 492
2019-5 done number of 2013 columns: 0 number of 2017 columns: 535


  df = pd.read_csv(path)


2019-6 done number of 2013 columns: 0 number of 2017 columns: 504


  df = pd.read_csv(path)


2019-7 done number of 2013 columns: 0 number of 2017 columns: 536
2019-8 done number of 2013 columns: 0 number of 2017 columns: 561
2019-9 done number of 2013 columns: 0 number of 2017 columns: 483
2019-10 done number of 2013 columns: 0 number of 2017 columns: 661
2019-11 done number of 2013 columns: 0 number of 2017 columns: 582


  df = pd.read_csv(path)


2019-12 done number of 2013 columns: 0 number of 2017 columns: 653
2020-1 done number of 2013 columns: 0 number of 2017 columns: 471
2020-2 done number of 2013 columns: 0 number of 2017 columns: 486
2020-3 done number of 2013 columns: 0 number of 2017 columns: 475
2020-4 done number of 2013 columns: 0 number of 2017 columns: 316
2020-5 done number of 2013 columns: 0 number of 2017 columns: 328


  df = pd.read_csv(path)


2020-6 done number of 2013 columns: 0 number of 2017 columns: 369
2020-7 done number of 2013 columns: 0 number of 2017 columns: 358
2020-8 done number of 2013 columns: 0 number of 2017 columns: 416
2020-9 done number of 2013 columns: 0 number of 2017 columns: 455


  df = pd.read_csv(path)


2020-10 done number of 2013 columns: 0 number of 2017 columns: 486


  df = pd.read_csv(path)


2020-11 done number of 2013 columns: 0 number of 2017 columns: 535
2020-12 done number of 2013 columns: 0 number of 2017 columns: 479
2021-1 done number of 2013 columns: 0 number of 2017 columns: 271
2021-2 done number of 2013 columns: 0 number of 2017 columns: 431
2021-3 done number of 2013 columns: 0 number of 2017 columns: 513
2021-4 done number of 2013 columns: 0 number of 2017 columns: 477
2021-5 done number of 2013 columns: 0 number of 2017 columns: 48
2021-6 done number of 2013 columns: 0 number of 2017 columns: 480
2021-7 done number of 2013 columns: 0 number of 2017 columns: 505
2021-8 done number of 2013 columns: 0 number of 2017 columns: 541
2021-9 done number of 2013 columns: 0 number of 2017 columns: 489
2021-10 done number of 2013 columns: 0 number of 2017 columns: 530
2021-11 done number of 2013 columns: 0 number of 2017 columns: 542
2021-12 done number of 2013 columns: 0 number of 2017 columns: 366


## Extract CM vehículos 2021 and CM vehículos 2023

In [5]:
root_path =  r"C:\Users\lucas\OneDrive - Yale University\Documents\GitHub\2nd-year-paper"

input_path = os.path.join(root_path, 'raw_data', 'ConvenioMarco', 'transacciones') 
dest_path =  os.path.join(root_path, 'interm_data', 'yearly_data', 'Transacciones') # destination path

destination_path_2021 = os.path.join(dest_path, 'transacciones_cm_2021.csv')
destination_path_2023 = os.path.join(dest_path, 'transacciones_cm_2023.csv')

#create empty dfs
df_transacciones_cm_2021 = pd.DataFrame()
df_transacciones_cm_2023 = pd.DataFrame()

for year in range(2020, 2025): #range(2015, 2025):
    for month in range(1,13): #range(1, 13):
        try:
             
            path = os.path.join(input_path, str(year), f'{year}-{month}.csv')
            print(f"File exists: {os.path.exists(path)}")
             
            df = pd.read_csv(path, encoding='latin-1')
            df = replace_columns(df)
            print(f"Raw path string: {repr(path)}")  # This will show any hidden characters
            rows_2021 = df[df['Nro Licitación Pública'] == '2239-5-LR21']
            rows_2023 = df[df['Nro Licitación Pública'] == '2239-8-LR23']

            if len(rows_2021) != 0:
                df_transacciones_cm_2021 = pd.concat([df_transacciones_cm_2021, rows_2021])
            if len(rows_2023) != 0:
                df_transacciones_cm_2023 = pd.concat([df_transacciones_cm_2023, rows_2023])

            print(f'{year}-{month} done')
            
        except:
            print(f'{year}-{month} not found')
            print(path)

df_transacciones_cm_2021.to_csv(destination_path_2021)
df_transacciones_cm_2023.to_csv(destination_path_2023)

File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2020\\2020-1.csv'
2020-1 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2020\\2020-2.csv'
2020-2 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2020\\2020-3.csv'
2020-3 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2020\\2020-4.csv'
2020-4 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2020\\2020-5.csv'
2020-5 done
File exists: True


  df = pd.read_csv(path, encoding='latin-1')


Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2020\\2020-6.csv'
2020-6 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2020\\2020-7.csv'
2020-7 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2020\\2020-8.csv'
2020-8 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2020\\2020-9.csv'
2020-9 done
File exists: True


  df = pd.read_csv(path, encoding='latin-1')


Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2020\\2020-10.csv'
2020-10 done
File exists: True


  df = pd.read_csv(path, encoding='latin-1')


Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2020\\2020-11.csv'
2020-11 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2020\\2020-12.csv'
2020-12 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2021\\2021-1.csv'
2021-1 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2021\\2021-2.csv'
2021-2 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2021\\2021-3.csv'
2021-3 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale Universit

  df = pd.read_csv(path, encoding='latin-1')


Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2022\\2022-3.csv'
2022-3 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2022\\2022-4.csv'
2022-4 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2022\\2022-5.csv'
2022-5 done
File exists: True


  df = pd.read_csv(path, encoding='latin-1')


Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2022\\2022-6.csv'
2022-6 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2022\\2022-7.csv'
2022-7 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2022\\2022-8.csv'
2022-8 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2022\\2022-9.csv'
2022-9 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2022\\2022-10.csv'
2022-10 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\

  df = pd.read_csv(path, encoding='latin-1')


Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2023\\2023-1.csv'
2023-1 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2023\\2023-2.csv'
2023-2 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2023\\2023-3.csv'
2023-3 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2023\\2023-4.csv'
2023-4 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2023\\2023-5.csv'
2023-5 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\D

  df = pd.read_csv(path, encoding='latin-1')


Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2023\\2023-10.csv'
2023-10 done
File exists: True


  df = pd.read_csv(path, encoding='latin-1')


Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2023\\2023-11.csv'
2023-11 done
File exists: True


  df = pd.read_csv(path, encoding='latin-1')


Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2023\\2023-12.csv'
2023-12 done
File exists: True


  df = pd.read_csv(path, encoding='latin-1')


Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2024\\2024-1.csv'
2024-1 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2024\\2024-2.csv'
2024-2 done
File exists: True


  df = pd.read_csv(path, encoding='latin-1')


Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2024\\2024-3.csv'
2024-3 done
File exists: True


  df = pd.read_csv(path, encoding='latin-1')


Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2024\\2024-4.csv'
2024-4 done
File exists: True


  df = pd.read_csv(path, encoding='latin-1')


Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2024\\2024-5.csv'
2024-5 done
File exists: True


  df = pd.read_csv(path, encoding='latin-1')


Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2024\\2024-6.csv'
2024-6 done
File exists: True


  df = pd.read_csv(path, encoding='latin-1')


Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2024\\2024-7.csv'
2024-7 done
File exists: True
Raw path string: 'C:\\Users\\lucas\\OneDrive - Yale University\\Documents\\GitHub\\2nd-year-paper\\raw_data\\ConvenioMarco\\transacciones\\2024\\2024-8.csv'
2024-8 done
File exists: False
2024-9 not found
C:\Users\lucas\OneDrive - Yale University\Documents\GitHub\2nd-year-paper\raw_data\ConvenioMarco\transacciones\2024\2024-9.csv
File exists: False
2024-10 not found
C:\Users\lucas\OneDrive - Yale University\Documents\GitHub\2nd-year-paper\raw_data\ConvenioMarco\transacciones\2024\2024-10.csv
File exists: False
2024-11 not found
C:\Users\lucas\OneDrive - Yale University\Documents\GitHub\2nd-year-paper\raw_data\ConvenioMarco\transacciones\2024\2024-11.csv
File exists: False
2024-12 not found
C:\Users\lucas\OneDrive - Yale University\Documents\GitHub\2nd-year-paper\raw_data\ConvenioMarco\transacciones\202

  df = pd.read_csv(path, encoding='latin-1')
