# Creation of new variables

Some risk indicators can be built from a single variable or from the combination of two different variables.

In [1]:
import pandas as pd
import numpy as np

from currency_converter import RateNotFoundError

from currency_converter import CurrencyConverter

import datetime

import pickle

from unidecode import unidecode

In [2]:
# Path of the contracts CSV that this notebook reads.
CNTS = '/home/rdora/declaranet/data/pre-process/contratos.csv'
# Working frame: every later cell adds to, renames or drops columns of `cnts`.
cnts = pd.read_csv(filepath_or_buffer=CNTS)

  interactivity=interactivity, compiler=compiler, result=result)


# Files

The `expediente` attribute refers to the file number of the contract. A larger number of contracts per file could indicate a corruption risk.

In [3]:
# Count the (non-null) contract codes recorded under each file code.
exps = (
    cnts.groupby('codigo_expediente')['codigo_contrato']
    .count()
    .rename('file_size')
    .reset_index()
)
# Left join: every contract keeps its row and gains the size of its file.
cnts = cnts.merge(exps, how='left', on='codigo_expediente')

In [4]:
# The file code is no longer needed once `file_size` has been derived from it.
cnts = cnts.drop(columns=['codigo_expediente'])

# Single bid

A single bid contract can also be more risky than a contract with competition.

In [13]:
# Distinct procedure types present in the data (displayed as the cell output).
cnts.tipo_procedimiento.unique()

array(['Invitación a Cuando Menos 3 Personas',
       'Adjudicación Directa Federal', 'Licitación Pública',
       'Licitación Publica Estatal', 'Licitación Pública con OSD', nan,
       'LP', 'AD', 'I3P', 'CONTRATO ENTRE ENTES PUBLICOS', 'OC', 'PC',
       'Adjudicación directa', 'Proyecto de Convocatoria',
       'Invitación a cuando menos 3 personas', 'Otro'], dtype=object)

In [5]:
# Distinct contract categories present in the data (displayed as the cell output).
cnts.tipo_contratacion.unique()

array(['Adquisiciones', 'Obra Pública', 'Servicios', 'Arrendamientos',
       'Servicios Relacionados con la OP',
       'Servicios relacionados con la OP', nan, 'ADQUISICIONES'],
      dtype=object)

In [193]:
# Procedure labels that are treated as single-bid contracts.
single_bid = [
    'Adjudicación Directa Federal',
    'AD',
    'Adjudicación directa',
]
# 1 when the procedure is one of the labels above, 0 otherwise
# (rows with a missing procedure type also get 0).
cnts['single_bid'] = cnts['tipo_procedimiento'].isin(single_bid).astype('int64')

In [194]:
# The raw procedure label is replaced by the `single_bid` flag.
cnts = cnts.drop(columns=['tipo_procedimiento'])

# Date windows

1. Contract Duration.
2. Difference between the start of the contract and the opening of the bids.

In [195]:
# Parse the bid-opening date; values that do not match YYYY-MM-DD become NaT.
cnts = cnts.assign(
    fecha_apertura_proposiciones=pd.to_datetime(
        cnts['fecha_apertura_proposiciones'],
        format="%Y-%m-%d",
        errors='coerce'))

In [196]:
# Turn the three date columns into plain `datetime.date` values.
# Unparseable entries are coerced to NaT instead of raising.
cnts = cnts.assign(
    fecha_apertura_proposiciones=lambda df: pd.to_datetime(
        df['fecha_apertura_proposiciones'],
        format="%Y-%m-%d",
        errors='coerce').dt.date,
    fecha_inicio=lambda df: pd.to_datetime(df['fecha_inicio'], errors='coerce').dt.date,
    fecha_fin=lambda df: pd.to_datetime(df['fecha_fin'], errors='coerce').dt.date,
)

In [197]:
# The date columns hold `datetime.date` objects (object dtype). Converting the
# operands back to datetime64 guarantees the difference is a timedelta64 Series,
# so the `.dt` accessor works regardless of how pandas infers object arithmetic.

# Days between the start and the end of the contract.
cnts['contract_duration'] = (
    pd.to_datetime(cnts['fecha_fin']) - pd.to_datetime(cnts['fecha_inicio'])).dt.days

# Days between the opening of the bids and the start of the contract.
cnts['prop_window'] = (
    pd.to_datetime(cnts['fecha_inicio'])
    - pd.to_datetime(cnts['fecha_apertura_proposiciones'])).dt.days

In [198]:
# Both columns have been folded into `contract_duration` / `prop_window`.
cnts = cnts.drop(columns=['fecha_fin', 'fecha_apertura_proposiciones'])

In [227]:
# Give the contract start date its English output name.
cnts = cnts.rename({'fecha_inicio': 'start_date'}, axis='columns')

# Money conversion

It's better if everything is in a single currency (Dollars).

In [199]:
def change_currency(date, fallback_rate=1 / 15):
    """Return the factor that converts 1 MXN into USD for ``date``.

    The module-level converter ``c`` is asked for the rate on ``date``. When it
    raises ``RateNotFoundError``, the first day of every other month of the same
    year is tried (the following months first, then wrapping around to January).
    If no rate is found at all, ``fallback_rate`` is returned.

    Parameters
    ----------
    date : datetime.date-like
        Day for which the exchange rate is wanted; must expose ``month`` and
        ``year``. Missing values (NaT/NaN) get ``fallback_rate``.
    fallback_rate : float, optional
        Rate used when no rate can be looked up (default: 15 pesos to USD).

    Returns
    -------
    float
        Amount of USD equivalent to 1 MXN.
    """
    try:
        return c.convert(1, 'MXN', 'USD', date)
    except RateNotFoundError:
        pass

    # A missing date has no usable month/year to search around.
    if pd.isna(date):
        return fallback_rate

    for offset in range(1, 12):
        # Maps to month+1, ..., 12, 1, ..., month-1 while staying inside 1..12.
        candidate_month = (date.month + offset - 1) % 12 + 1
        candidate = datetime.date(date.year, candidate_month, 1)
        try:
            return c.convert(1, 'MXN', 'USD', candidate)
        except RateNotFoundError:
            continue

    return fallback_rate

In [200]:
# Converter instance that `change_currency` reads through the global name `c`.
c = CurrencyConverter()

# Only contracts priced in MXN are converted; other currencies are left as they are.
is_mxn = cnts.moneda == 'MXN'
# The start date column was renamed from 'fecha_inicio' to 'start_date' in an
# earlier cell, so the rate lookup has to use the new name.
mxn_to_usd = cnts.loc[is_mxn, 'start_date'].apply(change_currency)
# NOTE: astype('int') raises if any converted amount is NaN.
cnts.loc[is_mxn, 'importe_contrato'] = (
    (mxn_to_usd * cnts.loc[is_mxn, 'importe_contrato']).apply(np.floor).astype('int'))

In [201]:
# The currency label is dropped after the conversion step.
cnts = cnts.drop(columns=['moneda'])

In [202]:
# `nombre_de_la_uc` is not used by any later cell.
cnts = cnts.drop(columns=['nombre_de_la_uc'])

In [223]:
# Give the contract amount its English output name.
cnts = cnts.rename({'importe_contrato': 'amount'}, axis='columns')

## Average daily price

In [203]:
# Average amount per contract day, rounded to a whole number.
# The amount column was renamed from 'importe_contrato' to 'amount' in the
# previous cell, so it is read under its new name.
# NOTE: a contract_duration of 0 yields inf (or NaN for a zero amount), not an error.
cnts['daily_price'] = (cnts['amount'] / cnts['contract_duration']).round(decimals=0)

# Contract type

1. Services
2. Purchases
3. Public Work
4. Rent

In [204]:
# Spanish category labels (including spelling/case variants) -> English contract type.
ctype = {
    "Adquisiciones": "Purchase",
    "ADQUISICIONES": "Purchase",
    "Servicios": "Service",
    "Obra Pública": "Public work",
    "Servicios Relacionados con la OP": "Public work",
    "Servicios relacionados con la OP": "Public work",
    "Arrendamientos": "Rent",
}
# Labels absent from the mapping (and missing values) are left unchanged.
cnts['contract_type'] = cnts['tipo_contratacion'].replace(to_replace=ctype)

In [205]:
# The raw category is superseded by `contract_type`.
cnts = cnts.drop(columns=['tipo_contratacion'])

## Government level

1. Federal
2. State
3. Municipal

In [206]:
# One-letter government level codes; presumably F/S/M stand for the
# Federal/State/Municipal levels listed above (confirm against the data dictionary).
replace = {'APF': 'F', 'GE': 'S', 'GM': 'M'}
# Values missing from the mapping are kept as they are.
cnts['gvmnt_level'] = cnts['gobierno'].replace(to_replace=replace)

In [207]:
# The raw government label is superseded by `gvmnt_level`.
cnts = cnts.drop(columns=['gobierno'])

# Buyer's State

In [208]:
# Pickled dictionary whose values are used below as state codes.
# NOTE: unpickling can execute arbitrary code - only load files from a trusted source.
STATE2CODE = '/home/rdora/declaranet/data/pickle/entidades_federativas.p'
with open(STATE2CODE, mode='rb') as handle:
    state2code = pickle.load(handle)

In [209]:
# State names ordered alphabetically after stripping accents with `unidecode`.
states = sorted(
    [
        'Aguascalientes', 'Baja California', 'Baja California Sur',
        'Campeche', 'Coahuila de Zaragoza', 'Colima', 'Chiapas',
        'Chihuahua', 'Ciudad de México', 'Durango', 'Guanajuato',
        'Guerrero', 'Hidalgo', 'Jalisco', 'México', 'Michoacán de Ocampo',
        'Morelos', 'Nayarit', 'Nuevo León', 'Oaxaca', 'Puebla',
        'Querétaro', 'Quintana Roo', 'San Luis Potosí', 'Sinaloa',
        'Sonora', 'Tabasco', 'Tamaulipas', 'Tlaxcala',
        'Veracruz de Ignacio de la Llave', 'Yucatán', 'Zacatecas',
    ],
    key=unidecode,
)

In [210]:
# Values of `state2code`, ordered by their keys.
codes = tuple(code for _, code in sorted(state2code.items(), key=lambda item: item[0]))
# Pair each code positionally with a state name.
# NOTE(review): this assumes the key-sorted codes line up with the accent-insensitive
# order of `states` - verify against the pickle contents.
code2state = dict(zip(codes, states))

In [211]:
# Translate the buyer's state code into the state name; unknown codes stay unchanged.
cnts = cnts.assign(buyer_state=cnts['b_entidad_federativa'].replace(code2state))

In [212]:
# The buyer's state code is superseded by `buyer_state`.
cnts = cnts.drop(columns=['b_entidad_federativa'])

In [225]:
# `siglas` is not used by any later cell.
cnts = cnts.drop(columns=['siglas'])

## Seller's State

In [213]:
# State name of the supplier, only for suppliers whose country code is 'MX';
# every other row (including rows with a missing country) gets NaN.
# `.where` builds the column in one step, which avoids the legacy `np.NAN` alias
# and avoids writing strings into a float column through `.loc`.
cnts['supplier_state'] = (
    cnts['s_entidad_federativa']
    .replace(code2state)
    .where(cnts['siglas_pais'] == 'MX'))

In [214]:
# Copy the supplier's country code under its English output name.
cnts = cnts.assign(supplier_country=cnts['siglas_pais'])

In [215]:
# Both raw columns are superseded by `supplier_state` / `supplier_country`.
cnts = cnts.drop(columns=['s_entidad_federativa', 'siglas_pais'])

In [221]:
# Give the supplier column its English output name.
cnts = cnts.rename({'proveedor_contratista': 'supplier'}, axis='columns')

## Save

In [231]:
# The contract code becomes the `id` column of the saved table.
cnts = cnts.rename({'codigo_contrato': 'id'}, axis='columns')

In [233]:
# Destination of the enriched contracts table.
SAVE = '/home/rdora/declaranet/data/pre-process/contratos_2.csv'
# Write without the positional index so the file only contains data columns.
cnts.to_csv(path_or_buf=SAVE, index=False)