In [69]:
# Libs imports
# The imports are deliberately NOT wrapped in try/except: if a library is missing,
# the ImportError should stop the notebook here with a full traceback instead of
# being printed and followed by an unrelated NameError on the first use of `pd`.
import numpy as np
import pandas as pd

print('All imports successful!')

# Display every column when a DataFrame is rendered (None removes the column cap).
pd.set_option('display.max_columns', None)


All imports successful!


In [70]:
# Data Loading (csv --> pandas dataframe)
# Read errors (missing file, bad encoding, parse failure) are allowed to propagate:
# every later cell needs `df`, so printing the error and carrying on would only
# turn the real cause into a NameError further down the notebook.
# low_memory=False makes pandas read the file in one pass so each column gets a
# single inferred dtype instead of chunk-wise mixed types.
df = pd.read_csv('data/AssamProcurementData2016to2022.csv', low_memory=False)
print('Data loaded successfully and assigned to variable df!')

Data loaded successfully and assigned to variable df!


# Feature selection & fixing dtypes

In [71]:
# Keeping useful columns only.
# `.copy()` makes the selection an independent frame, so the column assignments
# below modify `df` itself rather than a possible view of the raw data
# (the pattern that triggers pandas' SettingWithCopyWarning).
df = df[['ocid', 'tender/id', 'tender/externalReference', 'tender/title',
       'tender/mainProcurementCategory', 'tender/procurementMethod',
       'tender/contractType', 'tenderclassification/description',
       'tender/participationFee/0/multiCurrencyAllowed',
       'tender/allowTwoStageTender', 'tender/value/amount',
       'tender/datePublished', 'tender/tenderPeriod/durationInDays','Payment Mode',
       'tender/stage', 'tender/numberOfTenderers',
       'tender/bidOpening/date', 'buyer/name', 'fiscal_year']].copy()

# Fixing dtypes
# No try/except here on purpose: a failed conversion must stop the notebook at this
# cell instead of leaving `df` half-converted for the cells that follow.

# Unparseable amounts / publication dates become NaN / NaT (errors='coerce').
df['tender/value/amount'] = pd.to_numeric(df['tender/value/amount'], errors='coerce')
df['tender/datePublished'] = pd.to_datetime(df['tender/datePublished'], errors='coerce')

# NOTE(review): this column is parsed day-first and WITHOUT errors='coerce', unlike
# 'tender/datePublished' above, so a malformed value raises here -- confirm that
# both the date format and the strictness difference are intended.
df['tender/bidOpening/date'] = pd.to_datetime(df['tender/bidOpening/date'], dayfirst=True)

# Split 'fiscal_year' on '-' into two integer columns (start year, end year).
# Raises if a value does not split into exactly two integer-like parts.
df[['fiscal_start_year', 'fiscal_end_year']] = (
    df['fiscal_year'].astype(str).str.split('-', expand=True).astype("int64")
)
print('Dtypes fixed successfully!')

Dtypes fixed successfully!


# Renaming Columns & Dropping duplicated columns

In [72]:
# Renaming
# Mapping from the raw column names to the Title_Case names used in the rest of
# the notebook, one pair per line.
new_names = {
    'ocid': 'Ocid',
    'tender/id': 'Tender_Id',
    'tender/externalReference': 'External_Ref',
    'tender/title': 'Title',
    'tender/mainProcurementCategory': 'Main_Procurement_Category',
    'tender/procurementMethod': 'Procurement_Method',
    'tender/contractType': 'Contract_Type',
    'tenderclassification/description': 'Classification_Description',
    'tender/participationFee/0/multiCurrencyAllowed': 'PFee_Multi_currency_allowed',
    'tender/allowTwoStageTender': 'Two_Stage_Tender',
    'tender/value/amount': 'Amount',
    'tender/datePublished': 'Date_Published',
    'tender/tenderPeriod/durationInDays': 'Duration_In_Days',
    'Payment Mode': 'Payment_Mode',
    'tender/stage': 'Stage',
    'tender/numberOfTenderers': 'Number_Of_Tenderers',
    'tender/bidOpening/date': 'Bid_Opening_Date',
    'buyer/name': 'Buyer_Name',
    'fiscal_year': 'Fiscal_Year',
    'fiscal_start_year': 'Fiscal_Start_Year',
    'fiscal_end_year': 'Fiscal_End_Year',
}

# Apply the mapping to the column labels; any error is reported by printing it.
try:
    df = df.rename(columns=new_names)
    print('Columns renamed successfully!')
except Exception as err:
    print(err)


Columns renamed successfully!


In [73]:
# Dropping Columns and invalid target rows.
# No try/except on purpose: if a column is missing, the KeyError must stop the
# notebook here, otherwise the export cell would write partially cleaned data.

# Remove the columns that are not carried into the exported dataset.
df = df.drop(columns=['Tender_Id', 'External_Ref', 'Title', 'Fiscal_Year'])

# A value of 0 in these columns is treated as "missing" and replaced by NaN.
# NOTE(review): 'Number_Of_Tenderers' was never passed through pd.to_numeric --
# presumably it was read as numeric; if it is text, '0' strings will not match 0.0.
df['Amount'] = df['Amount'].replace(0.0, np.nan)
df['Number_Of_Tenderers'] = df['Number_Of_Tenderers'].replace(0.0, np.nan)

# 1/0 indicator recording which rows have no tenderer count after the replacement.
df['Number_Of_Tenderers_Missing'] = df['Number_Of_Tenderers'].isna().astype(int)

# Drop rows missing the target ('Amount') or 'Stage'. A single how='any' call is
# equivalent to dropping on each column in turn.
df = df.dropna(subset=['Amount', 'Stage'], how='any').copy()
print('Columns renamed and dropped successfully!')

Columns renamed and dropped successfully!


In [74]:
# Rearranging columns
# Final column order: metadata, then object-dtype columns, then every remaining
# column, with the target ('Amount') last.
target = ['Amount']
metadata = [
    'Ocid',
    'Date_Published',
    'Bid_Opening_Date',
    'Duration_In_Days',
    'Fiscal_Start_Year',
    'Fiscal_End_Year',
]
# Object-dtype columns that are not metadata.
categorical = [
    col for col in df.columns
    if df[col].dtype == 'object' and col not in metadata
]
# Whatever is left once metadata, categorical columns and the target are excluded.
numeric = [
    col for col in df.columns
    if col not in metadata + categorical + target
]
df = df[metadata + categorical + numeric + target]

# Exporting data
# Written without the index; a failed write is reported by printing the error.
try:
    df.to_csv('data/output/AssamTenderData.csv', index=False)
except Exception as err:
    print(err)
else:
    print('Data exported successfully!')

Data exported successfully!
