In [None]:
from Alasco.alasco import Alasco
from dotenv import load_dotenv, find_dotenv
import pandas as pd
import os
import re

# Load credentials from the nearest .env file. raise_error_if_not_found=True makes
# a missing .env fail here instead of at the environment lookups below.
_ = load_dotenv(find_dotenv(raise_error_if_not_found=True))
token, key = os.environ["token_hines"], os.environ["key_hines"]
# Shared API client used by every cell that fetches data or downloads documents.
alasco = Alasco(token=token, key=key, verbose=True)



In [None]:
# Fetch the data for the property named "LOVT". The result is indexed by name
# further down (dfs["properties"]); presumably a dict of DataFrames, one per
# resource type - confirm against Alasco.data_fetcher.get_all_df.
dfs = alasco.data_fetcher.get_all_df(property_name="LOVT")

In [None]:
# Preview the first rows of the "properties" entry to check what was fetched.
dfs["properties"].head()

## Transform the fetched tables into joined DataFrames

In [None]:
# Seed the core lookup table with each property's id and name.
# The frame is read from `dfs` (fetched above) because `df_lovt_properties` is
# never assigned in this notebook, so the cell used to fail with NameError on a
# fresh kernel.
# NOTE(review): assumes dfs["properties"] is the frame the old name referred to - confirm.
df_core = dfs["properties"][["id", "name"]]
# Prefix the generic column names so they stay unambiguous after later merges.
df_core = df_core.rename(columns={"id": "property_id", "name": "property_name"})
df_core.head()


In [None]:
# NOTE(review): df_lovt_projects is not assigned anywhere in the visible notebook;
# presumably it should be taken from `dfs` - confirm the key before relying on a
# fresh-kernel run.
# Keep each project's id, name and owning property id, renamed to match df_core.
df_lovt_projects = df_lovt_projects[
    ["id", "name", "relationships.property.data.id"]
].rename(
    columns={
        "id": "project_id",
        "name": "project_name",
        "relationships.property.data.id": "property_id",
    }
)
# Attach projects to their property (merge defaults to an inner join).
df_core = df_core.merge(df_lovt_projects, on="property_id")
df_core.head(1)

In [None]:
# NOTE(review): df_contract_units is not assigned anywhere in the visible notebook;
# presumably it should be taken from `dfs` - confirm the key.
# Keep each contract unit's id, name and owning project id, renamed to match df_core.
df_contract_units = df_contract_units[
    ["id", "name", "relationships.project.data.id"]
].rename(
    columns={
        "id": "contract_unit_id",
        "name": "contract_unit_name",
        "relationships.project.data.id": "project_id",
    }
)
# Attach contract units to their project (merge defaults to an inner join).
df_core = df_core.merge(df_contract_units, on="project_id")
df_core.head(1)


In [None]:
# NOTE(review): df_contracts is not assigned anywhere in the visible notebook before
# this cell; presumably it should be taken from `dfs` - confirm the key.
# Keep the contract columns needed downstream; the two reference columns are
# renamed to the *_id names used as join keys.
df_contracts = df_contracts[
    ["id", "name", "contract_number", "contract_unit", "contractor"]
].rename(
    columns={
        "id": "contract_id",
        "name": "contract_name",
        "contract_unit": "contract_unit_id",
        "contractor": "contractor_id",
    }
)
# Attach contracts to their contract unit (merge defaults to an inner join).
df_core = df_core.merge(df_contracts, on="contract_unit_id")
df_core.head(1)

In [None]:
# Collect every contractor referenced by a contract exactly once.
# dropna() removes both None and NaN (an `is not None` check lets NaN through),
# and drop_duplicates() matches how ids are gathered in the download cells, so
# a contractor shared by several contracts is requested only once.
contractor_ids = df_core["contractor_id"].dropna().drop_duplicates().tolist()
df_contractors = alasco.data_fetcher.get_contractors(contractor_ids=contractor_ids)
# Keep only id and name, renamed to match the join key in df_core.
df_contractors = df_contractors[["id", "name"]]
df_contractors = df_contractors.rename(columns={"id": "contractor_id", "name": "contractor_name"})
# Inner join: contracts without a matching contractor row drop out of df_core.
df_core = pd.merge(df_core, df_contractors, on="contractor_id")
df_core.head(1)

In [None]:
# NOTE(review): df_invoices is not assigned anywhere in the visible notebook before
# this cell; presumably it should be taken from `dfs` - confirm the key.
# Keep the invoice id, its contract reference and its external identifier,
# renamed to the names used for joining and for file naming later on.
df_invoices = df_invoices[["id", "contract", "external_identifier"]].rename(
    columns={
        "id": "invoice_id",
        "contract": "contract_id",
        "external_identifier": "invoice_number",
    }
)
# Add the df_core columns to every invoice via its contract (inner join).
df_invoices = df_core.merge(df_invoices, on="contract_id")
df_invoices.head(1)

In [None]:
# NOTE(review): df_change_orders is not assigned anywhere in the visible notebook
# before this cell; presumably it should be taken from `dfs` - confirm the key.
# Keep the change order id, contract reference, name and identifier, renamed
# to the names used for joining and for file naming later on.
df_change_orders = df_change_orders[["id", "contract", "name", "identifier"]].rename(
    columns={
        "id": "change_order_id",
        "contract": "contract_id",
        "name": "change_order_name",
        "identifier": "change_order_identifier",
    }
)
# Add the df_core columns to every change order via its contract (inner join).
df_change_orders = df_core.merge(df_change_orders, on="contract_id")
df_change_orders.head(1)

## Download documents

In [None]:
# Unique invoice ids in first-seen order.
# TODO: remove the [:10] cap after testing - it limits the run to ten invoices.
invoice_ids = df_invoices["invoice_id"].drop_duplicates().tolist()[:10]

# Fetch the document records for those invoices and give the columns the
# names used for joining and downloading.
df_invoice_links = alasco.document_downloader.get_invoice_documents(
    invoice_ids=invoice_ids
).rename(
    columns={
        "id": "invoice_document_id",
        "relationships.invoice.data.id": "invoice_id",
        "links.download": "download_link",
    }
)
df_invoice_links = df_invoice_links[
    ["invoice_document_id", "filename", "invoice_id", "download_link", "document_type"]
]

# NOTE(review): df_invoices is overwritten with the merged frame, so re-running
# this cell without re-running the transform cells merges the link columns a
# second time.
df_invoices = df_invoices.merge(df_invoice_links, on="invoice_id", how="outer")

# Only rows whose document_type is exactly "INVOICE" are downloaded; invoices
# without a document have no document_type after the outer merge and drop out.
df = df_invoices[df_invoices["document_type"].eq("INVOICE")]
download_links = df["download_link"].to_list()

def name_invoice(row: pd.Series) -> str:
    """Build the download filename for one invoice document.

    Args:
        row: Record providing ``contractor_name``, ``contract_number`` and
            ``invoice_number``.

    Returns:
        ``invoice_<contractor>_<contract number>_<invoice number>.pdf``, where
        each part keeps only ASCII letters, digits, ``_``, ``-``, parentheses
        and spaces. A missing part (None/NaN) becomes an empty string.
    """

    def _clean(value) -> str:
        # re.sub raises TypeError on None, NaN and numbers; treat missing
        # values as empty and stringify everything else before filtering.
        text = "" if pd.isna(value) else str(value)
        return re.sub(r'[^a-zA-Z0-9_\-() ]', '', text)

    contract_number = _clean(row["contract_number"])
    contractor_name = _clean(row["contractor_name"])
    invoice_number = _clean(row["invoice_number"])

    return f"invoice_{contractor_name}_{contract_number}_{invoice_number}.pdf"

# One target filename per INVOICE row, in the same row order as download_links.
invoice_names = df.apply(name_invoice, axis=1)

# Hand the links and their filenames to the downloader.
alasco.document_downloader.download_documents(
    document_download_links=download_links,
    document_names=invoice_names,
)

In [None]:
# Work on a copy of the core table so df_core itself is left untouched.
df_contracts = df_core.copy()

# Unique contract ids in first-seen order.
# TODO: remove the [:10] cap after testing - it limits the run to ten contracts.
contract_ids = df_contracts["contract_id"].drop_duplicates().tolist()[:10]

# Fetch the document records for those contracts and give the columns the
# names used for joining and downloading.
df_contract_links = alasco.document_downloader.get_contract_documents(
    contract_ids=contract_ids
).rename(
    columns={
        "id": "contract_document_id",
        "relationships.contract.data.id": "contract_id",
        "links.download": "download_link",
    }
)
df_contract_links = df_contract_links[
    ["contract_document_id", "filename", "contract_id", "download_link", "document_type"]
]
df_contracts = df_contracts.merge(df_contract_links, on="contract_id", how="outer")

# Only rows whose document_type is exactly "CONTRACT" are downloaded; contracts
# without a document have no document_type after the outer merge and drop out.
df = df_contracts[df_contracts["document_type"].eq("CONTRACT")]
download_links = df["download_link"].to_list()

def name_contract(row: pd.Series) -> str:
    """Build the download filename for one contract document.

    Args:
        row: Record providing ``contractor_name`` and ``contract_number``.

    Returns:
        ``contract_<contractor>_<contract number>.pdf``, where each part keeps
        only ASCII letters, digits, ``_``, ``-``, parentheses and spaces.
        A missing part (None/NaN) becomes an empty string.
    """

    def _clean(value) -> str:
        # re.sub raises TypeError on None, NaN and numbers; treat missing
        # values as empty and stringify everything else before filtering.
        text = "" if pd.isna(value) else str(value)
        return re.sub(r'[^a-zA-Z0-9_\-() ]', '', text)

    contractor_name = _clean(row["contractor_name"])
    contract_number = _clean(row["contract_number"])

    return f"contract_{contractor_name}_{contract_number}.pdf"

# One target filename per CONTRACT row, in the same row order as download_links.
contract_names = df.apply(name_contract, axis=1)

# Hand the links and their filenames to the downloader.
alasco.document_downloader.download_documents(
    document_download_links=download_links,
    document_names=contract_names,
)

In [None]:
# Unique change order ids in first-seen order.
# TODO: remove the [:10] cap after testing - it limits the run to ten change orders.
change_order_ids = df_change_orders["change_order_id"].drop_duplicates().tolist()[:10]

# Fetch the document records for those change orders and give the columns the
# names used for joining and downloading. Note the link column is called
# "download_links" here, while the invoice and contract cells use "download_link".
df_change_order_links = alasco.document_downloader.get_change_order_documents(
    change_order_ids=change_order_ids
).rename(
    columns={
        "id": "change_order_document_id",
        "relationships.change_order.data.id": "change_order_id",
        "links.download": "download_links",
    }
)
df_change_order_links = df_change_order_links[
    ["change_order_document_id", "change_order_id", "download_links", "document_type", "filename"]
]

# NOTE(review): df_change_orders is overwritten with the merged frame, so
# re-running this cell without re-running the transform cells merges the link
# columns a second time.
df_change_orders = df_change_orders.merge(
    df_change_order_links, on="change_order_id", how="outer"
)

# Only rows whose document_type is exactly "CHANGE_ORDER" are downloaded.
df = df_change_orders[df_change_orders["document_type"].eq("CHANGE_ORDER")]
download_links = df["download_links"].to_list()

def name_change_order(row: pd.Series) -> str:
    """Build the download filename for one change order document.

    Args:
        row: Record providing ``contractor_name``, ``contract_number`` and
            ``change_order_identifier``.

    Returns:
        ``change_order_<contractor>_<contract number>_<identifier>.pdf``, where
        each part keeps only ASCII letters, digits, ``_``, ``-``, parentheses
        and spaces. A missing part (None/NaN) becomes an empty string.
    """

    def _clean(value) -> str:
        # re.sub raises TypeError on None, NaN and numbers; treat missing
        # values as empty and stringify everything else before filtering.
        text = "" if pd.isna(value) else str(value)
        return re.sub(r'[^a-zA-Z0-9_\-() ]', '', text)

    contractor_name = _clean(row["contractor_name"])
    contract_number = _clean(row["contract_number"])
    change_order_identifier = _clean(row["change_order_identifier"])

    return f"change_order_{contractor_name}_{contract_number}_{change_order_identifier}.pdf"

# One target filename per CHANGE_ORDER row, in the same row order as download_links.
change_order_names = df.apply(name_change_order, axis=1)

# Hand the links and their filenames to the downloader.
alasco.document_downloader.download_documents(document_download_links=download_links, document_names=change_order_names)