# Dashboard for eFarmersHub

The objective of the dashboard is to monitor data quality and to provide strategic and operational insights, as well as advanced analytics.

In [1]:
# data manipulation and analysis
import pandas as pd
import numpy as np

# dash
import dash
from dash import dcc
from dash import html
import plotly.express as px

# database
from sqlalchemy import create_engine, MetaData, inspect, Table, Column, Integer, String, Date, Numeric
from sqlalchemy.engine.url import URL
from sqlalchemy.sql import select

# env file
from dotenv import load_dotenv
import os

# path handling
from pathlib import Path

# logging
import logging

In [2]:
# read the .env file that sits in the working directory into the process environment
dotenv_path = Path("./.env")
load_dotenv(dotenv_path=dotenv_path)

# database connection settings; each one is None when the variable is not set
# NOTE(review): USERNAME is often pre-set by the operating system - confirm the .env value is the one picked up
USERNAME, PASSWORD, HOST, PORT, DATABASE = (
    os.getenv(key) for key in ("USERNAME", "PASSWORD", "HOST", "PORT", "DATABASE")
)

## 1. Sale

In [3]:
def extract_sale(engine):
    """
    read the 2022 rows of the sale table from the sql database and return them as a df
    :param engine: SQLAlchemy engine object
    :return df: sale dataframe
    :raises Exception: any connection/query error is written to ./log and re-raised unchanged
    """

    query = """
                SELECT country_name, parent_name, user_region, user_type, user_name, user_id, customer_id, customer_name,
                    customer_mobile, market_type, business_category, transaction_date, transaction_id,
                    currency_exchange_rate, net_amount, cogs_amount, version
                FROM gds_sale_transactions
                WHERE YEAR(transaction_date) = 2022
                """

    try:
        with engine.connect() as conn:
            return pd.read_sql(query, conn)
    except Exception:
        # basicConfig only takes effect the first time the root logger is configured
        logging.basicConfig(filename="./log", filemode="a", format="%(asctime)s - %(levelname)s - %(message)s",
            level=logging.ERROR)
        logging.exception("failed to read gds_sale_transactions")
        # there is no dataframe to return on failure, so surface the real database error to the caller
        raise

In [4]:
def transform_sale(df):
    """
    clean the raw sale rows, aggregate them per transaction and keep the latest version of each transaction
    :param df: raw sale dataframe (left unmodified)
    :return df: transformed dataframe with one row per transaction_id
    """

    # drop exact duplicate rows; work on a private copy so the caller's dataframe is never mutated
    df = df.drop_duplicates(ignore_index=True).copy()

    # convert transaction_date to datetime
    df["transaction_date"] = pd.to_datetime(df["transaction_date"], format="%Y/%m/%d")

    # convert identifier columns to string
    for col in ("user_id", "customer_id", "customer_mobile", "transaction_id"):
        df[col] = df[col].astype(str)

    # convert numerical columns to float
    for col in ("net_amount", "cogs_amount", "currency_exchange_rate"):
        df[col] = df[col].astype(float)

    # group by transaction (and version) to sum its line items
    # NOTE: groupby's default dropna=True discards rows holding a null in any of the key columns
    keys = ["country_name", "parent_name", "user_region", "user_type", "user_name", "user_id", "customer_id",
        "customer_name", "customer_mobile", "market_type", "business_category", "transaction_date", "transaction_id",
        "version"]
    df = df.groupby(keys) \
        .agg(cogs_amount=("cogs_amount", "sum"),
            net_amount=("net_amount", "sum"),
            currency_exchange_rate=("currency_exchange_rate", "mean")).reset_index()

    # profit & loss
    df = df.rename(columns={"net_amount": "revenue"})
    df["revenue_usd"] = (df["revenue"] / df["currency_exchange_rate"]).round(4)
    df["profit"] = df["revenue"] - df["cogs_amount"]
    df["profit_usd"] = (df["profit"] / df["currency_exchange_rate"]).round(4)

    # add transaction_category column
    df["transaction_category"] = "Sale"

    # sort so the highest version of a transaction comes last, then keep only that row
    df = df.sort_values(["country_name", "parent_name", "user_id", "transaction_id", "version"]) \
            .drop_duplicates(subset=["transaction_id"], keep="last")

    return df

## 2. Machine Rent

In [5]:
def extract_machine_rent(engine):
    """
    read the 2022 rows of the machine rent table from the sql database and return them as a df
    :param engine: SQLAlchemy engine object
    :return df: machine rent dataframe
    :raises Exception: any connection/query error is written to ./log and re-raised unchanged
    """
    query = """
                SELECT country_name, parent_name, user_region, user_type, user_name, user_id, customer_id, customer_name,
                    customer_mobile, business_category, transaction_date, transaction_id, currency_exchange_rate,
                    net_amount, version
                FROM gds_machine_rent_transactions
                WHERE YEAR(transaction_date) = 2022
                """

    try:
        with engine.connect() as conn:
            return pd.read_sql(query, conn)
    except Exception:
        # basicConfig only takes effect the first time the root logger is configured
        logging.basicConfig(filename="./log", filemode="a", format="%(asctime)s - %(levelname)s - %(message)s", level=logging.ERROR)
        logging.exception("failed to read gds_machine_rent_transactions")
        # there is no dataframe to return on failure, so surface the real database error to the caller
        raise

In [6]:
def transform_machine_rent(df):
    """
    clean the raw machine rent rows, aggregate them per transaction and keep the latest version of each
    :param df: raw machine_rent dataframe (left unmodified)
    :return df: transformed dataframe with one row per transaction_id
    """
    # drop exact duplicate rows; work on a private copy so the caller's dataframe is never mutated
    df = df.drop_duplicates(ignore_index=True).copy()

    # convert transaction_date to datetime
    df["transaction_date"] = pd.to_datetime(df["transaction_date"], format="%Y/%m/%d")

    # convert identifier columns to string
    for col in ("user_id", "customer_id", "customer_mobile", "transaction_id"):
        df[col] = df[col].astype(str)

    # convert numerical columns to float; the exchange rate is cast too so that non-float values
    # (e.g. Decimal) can be averaged and divided into the float revenue
    for col in ("net_amount", "currency_exchange_rate"):
        df[col] = df[col].astype(float)

    # group by transaction (and version) to sum its line items
    # NOTE: groupby's default dropna=True discards rows holding a null in any of the key columns
    keys = ["country_name", "parent_name", "user_region", "user_type", "user_name", "user_id", "customer_id",
        "customer_name", "customer_mobile", "business_category", "transaction_date", "transaction_id",
        "version"]
    df = df.groupby(keys) \
        .agg(net_amount=("net_amount", "sum"),
            currency_exchange_rate=("currency_exchange_rate", "mean")).reset_index()

    # profit & loss: no cost column is available here, so profit equals revenue
    df = df.rename(columns={"net_amount": "revenue"})
    df["revenue_usd"] = (df["revenue"] / df["currency_exchange_rate"]).round(4)
    df["profit"] = df["revenue"]
    df["profit_usd"] = (df["profit"] / df["currency_exchange_rate"]).round(4)

    # add market_type and transaction_category columns
    df["market_type"] = "Farmer"
    df["transaction_category"] = "Machinery Rental"

    # sort so the highest version of a transaction comes last, then keep only that row
    df = df.sort_values(["country_name", "parent_name", "user_id", "transaction_id", "version"]) \
            .drop_duplicates(subset=["transaction_id"], keep="last")

    return df

## 3. Advisory

In [7]:
def extract_advisory(engine):
    """
    read the 2022 rows of the advisory table from the sql database and return them as a df
    :param engine: SQLAlchemy engine object
    :return df: advisory dataframe
    :raises Exception: any connection/query error is written to ./log and re-raised unchanged
    """
    query = """
                SELECT country_name, parent_name, user_region, user_type, user_name, user_id, customer_id, customer_name,
                    customer_mobile, business_categories, transaction_date, transaction_id, currency_exchange_rate,
                    amount, version
                FROM gds_advisory_transactions
                WHERE YEAR(transaction_date) = 2022
                """

    try:
        with engine.connect() as conn:
            return pd.read_sql(query, conn)
    except Exception:
        # basicConfig only takes effect the first time the root logger is configured
        logging.basicConfig(filename="./log", filemode="a", format="%(asctime)s - %(levelname)s - %(message)s", level=logging.ERROR)
        logging.exception("failed to read gds_advisory_transactions")
        # there is no dataframe to return on failure, so surface the real database error to the caller
        raise

In [8]:
def transform_advisory(df):
    """
    clean the raw advisory rows, aggregate them per transaction and keep the latest version of each
    :param df: raw advisory dataframe (left unmodified)
    :return df: transformed dataframe with one row per transaction_id
    """

    # drop exact duplicate rows; work on a private copy so the caller's dataframe is never mutated
    df = df.drop_duplicates(ignore_index=True).copy()

    # convert transaction_date to datetime
    df["transaction_date"] = pd.to_datetime(df["transaction_date"], format="%Y/%m/%d")

    # convert identifier columns to string
    for col in ("user_id", "customer_id", "customer_mobile", "transaction_id"):
        df[col] = df[col].astype(str)

    # convert numerical columns to float; the exchange rate is cast too so that non-float values
    # (e.g. Decimal) can be averaged and divided into the float revenue
    for col in ("amount", "currency_exchange_rate"):
        df[col] = df[col].astype(float)

    # group by transaction (and version) to sum its line items
    # NOTE: groupby's default dropna=True discards rows holding a null in any of the key columns
    keys = ["country_name", "parent_name", "user_region", "user_type", "user_name", "user_id", "customer_id",
        "customer_name", "customer_mobile", "business_categories", "transaction_date", "transaction_id",
        "version"]
    df = df.groupby(keys) \
        .agg(amount=("amount", "sum"),
            currency_exchange_rate=("currency_exchange_rate", "mean")).reset_index()

    # profit & loss: no cost column is available here, so profit equals revenue;
    # business_categories is renamed to the column name used by the other transaction tables
    df = df.rename(columns={"amount": "revenue",
        "business_categories": "business_category"})
    df["revenue_usd"] = (df["revenue"] / df["currency_exchange_rate"]).round(4)
    df["profit"] = df["revenue"]
    df["profit_usd"] = (df["profit"] / df["currency_exchange_rate"]).round(4)

    # add market_type and transaction_category columns
    df["market_type"] = "Farmer"
    df["transaction_category"] = "Advisory"

    # sort so the highest version of a transaction comes last, then keep only that row
    df = df.sort_values(["country_name", "parent_name", "user_id", "transaction_id", "version"]) \
            .drop_duplicates(subset=["transaction_id"], keep="last")

    return df

## 4. Purchase

In [9]:
def extract_purchase(engine):
    """
    read the 2022 rows of the purchase table from the sql database and return them as a df
    :param engine: SQLAlchemy engine object
    :return df: purchase dataframe
    :raises Exception: any connection/query error is written to ./log and re-raised unchanged
    """
    query = """
                SELECT country_name, parent_name, user_region, user_type, user_name, user_id, supplier_id, supplier_name,
                    supplier_mobile, market_type, business_category, transaction_date, transaction_id,
                    currency_exchange_rate, net_amount, version
                FROM gds_purchase_transactions
                WHERE YEAR(transaction_date) = 2022
                """

    try:
        with engine.connect() as conn:
            return pd.read_sql(query, conn)
    except Exception:
        # basicConfig only takes effect the first time the root logger is configured
        logging.basicConfig(filename="./log", filemode="a", format="%(asctime)s - %(levelname)s - %(message)s", level=logging.ERROR)
        logging.exception("failed to read gds_purchase_transactions")
        # there is no dataframe to return on failure, so surface the real database error to the caller
        raise

In [10]:
def transform_purchase(df):
    """
    clean the raw purchase rows, aggregate them per transaction and keep the latest version of each
    :param df: raw purchase dataframe (left unmodified)
    :return df: transformed dataframe with one row per transaction_id
    """
    # drop exact duplicate rows; work on a private copy so the caller's dataframe is never mutated
    df = df.drop_duplicates(ignore_index=True).copy()

    # convert transaction_date to datetime
    df["transaction_date"] = pd.to_datetime(df["transaction_date"], format="%Y/%m/%d")

    # convert identifier columns to string
    for col in ("user_id", "supplier_id", "supplier_mobile", "transaction_id"):
        df[col] = df[col].astype(str)

    # convert numerical columns to float
    for col in ("net_amount", "currency_exchange_rate"):
        df[col] = df[col].astype(float)

    # group by transaction (and version) to sum its line items
    # NOTE: groupby's default dropna=True discards rows holding a null in any of the key columns
    keys = ["country_name", "parent_name", "user_region", "user_type", "user_name", "user_id", "supplier_id",
        "supplier_name", "supplier_mobile", "market_type", "business_category", "transaction_date", "transaction_id",
        "version"]
    df = df.groupby(keys) \
        .agg(net_amount=("net_amount", "sum"),
            currency_exchange_rate=("currency_exchange_rate", "mean")).reset_index()

    # usd conversion
    df["net_amount_usd"] = (df["net_amount"] / df["currency_exchange_rate"]).round(4)

    # add transaction_category column
    df["transaction_category"] = "Purchase"

    # sort so the highest version of a transaction comes last, then keep only that row
    df = df.sort_values(["country_name", "parent_name", "user_id", "transaction_id", "version"]) \
            .drop_duplicates(subset=["transaction_id"], keep="last")

    return df

## 5. Processing

In [11]:
def extract_processing(engine):
    """
    read the 2022 rows of the processing table from the sql database and return them as a df
    :param engine: SQLAlchemy engine object
    :return df: processing dataframe
    :raises Exception: any connection/query error is written to ./log and re-raised unchanged
    """
    query = """
                SELECT country_name, parent_name, user_region, user_type, user_name, user_id, business_category,
                    transaction_date, transaction_id, currency_exchange_rate, production_cost, version
                FROM gds_processing_transactions
                WHERE YEAR(transaction_date) = 2022
                """

    try:
        with engine.connect() as conn:
            return pd.read_sql(query, conn)
    except Exception:
        # basicConfig only takes effect the first time the root logger is configured
        logging.basicConfig(filename="./log", filemode="a", format="%(asctime)s - %(levelname)s - %(message)s", level=logging.ERROR)
        logging.exception("failed to read gds_processing_transactions")
        # there is no dataframe to return on failure, so surface the real database error to the caller
        raise

In [12]:
def transform_processing(df):
    """
    clean the raw processing rows, aggregate them per transaction and keep the latest version of each
    :param df: raw processing dataframe (left unmodified)
    :return df: transformed dataframe with one row per transaction_id
    """
    # drop exact duplicate rows; work on a private copy so the caller's dataframe is never mutated
    df = df.drop_duplicates(ignore_index=True).copy()

    # convert transaction_date to datetime
    df["transaction_date"] = pd.to_datetime(df["transaction_date"], format="%Y/%m/%d")

    # convert identifier columns to string
    for col in ("user_id", "transaction_id"):
        df[col] = df[col].astype(str)

    # convert numerical columns to float
    for col in ("production_cost", "currency_exchange_rate"):
        df[col] = df[col].astype(float)

    # group by transaction (and version) to sum its line items
    # NOTE: groupby's default dropna=True discards rows holding a null in any of the key columns
    keys = ["country_name", "parent_name", "user_region", "user_type", "user_name", "user_id",
        "business_category", "transaction_date", "transaction_id", "version"]
    df = df.groupby(keys) \
        .agg(production_cost=("production_cost", "sum"),
            currency_exchange_rate=("currency_exchange_rate", "mean")).reset_index()

    # usd conversion; production_cost is exposed under the net_amount name
    df = df.rename(columns={"production_cost": "net_amount"})
    df["net_amount_usd"] = (df["net_amount"] / df["currency_exchange_rate"]).round(4)

    # add market_type and transaction_category columns
    df["market_type"] = "Farmer's Hub"
    df["transaction_category"] = "Processing"

    # sort so the highest version of a transaction comes last, then keep only that row
    df = df.sort_values(["country_name", "parent_name", "user_id", "transaction_id", "version"]) \
            .drop_duplicates(subset=["transaction_id"], keep="last")

    return df

## 6. Expenses

In [13]:
def extract_expense(engine):
    """
    read the 2022 rows of the expense table from the sql database and return them as a df
    :param engine: SQLAlchemy engine object
    :return df: expense dataframe
    :raises Exception: any connection/query error is written to ./log and re-raised unchanged
    """
    query = """
                SELECT country_name, parent_name, user_region, user_type, user_name, user_id, expense_category,
                    business_category, transaction_date, transaction_id, currency_exchange_rate, total_amount, version
                FROM gds_expense_transactions
                WHERE YEAR(transaction_date) = 2022
                """

    try:
        with engine.connect() as conn:
            return pd.read_sql(query, conn)
    except Exception:
        # basicConfig only takes effect the first time the root logger is configured
        logging.basicConfig(filename="./log", filemode="a", format="%(asctime)s - %(levelname)s - %(message)s", level=logging.ERROR)
        logging.exception("failed to read gds_expense_transactions")
        # there is no dataframe to return on failure, so surface the real database error to the caller
        raise

In [14]:
def transform_expense(df):
    """
    clean the raw expense rows, aggregate them per transaction and keep the latest version of each
    :param df: raw expense dataframe (left unmodified)
    :return df: transformed dataframe with one row per transaction_id
    """
    # drop exact duplicate rows; work on a private copy so the caller's dataframe is never mutated
    df = df.drop_duplicates(ignore_index=True).copy()

    # convert transaction_date to datetime
    df["transaction_date"] = pd.to_datetime(df["transaction_date"], format="%Y/%m/%d")

    # convert identifier columns to string
    for col in ("user_id", "transaction_id"):
        df[col] = df[col].astype(str)

    # convert numerical columns to float
    for col in ("total_amount", "currency_exchange_rate"):
        df[col] = df[col].astype(float)

    # group by transaction (and version) to sum its line items
    # NOTE: groupby's default dropna=True discards rows holding a null in any of the key columns
    keys = ["country_name", "parent_name", "user_region", "user_type", "user_name", "user_id",
        "business_category", "expense_category", "transaction_date", "transaction_id", "version"]
    df = df.groupby(keys) \
        .agg(total_amount=("total_amount", "sum"),
            currency_exchange_rate=("currency_exchange_rate", "mean")).reset_index()

    # usd conversion; total_amount is exposed under the net_amount name
    df = df.rename(columns={"total_amount": "net_amount"})
    df["net_amount_usd"] = (df["net_amount"] / df["currency_exchange_rate"]).round(4)

    # add market_type and transaction_category columns
    df["market_type"] = "Farmer's Hub"
    df["transaction_category"] = "Expense"

    # sort so the highest version of a transaction comes last, then keep only that row
    df = df.sort_values(["country_name", "parent_name", "user_id", "transaction_id", "version"]) \
            .drop_duplicates(subset=["transaction_id"], keep="last")

    return df

## 7. Machine Purchase

In [15]:
def extract_machine_purchase(engine):
    """
    read the 2022 rows of the machine_purchase table from the sql database and return them as a df
    :param engine: SQLAlchemy engine object
    :return df: machine purchase dataframe
    :raises Exception: any connection/query error is written to ./log and re-raised unchanged
    """
    query = """
                SELECT country_name, parent_name, user_region, user_type, user_name, user_id, supplier_id, supplier_name,
                    supplier_mobile, business_category, transaction_date, transaction_id, currency_exchange_rate,
                    total_amount, version
                FROM gds_machine_purchase_transactions
                WHERE YEAR(transaction_date) = 2022
                """

    try:
        with engine.connect() as conn:
            return pd.read_sql(query, conn)
    except Exception:
        # basicConfig only takes effect the first time the root logger is configured
        logging.basicConfig(filename="./log", filemode="a", format="%(asctime)s - %(levelname)s - %(message)s", level=logging.ERROR)
        logging.exception("failed to read gds_machine_purchase_transactions")
        # there is no dataframe to return on failure, so surface the real database error to the caller
        raise

In [16]:
def transform_machine_purchase(df):
    """
    clean the raw machine purchase rows, aggregate them per transaction and keep the latest version of each
    :param df: raw machine_purchase dataframe (left unmodified)
    :return df: transformed dataframe with one row per transaction_id
    """
    # drop exact duplicate rows; work on a private copy so the caller's dataframe is never mutated
    df = df.drop_duplicates(ignore_index=True).copy()

    # convert transaction_date to datetime
    df["transaction_date"] = pd.to_datetime(df["transaction_date"], format="%Y/%m/%d")

    # convert identifier columns to string
    for col in ("user_id", "supplier_id", "supplier_mobile", "transaction_id"):
        df[col] = df[col].astype(str)

    # convert numerical columns to float
    for col in ("total_amount", "currency_exchange_rate"):
        df[col] = df[col].astype(float)

    # group by transaction (and version) to sum its line items
    # NOTE: groupby's default dropna=True discards rows holding a null in any of the key columns
    keys = ["country_name", "parent_name", "user_region", "user_type", "user_name", "user_id", "supplier_id",
        "supplier_name", "supplier_mobile", "business_category", "transaction_date", "transaction_id", "version"]
    df = df.groupby(keys) \
        .agg(total_amount=("total_amount", "sum"),
            currency_exchange_rate=("currency_exchange_rate", "mean")).reset_index()

    # usd conversion; total_amount is exposed under the net_amount name
    df = df.rename(columns={"total_amount": "net_amount"})
    df["net_amount_usd"] = (df["net_amount"] / df["currency_exchange_rate"]).round(4)

    # add market_type and transaction_category columns
    df["market_type"] = "Farmer's Hub"
    df["transaction_category"] = "Machinery Purchase"

    # sort so the highest version of a transaction comes last, then keep only that row
    df = df.sort_values(["country_name", "parent_name", "user_id", "transaction_id", "version"]) \
            .drop_duplicates(subset=["transaction_id"], keep="last")

    return df

## 8. User Information

In [17]:
def extract_user(engine):
    """
    read the whole user_information table from the sql database and return it as a df
    :param engine: SQLAlchemy engine object
    :return df: user information dataframe
    :raises Exception: any connection/query error is written to ./log and re-raised unchanged
    """
    try:
        with engine.connect() as conn:
            return pd.read_sql_table("gds_users_information", conn)
    except Exception:
        # basicConfig only takes effect the first time the root logger is configured
        logging.basicConfig(filename="./log", filemode="a", format="%(asctime)s - %(levelname)s - %(message)s", level=logging.ERROR)
        logging.exception("failed to read gds_users_information")
        # there is no dataframe to return on failure, so surface the real database error to the caller
        raise

## MAIN

In [18]:
if __name__ == "__main__":
    # assemble the MySQL (pymysql driver) connection url from the settings read from the environment
    connect_url = URL.create(
        "mysql+pymysql",
        username=USERNAME,
        password=PASSWORD,
        host=HOST,
        port=PORT,
        database=DATABASE
    )

    # pass echo=True to create_engine to trace the emitted SQL while debugging
    engine = create_engine(connect_url)

    # revenue-side transaction tables: extract the raw rows, then transform them
    sale = transform_sale(extract_sale(engine))
    machine_rent = transform_machine_rent(extract_machine_rent(engine))
    advisory = transform_advisory(extract_advisory(engine))

    # cost-side transaction tables
    purchase = transform_purchase(extract_purchase(engine))
    processing = transform_processing(extract_processing(engine))
    expense = transform_expense(extract_expense(engine))
    machine_purchase = transform_machine_purchase(extract_machine_purchase(engine))

    # stack every transformed frame into one table; columns missing from a frame are filled with NaN
    df = pd.concat(
        [sale, machine_rent, advisory, purchase, processing, expense, machine_purchase],
        sort=False,
        ignore_index=True,
    )

    # overwrite the master table in the database with the combined result
    df.to_sql('financial_master_table', con=engine, if_exists='replace', index = False)

In [19]:
# report the (rows, columns) shape of every transformed frame on a single line
print(*(frame.shape for frame in (sale, machine_rent, advisory, purchase, expense, processing, machine_purchase)))

(36167, 21) (869, 20) (10, 20) (4035, 18) (1071, 16) (764, 15) (42, 18)
