# Analytics Notebook

### Connect To MongoDB

In [1]:
## Import Libraries
import os
from pymongo import MongoClient
from dotenv import load_dotenv

# Load environment variables
load_dotenv('../.env.local')
db_uri = os.getenv('MONGODB_URI')

## Connect To DB
client = MongoClient(db_uri)
db = client['test']

In [2]:
# Testing
print(list(db['rewards'].find()))

[{'_id': ObjectId('66feaedd49ba7e8835be8637'), 'rewardCode': 'REW-1234A', 'userId': 'user_2mbSBCVVpY6o84gkOho35Ip0GZn', 'status': 'ACTIVE', 'amount': 15, 'expiryDate': '2024-10-04'}]


## Analytics Dashboard

Uses Cleaned Preprocessed Data Without Sensitive Information. <br/>
Preprocessing of Sensitive Data Is Handled Separately.

In [37]:
## General Libraries
import pandas as pd
import numpy as np
import json
import calendar
from datetime import datetime

## Time Series
from statsmodels.tsa.holtwinters import ExponentialSmoothing

## Suppress Warnings
import warnings
warnings.filterwarnings('ignore')

### Job Analytics

In [4]:
# General Labels
categories_df = pd.read_csv('./datasets/categories.csv')
job_types = categories_df['name'].tolist()
print(job_types)

['Electrician', 'Ventilation', 'Plumber', 'Handyman', 'Aircon']


In [5]:
# Function Extract Date
def extract_start_date(column):
    try:
        col_dict = eval(column)
        start_date = pd.to_datetime(col_dict[0]['start']['$date'])
        return start_date
    except:
        return None
    
def extract_end_date(column):
    try:
        col_dict = eval(column)
        start_date = pd.to_datetime(col_dict[0]['end']['$date'])
        return start_date
    except:
        return None

In [6]:
ja_jobs_df = pd.read_csv('./datasets/ja_jobs.csv')
ja_jobs_df['date'] = pd.to_datetime(ja_jobs_df['date'])

#### Job Type Distribution

In [7]:
# Past 1 Month

## Total
recent_jobs = ja_jobs_df[ja_jobs_df['date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=1)]
total_job_frequency = recent_jobs['category'].value_counts(sort=False).tolist()
data_job_type_dist = {
    'labels': job_types,
    'data': total_job_frequency
}

with open('../public/analytics/job/job-type-dist-one-month-total.json', 'w') as file:
    json.dump(data_job_type_dist, file, indent=4)

## Average
average_job_frequency = total_job_frequency
data_job_type_dist = {
    'labels': job_types,
    'data': average_job_frequency
}

with open('../public/analytics/job/job-type-dist-one-month-average.json', 'w') as file:
    json.dump(data_job_type_dist, file, indent=4)   

In [8]:
# Past 3 Month
## Total
recent_jobs = ja_jobs_df[ja_jobs_df['date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=3)]
total_job_frequency = recent_jobs['category'].value_counts(sort=False).tolist()
data_job_type_dist = {
    'labels': job_types,
    'data': total_job_frequency
}

with open('../public/analytics/job/job-type-dist-three-month-total.json', 'w') as file:
    json.dump(data_job_type_dist, file, indent=4)  

## Average
average_job_frequency = [np.ceil(x / 3) for x in total_job_frequency]
data_job_type_dist = {
    'labels': job_types,
    'data': average_job_frequency
}

with open('../public/analytics/job/job-type-dist-three-month-average.json', 'w') as file:
    json.dump(data_job_type_dist, file, indent=4) 

In [9]:
# Past 6 Month
## Total
recent_jobs = ja_jobs_df[ja_jobs_df['date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=6)]
total_job_frequency = recent_jobs['category'].value_counts(sort=False).tolist()
data_job_type_dist = {
    'labels': job_types,
    'data': total_job_frequency
}

with open('../public/analytics/job/job-type-dist-six-month-total.json', 'w') as file:
    json.dump(data_job_type_dist, file, indent=4)

## Average
average_job_frequency = [np.ceil(x / 6) for x in total_job_frequency]
data_job_type_dist = {
    'labels': job_types,
    'data': average_job_frequency
}

with open('../public/analytics/job/job-type-dist-six-month-average.json', 'w') as file:
    json.dump(data_job_type_dist, file, indent=4) 

In [10]:
# Past 12 Month
## Total
recent_jobs = ja_jobs_df[ja_jobs_df['date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=12)]
total_job_frequency = recent_jobs['category'].value_counts(sort=False).tolist()
data_job_type_dist = {
    'labels': job_types,
    'data': total_job_frequency
}

with open('../public/analytics/job/job-type-dist-twelve-month-total.json', 'w') as file:
    json.dump(data_job_type_dist, file, indent=4)

## Average
average_job_frequency = [np.ceil(x / 12) for x in total_job_frequency]
data_job_type_dist = {
    'labels': job_types,
    'data': average_job_frequency
}

with open('../public/analytics/job/job-type-dist-twelve-month-average.json', 'w') as file:
    json.dump(data_job_type_dist, file, indent=4) 

#### Job Duration Distribution

In [11]:
# Past 1 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=1)]
duration_stats = recent_jobs.groupby('category').agg(
    total_duration=('duration', 'sum'),
    frequency=('category', 'count')
).reset_index()
duration_stats['average_duration'] = np.ceil(duration_stats['total_duration'] / duration_stats['frequency'])
duration_stats['category'] = pd.Categorical(duration_stats['category'].str.capitalize(), categories=job_types, ordered=True)
duration_stats = duration_stats.sort_values('category').reset_index(drop=True)
print(duration_stats[['category', 'frequency', 'total_duration', 'average_duration']])

data_job_duration_dist = {
    'labels': job_types,
    'data': duration_stats['average_duration'].tolist()
}

with open('../public/analytics/job/job-duration-dist-one-month.json', 'w') as file:
    json.dump(data_job_duration_dist, file, indent=4) 

      category  frequency  total_duration  average_duration
0  Electrician        138           10725              78.0
1  Ventilation         71            9690             137.0
2      Plumber         45            4140              92.0
3     Handyman        165            5595              34.0
4       Aircon        153           38940             255.0


In [12]:
# Past 3 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=3)]
duration_stats = recent_jobs.groupby('category').agg(
    total_duration=('duration', 'sum'),
    frequency=('category', 'count')
).reset_index()
duration_stats['average_duration'] = np.ceil(duration_stats['total_duration'] / duration_stats['frequency'])
duration_stats['category'] = pd.Categorical(duration_stats['category'].str.capitalize(), categories=job_types, ordered=True)
duration_stats = duration_stats.sort_values('category').reset_index(drop=True)
print(duration_stats[['category', 'frequency', 'total_duration', 'average_duration']])

data_job_duration_dist = {
    'labels': job_types,
    'data': duration_stats['average_duration'].tolist()
}

with open('../public/analytics/job/job-duration-dist-three-month.json', 'w') as file:
    json.dump(data_job_duration_dist, file, indent=4) 

      category  frequency  total_duration  average_duration
0  Electrician        222           17370              79.0
1  Ventilation        113           15375             137.0
2      Plumber        179           16575              93.0
3     Handyman        262            8995              35.0
4       Aircon        246           62910             256.0


In [13]:
# Past 6 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=6)]
duration_stats = recent_jobs.groupby('category').agg(
    total_duration=('duration', 'sum'),
    frequency=('category', 'count')
).reset_index()
duration_stats['average_duration'] = np.ceil(duration_stats['total_duration'] / duration_stats['frequency'])
duration_stats['category'] = pd.Categorical(duration_stats['category'].str.capitalize(), categories=job_types, ordered=True)
duration_stats = duration_stats.sort_values('category').reset_index(drop=True)
print(duration_stats[['category', 'frequency', 'total_duration', 'average_duration']])

data_job_duration_dist = {
    'labels': job_types,
    'data': duration_stats['average_duration'].tolist()
}

with open('../public/analytics/job/job-duration-dist-six-month.json', 'w') as file:
    json.dump(data_job_duration_dist, file, indent=4) 

      category  frequency  total_duration  average_duration
0  Electrician        416           32280              78.0
1  Ventilation        284           38805             137.0
2      Plumber        319           29505              93.0
3     Handyman        655           23280              36.0
4       Aircon        460          115050             251.0


In [14]:
# Past 12 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=12)]
duration_stats = recent_jobs.groupby('category').agg(
    total_duration=('duration', 'sum'),
    frequency=('category', 'count')
).reset_index()
duration_stats['average_duration'] = np.ceil(duration_stats['total_duration'] / duration_stats['frequency'])
duration_stats['category'] = pd.Categorical(duration_stats['category'].str.capitalize(), categories=job_types, ordered=True)
duration_stats = duration_stats.sort_values('category').reset_index(drop=True)
print(duration_stats[['category', 'frequency', 'total_duration', 'average_duration']])

data_job_duration_dist = {
    'labels': job_types,
    'data': duration_stats['average_duration'].tolist()
}

with open('../public/analytics/job/job-duration-dist-twelve-month.json', 'w') as file:
    json.dump(data_job_duration_dist, file, indent=4) 

      category  frequency  total_duration  average_duration
0  Electrician        915           70470              78.0
1  Ventilation        585           80295             138.0
2      Plumber        607           55785              92.0
3     Handyman       1347           48505              37.0
4       Aircon       1010          249390             247.0


#### Job Delay Distribution

In [15]:
# Past 1 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=1)]
delay_stats = recent_jobs.groupby('category').agg(
    total_delay=('delay', 'sum'),
    frequency=('category', 'count')
).reset_index()
delay_stats['average_delay'] = np.ceil(delay_stats['total_delay'] / delay_stats['frequency'])
delay_stats['category'] = pd.Categorical(delay_stats['category'].str.capitalize(), categories=job_types, ordered=True)
delay_stats = delay_stats.sort_values('category').reset_index(drop=True)
print(delay_stats[['category', 'frequency', 'total_delay', 'average_delay']])

data_job_delay_dist = {
    'labels': job_types,
    'data': delay_stats['average_delay'].tolist()
}

with open('../public/analytics/job/job-delay-dist-one-month.json', 'w') as file:
    json.dump(data_job_delay_dist, file, indent=4) 

      category  frequency  total_delay  average_delay
0  Electrician        138          700            6.0
1  Ventilation         71          800           12.0
2      Plumber         45          225            5.0
3     Handyman        165          288            2.0
4       Aircon        153         2250           15.0


In [16]:
# Past 3 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=3)]
delay_stats = recent_jobs.groupby('category').agg(
    total_delay=('delay', 'sum'),
    frequency=('category', 'count')
).reset_index()
delay_stats['average_delay'] = np.ceil(delay_stats['total_delay'] / delay_stats['frequency'])
delay_stats['category'] = pd.Categorical(delay_stats['category'].str.capitalize(), categories=job_types, ordered=True)
delay_stats = delay_stats.sort_values('category').reset_index(drop=True)
print(delay_stats[['category', 'frequency', 'total_delay', 'average_delay']])

data_job_delay_dist = {
    'labels': job_types,
    'data': delay_stats['average_delay'].tolist()
}

with open('../public/analytics/job/job-delay-dist-three-month.json', 'w') as file:
    json.dump(data_job_delay_dist, file, indent=4) 

      category  frequency  total_delay  average_delay
0  Electrician        222         1090            5.0
1  Ventilation        113         1240           11.0
2      Plumber        179          795            5.0
3     Handyman        262          498            2.0
4       Aircon        246         3750           16.0


In [17]:
# Past 6 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=6)]
delay_stats = recent_jobs.groupby('category').agg(
    total_delay=('delay', 'sum'),
    frequency=('category', 'count')
).reset_index()
delay_stats['average_delay'] = np.ceil(delay_stats['total_delay'] / delay_stats['frequency'])
delay_stats['category'] = pd.Categorical(delay_stats['category'].str.capitalize(), categories=job_types, ordered=True)
delay_stats = delay_stats.sort_values('category').reset_index(drop=True)
print(delay_stats[['category', 'frequency', 'total_delay', 'average_delay']])

data_job_delay_dist = {
    'labels': job_types,
    'data': delay_stats['average_delay'].tolist()
}

with open('../public/analytics/job/job-delay-dist-six-month.json', 'w') as file:
    json.dump(data_job_delay_dist, file, indent=4) 

      category  frequency  total_delay  average_delay
0  Electrician        416         2130            6.0
1  Ventilation        284         2810           10.0
2      Plumber        319         1495            5.0
3     Handyman        655         1268            2.0
4       Aircon        460         6960           16.0


In [18]:
# Past 12 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=12)]
delay_stats = recent_jobs.groupby('category').agg(
    total_delay=('delay', 'sum'),
    frequency=('category', 'count')
).reset_index()
delay_stats['average_delay'] = np.ceil(delay_stats['total_delay'] / delay_stats['frequency'])
delay_stats['category'] = pd.Categorical(delay_stats['category'].str.capitalize(), categories=job_types, ordered=True)
delay_stats = delay_stats.sort_values('category').reset_index(drop=True)
print(delay_stats[['category', 'frequency', 'total_delay', 'average_delay']])

data_job_delay_dist = {
    'labels': job_types,
    'data': delay_stats['average_delay'].tolist()
}

# Save data to JSON file
with open('../public/analytics/job/job-delay-dist-twelve-month.json', 'w') as file:
    json.dump(data_job_delay_dist, file, indent=4) 

      category  frequency  total_delay  average_delay
0  Electrician        915         4615            6.0
1  Ventilation        585         5520           10.0
2      Plumber        607         3015            5.0
3     Handyman       1347         2698            3.0
4       Aircon       1010        14985           15.0


#### Job Revenue Distribution

In [19]:
ja_revenue_df = pd.read_csv('./datasets/ja_revenue.csv')
ja_revenue_df['date'] = pd.to_datetime(ja_revenue_df['date'])
job_categories = ['electrician', 'ventilation', 'plumber', 'handyman', 'aircon']

In [20]:
# Past 1 Month

## Total
recent_jobs = ja_revenue_df[ja_revenue_df['date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=1)]
total_job_revenue = [recent_jobs[job_type].sum() for job_type in job_categories]

data_job_revenue_dist = {
    'labels': job_types,
    'data': total_job_revenue
}

with open('../public/analytics/job/job-revenue-dist-one-month-total.json', 'w') as file:
    json.dump(data_job_revenue_dist, file, indent=4)

## Average
average_job_revenue = total_job_revenue
data_job_revenue_dist = {
    'labels': job_types,
    'data': average_job_revenue
}

with open('../public/analytics/job/job-revenue-dist-one-month-average.json', 'w') as file:
    json.dump(data_job_revenue_dist, file, indent=4)  

In [21]:
# Past 3 Month

## Total
recent_jobs = ja_revenue_df[ja_revenue_df['date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=3)]
total_job_revenue = [recent_jobs[job_type].sum() for job_type in job_categories]

data_job_revenue_dist = {
    'labels': job_types,
    'data': total_job_revenue
}

with open('../public/analytics/job/job-revenue-dist-three-month-total.json', 'w') as file:
    json.dump(data_job_revenue_dist, file, indent=4)

## Average
average_job_revenue = [round(x / 3, 2) for x in total_job_revenue]

data_job_revenue_dist = {
    'labels': job_types,
    'data': average_job_revenue
}

with open('../public/analytics/job/job-revenue-dist-three-month-average.json', 'w') as file:
    json.dump(data_job_revenue_dist, file, indent=4)  

In [22]:
# Past 6 Month

## Total
recent_jobs = ja_revenue_df[ja_revenue_df['date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=6)]
total_job_revenue = [recent_jobs[job_type].sum() for job_type in job_categories]

data_job_revenue_dist = {
    'labels': job_types,
    'data': total_job_revenue
}

with open('../public/analytics/job/job-revenue-dist-six-month-total.json', 'w') as file:
    json.dump(data_job_revenue_dist, file, indent=4)

## Average
average_job_revenue = [round(x / 6, 2) for x in total_job_revenue]
data_job_revenue_dist = {
    'labels': job_types,
    'data': average_job_revenue
}

with open('../public/analytics/job/job-revenue-dist-six-month-average.json', 'w') as file:
    json.dump(data_job_revenue_dist, file, indent=4)  

In [23]:
# Past 12 Month

## Total
recent_jobs = ja_revenue_df[ja_revenue_df['date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=12)]
total_job_revenue = [recent_jobs[job_type].sum() for job_type in job_categories]
data_job_revenue_dist = {
    'labels': job_types,
    'data': total_job_revenue
}

with open('../public/analytics/job/job-revenue-dist-twelve-month-total.json', 'w') as file:
    json.dump(data_job_revenue_dist, file, indent=4)

## Average
average_job_revenue = [round(x / 12, 2) for x in total_job_revenue]
data_job_revenue_dist = {
    'labels': job_types,
    'data': average_job_revenue
}

with open('../public/analytics/job/job-revenue-dist-twelve-month-average.json', 'w') as file:
    json.dump(data_job_revenue_dist, file, indent=4)  

#### Job Revenue Forecasting

In [24]:
# Calculate the date 21 months ago from today
start_date = datetime(2024, 10, 1) - pd.DateOffset(months=24)

def forecast_revenue(df, seasonal_periods, forecast_horizon):
    model = ExponentialSmoothing(df['revenue'], seasonal='add', seasonal_periods=seasonal_periods)
    model_fit = model.fit()
    forecast = model_fit.forecast(forecast_horizon)
    return forecast

In [25]:
# Electrician
historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'revenue': ja_revenue_df['electrician'].tolist()
}
electrician_df = pd.DataFrame(historical_data)
electrician_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = electrician_df['revenue'][-12:]
monthly_forecast = np.ceil(forecast_revenue(electrician_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-revenue-forecast-electrician-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = electrician_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['revenue'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_revenue(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-revenue-forecast-electrician-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

In [26]:
# Ventilation
historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'revenue': ja_revenue_df['ventilation'].tolist()
}
ventilation_df = pd.DataFrame(historical_data)
ventilation_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = ventilation_df['revenue'][-12:]
monthly_forecast = np.ceil(forecast_revenue(ventilation_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-revenue-forecast-ventilation-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = ventilation_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['revenue'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_revenue(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-revenue-forecast-ventilation-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

In [27]:
# Plumber
historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'revenue': ja_revenue_df['plumber'].tolist()
}
plumber_df = pd.DataFrame(historical_data)
plumber_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = plumber_df['revenue'][-12:]
monthly_forecast = np.ceil(forecast_revenue(plumber_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-revenue-forecast-plumber-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = plumber_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['revenue'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_revenue(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-revenue-forecast-plumber-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

In [28]:
# Handyman
historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'revenue': ja_revenue_df['handyman'].tolist()
}
handyman_df = pd.DataFrame(historical_data)
handyman_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = handyman_df['revenue'][-12:]
monthly_forecast = np.ceil(forecast_revenue(handyman_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-revenue-forecast-handyman-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = handyman_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['revenue'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_revenue(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-revenue-forecast-handyman-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

In [29]:
# Aircon
historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'revenue': ja_revenue_df['aircon'].tolist()
}
aircon_df = pd.DataFrame(historical_data)
aircon_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = aircon_df['revenue'][-12:]
monthly_forecast = np.ceil(forecast_revenue(aircon_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-revenue-forecast-aircon-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = aircon_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['revenue'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_revenue(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-revenue-forecast-aircon-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

#### Job Demand Forecast

In [30]:
ja_jobs_df['year'] = ja_jobs_df['date'].dt.year
ja_jobs_df['month'] = ja_jobs_df['date'].dt.month

def forecast_demand(df, seasonal_periods, forecast_horizon):
    model = ExponentialSmoothing(df['demand'], seasonal='add', seasonal_periods=seasonal_periods)
    model_fit = model.fit()
    forecast = model_fit.forecast(forecast_horizon)
    return forecast

In [31]:
job_counts = ja_jobs_df.groupby(['year', 'month', 'category']).size().reset_index(name='total_jobs')
print(job_counts)

     year  month     category  total_jobs
0    2022     10       aircon          63
1    2022     10  electrician          95
2    2022     10     handyman         100
3    2022     10      plumber          23
4    2022     10  ventilation          45
..    ...    ...          ...         ...
115  2024      9       aircon         153
116  2024      9  electrician         138
117  2024      9     handyman         165
118  2024      9      plumber          45
119  2024      9  ventilation          71

[120 rows x 4 columns]


In [32]:
# Electrician
electrician_counts = job_counts[job_counts['category'] == 'electrician']
print(electrician_counts)

historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'demand': electrician_counts['total_jobs'].tolist()
}
electrician_df = pd.DataFrame(historical_data)
electrician_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = electrician_df['demand'][-12:]
monthly_forecast = np.ceil(forecast_demand(electrician_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-demand-forecast-electrician-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = electrician_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['demand'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_demand(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-demand-forecast-electrician-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 


     year  month     category  total_jobs
1    2022     10  electrician          95
6    2022     11  electrician          86
11   2022     12  electrician          48
16   2023      1  electrician          29
21   2023      2  electrician          59
26   2023      3  electrician          45
31   2023      4  electrician          88
36   2023      5  electrician          34
41   2023      6  electrician          54
46   2023      7  electrician          66
51   2023      8  electrician         102
56   2023      9  electrician          56
61   2023     10  electrician          84
66   2023     11  electrician          62
71   2023     12  electrician          85
76   2024      1  electrician          29
81   2024      2  electrician         118
86   2024      3  electrician         121
91   2024      4  electrician          96
96   2024      5  electrician          23
101  2024      6  electrician          75
106  2024      7  electrician          48
111  2024      8  electrician     

In [33]:
# Ventilation
ventilation_counts = job_counts[job_counts['category'] == 'ventilation']
print(ventilation_counts)

historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'demand': ventilation_counts['total_jobs'].tolist()
}
ventilation_df = pd.DataFrame(historical_data)
ventilation_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = ventilation_df['demand'][-12:]
monthly_forecast = np.ceil(forecast_demand(ventilation_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-demand-forecast-ventilation-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = ventilation_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['demand'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_demand(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-demand-forecast-ventilation-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

     year  month     category  total_jobs
4    2022     10  ventilation          45
9    2022     11  ventilation          33
14   2022     12  ventilation          46
19   2023      1  ventilation          16
24   2023      2  ventilation          63
29   2023      3  ventilation          65
34   2023      4  ventilation          52
39   2023      5  ventilation          12
44   2023      6  ventilation          40
49   2023      7  ventilation          26
54   2023      8  ventilation          19
59   2023      9  ventilation          74
64   2023     10  ventilation          65
69   2023     11  ventilation          50
74   2023     12  ventilation          56
79   2024      1  ventilation          65
84   2024      2  ventilation          18
89   2024      3  ventilation          47
94   2024      4  ventilation          65
99   2024      5  ventilation          65
104  2024      6  ventilation          41
109  2024      7  ventilation          13
114  2024      8  ventilation     

In [34]:
# Plumber
plumber_counts = job_counts[job_counts['category'] == 'plumber']
print(plumber_counts)

historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'demand': plumber_counts['total_jobs'].tolist()
}
plumber_df = pd.DataFrame(historical_data)
plumber_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = plumber_df['demand'][-12:]
monthly_forecast = np.ceil(forecast_demand(plumber_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-demand-forecast-plumber-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = plumber_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['demand'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_demand(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-demand-forecast-plumber-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

     year  month category  total_jobs
3    2022     10  plumber          23
8    2022     11  plumber          18
13   2022     12  plumber          20
18   2023      1  plumber          24
23   2023      2  plumber           6
28   2023      3  plumber          17
33   2023      4  plumber          24
38   2023      5  plumber          24
43   2023      6  plumber          14
48   2023      7  plumber           5
53   2023      8  plumber          10
58   2023      9  plumber          25
63   2023     10  plumber          75
68   2023     11  plumber          69
73   2023     12  plumber          38
78   2024      1  plumber          23
83   2024      2  plumber          47
88   2024      3  plumber          36
93   2024      4  plumber          70
98   2024      5  plumber          27
103  2024      6  plumber          43
108  2024      7  plumber          53
113  2024      8  plumber          81
118  2024      9  plumber          45


In [35]:
# Handyman
handyman_counts = job_counts[job_counts['category'] == 'handyman']
print(handyman_counts)

historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'demand': handyman_counts['total_jobs'].tolist()
}
handyman_df = pd.DataFrame(historical_data)
handyman_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = handyman_df['demand'][-12:]
monthly_forecast = np.ceil(forecast_demand(handyman_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-demand-forecast-handyman-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = handyman_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['demand'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_demand(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-demand-forecast-handyman-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

     year  month  category  total_jobs
2    2022     10  handyman         100
7    2022     11  handyman          74
12   2022     12  handyman         101
17   2023      1  handyman          35
22   2023      2  handyman         141
27   2023      3  handyman         145
32   2023      4  handyman         116
37   2023      5  handyman          27
42   2023      6  handyman          90
47   2023      7  handyman          58
52   2023      8  handyman          43
57   2023      9  handyman         165
62   2023     10  handyman         150
67   2023     11  handyman         115
72   2023     12  handyman         128
77   2024      1  handyman         150
82   2024      2  handyman          43
87   2024      3  handyman         106
92   2024      4  handyman         151
97   2024      5  handyman         147
102  2024      6  handyman          95
107  2024      7  handyman          29
112  2024      8  handyman          68
117  2024      9  handyman         165


In [36]:
# Aircon
aircon_counts = job_counts[job_counts['category'] == 'aircon']
print(aircon_counts)

historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'demand': aircon_counts['total_jobs'].tolist()
}
aircon_df = pd.DataFrame(historical_data)
aircon_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = aircon_df['demand'][-12:]
monthly_forecast = np.ceil(forecast_demand(aircon_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-demand-forecast-aircon-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = aircon_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['demand'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_demand(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job/job-demand-forecast-aircon-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

     year  month category  total_jobs
0    2022     10   aircon          63
5    2022     11   aircon          49
10   2022     12   aircon          55
15   2023      1   aircon          63
20   2023      2   aircon          18
25   2023      3   aircon          45
30   2023      4   aircon          63
35   2023      5   aircon          63
40   2023      6   aircon          40
45   2023      7   aircon          12
50   2023      8   aircon          29
55   2023      9   aircon          70
60   2023     10   aircon          92
65   2023     11   aircon          68
70   2023     12   aircon          93
75   2024      1   aircon          33
80   2024      2   aircon         130
85   2024      3   aircon         134
90   2024      4   aircon         106
95   2024      5   aircon          25
100  2024      6   aircon          83
105  2024      7   aircon          53
110  2024      8   aircon          40
115  2024      9   aircon         153


### Financial Analytics - Invoice

In [21]:
# General Labels
payment_methods_df = pd.read_csv('./datasets/payment_methods.csv')
payment_types = payment_methods_df['name'].tolist()
print(payment_types)

['PayNow', 'Cash', 'Bank Transfer']


In [22]:
ia_payment_df = pd.read_csv('./datasets/ia_payment.csv')
ia_payment_df['Date'] = pd.to_datetime(ia_payment_df['Date'])

#### Payment Type Distribution

In [23]:
# Past 1 Month

## Total
recent_payments = ia_payment_df[ia_payment_df['Date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=1)]
total_payments_frequency = recent_payments['Payment Method'].value_counts(sort=False).tolist()
data_payment_type_dist = {
    'labels': payment_types,
    'data': total_payments_frequency
}

with open('../public/analytics/financial/invoice/payment-type-dist-one-month-total.json', 'w') as file:
    json.dump(data_payment_type_dist, file, indent=4)

## Average
average_payment_frequency = total_payments_frequency
data_payment_type_dist = {
    'labels': payment_types,
    'data': average_payment_frequency
}

with open('../public/analytics/financial/invoice/payment-type-dist-one-month-average.json', 'w') as file:
    json.dump(data_payment_type_dist, file, indent=4)  

In [24]:
# Past 3 Months

## Total
recent_payments = ia_payment_df[ia_payment_df['Date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=3)]
total_payments_frequency = recent_payments['Payment Method'].value_counts(sort=False).tolist()
data_payment_type_dist = {
    'labels': payment_types,
    'data': total_payments_frequency
}

with open('../public/analytics/financial/invoice/payment-type-dist-three-month-total.json', 'w') as file:
    json.dump(data_payment_type_dist, file, indent=4)

## Average
average_payment_frequency = [np.ceil(x / 3) for x in total_payments_frequency]
data_payment_type_dist = {
    'labels': payment_types,
    'data': average_payment_frequency
}

with open('../public/analytics/financial/invoice/payment-type-dist-three-month-average.json', 'w') as file:
    json.dump(data_payment_type_dist, file, indent=4)   

In [25]:
# Past 6 Months

## Total
recent_payments = ia_payment_df[ia_payment_df['Date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=6)]
total_payments_frequency = recent_payments['Payment Method'].value_counts(sort=False).tolist()
data_payment_type_dist = {
    'labels': payment_types,
    'data': total_payments_frequency
}

with open('../public/analytics/financial/invoice/payment-type-dist-six-month-total.json', 'w') as file:
    json.dump(data_payment_type_dist, file, indent=4)

## Average
average_payment_frequency = [np.ceil(x / 6) for x in total_payments_frequency]
data_payment_type_dist = {
    'labels': payment_types,
    'data': average_payment_frequency
}

with open('../public/analytics/financial/invoice/payment-type-dist-six-month-average.json', 'w') as file:
    json.dump(data_payment_type_dist, file, indent=4)  

In [26]:
# Past 12 Months

## Total
recent_payments = ia_payment_df[ia_payment_df['Date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=12)]
total_payments_frequency = recent_payments['Payment Method'].value_counts(sort=False).tolist()
data_payment_type_dist = {
    'labels': payment_types,
    'data': total_payments_frequency
}

with open('../public/analytics/financial/invoice/payment-type-dist-twelve-month-total.json', 'w') as file:
    json.dump(data_payment_type_dist, file, indent=4)

## Average
average_payment_frequency = [np.ceil(x / 12) for x in total_payments_frequency]
data_payment_type_dist = {
    'labels': payment_types,
    'data': average_payment_frequency
}

with open('../public/analytics/financial/invoice/payment-type-dist-twelve-month-average.json', 'w') as file:
    json.dump(data_payment_type_dist, file, indent=4)   

#### Payment Duration Distribution

In [51]:
# Payment Duration
all_durations = [3, 5, 7, 10, 14]

def generate_payment_duration_dist(recent_payments):
    month = calendar.month_name[recent_payments['Date'].iloc[0].month]
    
    ## PayNow
    paynow_counts = recent_payments[recent_payments['Payment Method'] == 'PayNow']
    total_duration_frequency = paynow_counts['Payment Duration'].value_counts(sort=False)
    total_duration_frequency = total_duration_frequency.reindex(all_durations, fill_value=0)
    data_payment_type_dist = {
        'labels': all_durations,
        'data': total_duration_frequency.tolist()
    }

    with open('../public/analytics/financial/invoice/payment-duration-dist-paynow-{}.json'.format(month), 'w') as file:
        json.dump(data_payment_type_dist, file, indent=4) 

    ## BankTransfer
    bank_counts = recent_payments[recent_payments['Payment Method'] == 'Bank Transfer']
    total_duration_frequency = bank_counts['Payment Duration'].value_counts(sort=False)
    total_duration_frequency = total_duration_frequency.reindex(all_durations, fill_value=0)
    data_payment_type_dist = {
        'labels': all_durations,
        'data': total_duration_frequency.tolist()
    }

    with open('../public/analytics/financial/invoice/payment-duration-dist-bank-{}.json'.format(month), 'w') as file:
        json.dump(data_payment_type_dist, file, indent=4)

In [52]:
# Past 1 Month
recent_payments = ia_payment_df[ia_payment_df['Date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=1)]
generate_payment_duration_dist(recent_payments)

In [53]:
# Past 2 Month
recent_payments = ia_payment_df[(ia_payment_df['Date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=2)) & (ia_payment_df['Date'] < datetime(2024, 10, 1) - pd.DateOffset(months=1))]
generate_payment_duration_dist(recent_payments)

In [54]:
# Past 3 Month
recent_payments = ia_payment_df[(ia_payment_df['Date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=3)) & (ia_payment_df['Date'] < datetime(2024, 10, 1) - pd.DateOffset(months=2))]
generate_payment_duration_dist(recent_payments)

In [55]:
# Past 4 Month
recent_payments = ia_payment_df[(ia_payment_df['Date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=4)) & (ia_payment_df['Date'] < datetime(2024, 10, 1) - pd.DateOffset(months=3))]
generate_payment_duration_dist(recent_payments)

In [56]:
# Past 5 Month
recent_payments = ia_payment_df[(ia_payment_df['Date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=5)) & (ia_payment_df['Date'] < datetime(2024, 10, 1) - pd.DateOffset(months=4))]
generate_payment_duration_dist(recent_payments)

In [57]:
# Past 6 Month
recent_payments = ia_payment_df[(ia_payment_df['Date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=6)) & (ia_payment_df['Date'] < datetime(2024, 10, 1) - pd.DateOffset(months=5))]
generate_payment_duration_dist(recent_payments)

In [58]:
# Past 7 Month
recent_payments = ia_payment_df[(ia_payment_df['Date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=7)) & (ia_payment_df['Date'] < datetime(2024, 10, 1) - pd.DateOffset(months=6))]
generate_payment_duration_dist(recent_payments)

In [59]:
# Past 8 Month
recent_payments = ia_payment_df[(ia_payment_df['Date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=8)) & (ia_payment_df['Date'] < datetime(2024, 10, 1) - pd.DateOffset(months=7))]
generate_payment_duration_dist(recent_payments)

In [60]:
# Past 9 Month
recent_payments = ia_payment_df[(ia_payment_df['Date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=9)) & (ia_payment_df['Date'] < datetime(2024, 10, 1) - pd.DateOffset(months=8))]
generate_payment_duration_dist(recent_payments)

In [61]:
# Past 10 Month
recent_payments = ia_payment_df[(ia_payment_df['Date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=10)) & (ia_payment_df['Date'] < datetime(2024, 10, 1) - pd.DateOffset(months=9))]
generate_payment_duration_dist(recent_payments)

In [62]:
# Past 11 Month
recent_payments = ia_payment_df[(ia_payment_df['Date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=11)) & (ia_payment_df['Date'] < datetime(2024, 10, 1) - pd.DateOffset(months=10))]
generate_payment_duration_dist(recent_payments)

In [63]:
# Past 12 Month
recent_payments = ia_payment_df[(ia_payment_df['Date'] >= datetime(2024, 10, 1) - pd.DateOffset(months=12)) & (ia_payment_df['Date'] < datetime(2024, 10, 1) - pd.DateOffset(months=11))]
generate_payment_duration_dist(recent_payments)

### Financial Analytics - Quotation

#### Response Duration Distribution

In [66]:
qa_duration_df = pd.read_csv('./datasets/qa_duration.csv')

In [67]:
# Filter for Last 12 Months
df_last_12_months = qa_duration_df[qa_duration_df['Period'] == 'Last 12 Months']
category_counts = df_last_12_months.groupby('Duration Category').size()
data_response_duration_dist = {
    'labels': category_counts.index.tolist(),
    'data': category_counts.values.tolist()
}

with open('../public/analytics/financial/quotation/response-duration-dist-last-twelve-month.json', 'w') as file:
    json.dump(data_response_duration_dist, file, indent=4)


In [68]:
# Filter for Last 13-24 Months (Prior 12 Months)
df_prior_12_months = qa_duration_df[qa_duration_df['Period'] == 'Last 13-24 Months']
category_counts = df_prior_12_months.groupby('Duration Category').size()
data_response_duration_dist = {
    'labels': category_counts.index.tolist(),
    'data': category_counts.values.tolist()
}

with open('../public/analytics/financial/quotation/response-duration-dist-last-twentyfour-month.json', 'w') as file:
    json.dump(data_response_duration_dist, file, indent=4)

#### Response Reasoning Distribution

In [74]:
qa_reason_df = pd.read_csv('./datasets/qa_reason.csv')

In [75]:
# Filter for Last 12 Months
df_last_12_months = qa_reason_df[qa_reason_df['Period'] == 'Last 12 Months']
reason_counts = df_last_12_months.groupby('Reason').size()
proportion = round((reason_counts / reason_counts.sum() * 100), 1)

data_response_reason_dist = {
    'labels': reason_counts.index.tolist(),
    'data': proportion.tolist()
}

with open('../public/analytics/financial/quotation/response-reason-dist-last-twelve-month.json', 'w') as file:
    json.dump(data_response_reason_dist, file, indent=4)

In [76]:
# Filter for Last 13-24 Months (Prior 12 Months)
df_prior_12_months = qa_reason_df[qa_reason_df['Period'] == 'Last 13-24 Months']
reason_counts = df_prior_12_months.groupby('Reason').size()
proportion = round((reason_counts / reason_counts.sum() * 100), 1)

data_response_reason_dist = {
    'labels': reason_counts.index.tolist(),
    'data': proportion.tolist()
}

with open('../public/analytics/financial/quotation/response-reason-dist-last-twentyfour-month.json', 'w') as file:
    json.dump(data_response_reason_dist, file, indent=4)