# Analytics Notebook

### Connect To MongoDB

In [1]:
## Import Libraries
import os
from pymongo import MongoClient
from dotenv import load_dotenv

# Load environment variables
load_dotenv('../.env.local')
db_uri = os.getenv('MONGODB_URI')

## Connect To DB
client = MongoClient(db_uri)
db = client['test']

In [2]:
# Testing
print(list(db['vehicles'].find()))

[]


## Analytics Dashboard

In [3]:
## General Libraries
import pandas as pd
import numpy as np
import json
import pytz
from datetime import datetime, timedelta
from statsmodels.tsa.holtwinters import ExponentialSmoothing

### Job Analytics

In [4]:
# General Labels
categories_df = pd.read_csv('./datasets/categories.csv')
job_types = categories_df['name'].tolist()
print(job_types)

['Electrician', 'Ventilation', 'Plumber', 'Handyman', 'Aircon']


In [5]:
# Function Extract Date
def extract_start_date(column):
    try:
        col_dict = eval(column)
        start_date = pd.to_datetime(col_dict[0]['start']['$date'])
        return start_date
    except:
        return None
    
def extract_end_date(column):
    try:
        col_dict = eval(column)
        start_date = pd.to_datetime(col_dict[0]['end']['$date'])
        return start_date
    except:
        return None

In [6]:
ja_jobs_df = pd.read_csv('./datasets/ja_jobs.csv')
ja_jobs_df['date'] = pd.to_datetime(ja_jobs_df['date'])

#### Job Type Distribution

In [7]:
# Past 1 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] > datetime(2024, 10, 1) - pd.DateOffset(months=1)]
total_job_frequency = recent_jobs['category'].value_counts().tolist()
print(total_job_frequency)

data_job_type_dist = {
    'labels': job_types,
    'data': total_job_frequency
}

# Save data to JSON file
with open('../public/analytics/job-type-dist-one-month-total.json', 'w') as file:
    json.dump(data_job_type_dist, file, indent=4)

average_job_frequency = total_job_frequency

data_job_type_dist = {
    'labels': job_types,
    'data': average_job_frequency
}

# Save data to JSON file
with open('../public/analytics/job-type-dist-one-month-average.json', 'w') as file:
    json.dump(data_job_type_dist, file, indent=4)   

[152, 146, 126, 75, 57]


In [8]:
# Past 3 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] > datetime(2024, 10, 1) - pd.DateOffset(months=3)]
total_job_frequency = recent_jobs['category'].value_counts().tolist()
print(total_job_frequency)

data_job_type_dist = {
    'labels': job_types,
    'data': total_job_frequency
}

# Save data to JSON file
with open('../public/analytics/job-type-dist-three-month-total.json', 'w') as file:
    json.dump(data_job_type_dist, file, indent=4)  

average_job_frequency = [np.ceil(x / 3) for x in total_job_frequency]

data_job_type_dist = {
    'labels': job_types,
    'data': average_job_frequency
}

# Save data to JSON file
with open('../public/analytics/job-type-dist-three-month-average.json', 'w') as file:
    json.dump(data_job_type_dist, file, indent=4) 

[248, 243, 222, 204, 121]


In [9]:
# Past 6 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] > datetime(2024, 10, 1) - pd.DateOffset(months=6)]
total_job_frequency = recent_jobs['category'].value_counts().tolist()
print(total_job_frequency)

data_job_type_dist = {
    'labels': job_types,
    'data': total_job_frequency
}

# Save data to JSON file
with open('../public/analytics/job-type-dist-six-month-total.json', 'w') as file:
    json.dump(data_job_type_dist, file, indent=4)

average_job_frequency = [np.ceil(x / 6) for x in total_job_frequency]

data_job_type_dist = {
    'labels': job_types,
    'data': average_job_frequency
}

# Save data to JSON file
with open('../public/analytics/job-type-dist-six-month-average.json', 'w') as file:
    json.dump(data_job_type_dist, file, indent=4) 

[605, 461, 396, 385, 300]


In [10]:
# Past 12 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] > datetime(2024, 10, 1) - pd.DateOffset(months=12)]
total_job_frequency = recent_jobs['category'].value_counts().tolist()
print(total_job_frequency)

data_job_type_dist = {
    'labels': job_types,
    'data': total_job_frequency
}

# Save data to JSON file
with open('../public/analytics/job-type-dist-twelve-month-total.json', 'w') as file:
    json.dump(data_job_type_dist, file, indent=4)

average_job_frequency = [np.ceil(x / 12) for x in total_job_frequency]

data_job_type_dist = {
    'labels': job_types,
    'data': average_job_frequency
}

# Save data to JSON file
with open('../public/analytics/job-type-dist-twelve-month-average.json', 'w') as file:
    json.dump(data_job_type_dist, file, indent=4) 

[1250, 1011, 843, 762, 619]


#### Job Duration Distribution

In [11]:
# Past 1 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] > datetime(2024, 10, 1) - pd.DateOffset(months=1)]
duration_stats = recent_jobs.groupby('category').agg(
    total_duration=('duration', 'sum'),
    frequency=('category', 'count')
).reset_index()
duration_stats['average_duration'] = np.ceil(duration_stats['total_duration'] / duration_stats['frequency'])
duration_stats['category'] = pd.Categorical(duration_stats['category'].str.capitalize(), categories=job_types, ordered=True)
duration_stats = duration_stats.sort_values('category').reset_index(drop=True)
print(duration_stats[['category', 'frequency', 'total_duration', 'average_duration']])

data_job_duration_dist = {
    'labels': job_types,
    'data': duration_stats['average_duration'].tolist()
}

# Save data to JSON file
with open('../public/analytics/job-duration-dist-one-month.json', 'w') as file:
    json.dump(data_job_duration_dist, file, indent=4) 

      category  frequency  total_duration  average_duration
0  Electrician        152           13245              88.0
1  Ventilation         75           14130             189.0
2      Plumber         57            7560             133.0
3     Handyman        146            6600              46.0
4       Aircon        126           59490             473.0


In [12]:
# Past 3 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] > datetime(2024, 10, 1) - pd.DateOffset(months=3)]
duration_stats = recent_jobs.groupby('category').agg(
    total_duration=('duration', 'sum'),
    frequency=('category', 'count')
).reset_index()
duration_stats['average_duration'] = np.ceil(duration_stats['total_duration'] / duration_stats['frequency'])
duration_stats['category'] = pd.Categorical(duration_stats['category'].str.capitalize(), categories=job_types, ordered=True)
duration_stats = duration_stats.sort_values('category').reset_index(drop=True)
print(duration_stats[['category', 'frequency', 'total_duration', 'average_duration']])

data_job_duration_dist = {
    'labels': job_types,
    'data': duration_stats['average_duration'].tolist()
}

# Save data to JSON file
with open('../public/analytics/job-duration-dist-three-month.json', 'w') as file:
    json.dump(data_job_duration_dist, file, indent=4) 

      category  frequency  total_duration  average_duration
0  Electrician        248           21765              88.0
1  Ventilation        121           22845             189.0
2      Plumber        222           29940             135.0
3     Handyman        243           10725              45.0
4       Aircon        204           96870             475.0


In [13]:
# Past 6 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] > datetime(2024, 10, 1) - pd.DateOffset(months=6)]
duration_stats = recent_jobs.groupby('category').agg(
    total_duration=('duration', 'sum'),
    frequency=('category', 'count')
).reset_index()
duration_stats['average_duration'] = np.ceil(duration_stats['total_duration'] / duration_stats['frequency'])
duration_stats['category'] = pd.Categorical(duration_stats['category'].str.capitalize(), categories=job_types, ordered=True)
duration_stats = duration_stats.sort_values('category').reset_index(drop=True)
print(duration_stats[['category', 'frequency', 'total_duration', 'average_duration']])

data_job_duration_dist = {
    'labels': job_types,
    'data': duration_stats['average_duration'].tolist()
}

# Save data to JSON file
with open('../public/analytics/job-duration-dist-six-month.json', 'w') as file:
    json.dump(data_job_duration_dist, file, indent=4) 

      category  frequency  total_duration  average_duration
0  Electrician        461           40995              89.0
1  Ventilation        300           57180             191.0
2      Plumber        396           53850             136.0
3     Handyman        605           26475              44.0
4       Aircon        385          182490             474.0


In [14]:
# Past 12 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] > datetime(2024, 10, 1) - pd.DateOffset(months=12)]
duration_stats = recent_jobs.groupby('category').agg(
    total_duration=('duration', 'sum'),
    frequency=('category', 'count')
).reset_index()
duration_stats['average_duration'] = np.ceil(duration_stats['total_duration'] / duration_stats['frequency'])
duration_stats['category'] = pd.Categorical(duration_stats['category'].str.capitalize(), categories=job_types, ordered=True)
duration_stats = duration_stats.sort_values('category').reset_index(drop=True)
print(duration_stats[['category', 'frequency', 'total_duration', 'average_duration']])

data_job_duration_dist = {
    'labels': job_types,
    'data': duration_stats['average_duration'].tolist()
}

# Save data to JSON file
with open('../public/analytics/job-duration-dist-twelve-month.json', 'w') as file:
    json.dump(data_job_duration_dist, file, indent=4) 

      category  frequency  total_duration  average_duration
0  Electrician       1011           90345              90.0
1  Ventilation        619          117270             190.0
2      Plumber        762          102870             135.0
3     Handyman       1250           55440              45.0
4       Aircon        843          397740             472.0


#### Job Revenue Distribution

In [15]:
# Past 1 Month
total_job_revenue = [8000, 6000, 6000, 8500, 22000]

data_job_revenue_dist = {
    'labels': job_types,
    'data': total_job_revenue
}

# Save data to JSON file
with open('../public/analytics/job-revenue-dist-one-month-total.json', 'w') as file:
    json.dump(data_job_revenue_dist, file, indent=4)

average_job_revenue = total_job_revenue

data_job_revenue_dist = {
    'labels': job_types,
    'data': average_job_revenue
}

# Save data to JSON file
with open('../public/analytics/job-revenue-dist-one-month-average.json', 'w') as file:
    json.dump(data_job_revenue_dist, file, indent=4)  

In [16]:
# Past 3 Month
total_job_revenue = [24900, 17100, 15000, 24000, 51300]

data_job_revenue_dist = {
    'labels': job_types,
    'data': total_job_revenue
}

# Save data to JSON file
with open('../public/analytics/job-revenue-dist-three-month-total.json', 'w') as file:
    json.dump(data_job_revenue_dist, file, indent=4)

average_job_revenue = [round(x / 3, 2) for x in total_job_revenue]

data_job_revenue_dist = {
    'labels': job_types,
    'data': average_job_revenue
}

# Save data to JSON file
with open('../public/analytics/job-revenue-dist-three-month-average.json', 'w') as file:
    json.dump(data_job_revenue_dist, file, indent=4)  

In [17]:
# Past 6 Month
total_job_revenue = [44900, 29700, 27000, 44000, 93300]

data_job_revenue_dist = {
    'labels': job_types,
    'data': total_job_revenue
}

# Save data to JSON file
with open('../public/analytics/job-revenue-dist-six-month-total.json', 'w') as file:
    json.dump(data_job_revenue_dist, file, indent=4)

average_job_revenue = [round(x / 6, 2) for x in total_job_revenue]

data_job_revenue_dist = {
    'labels': job_types,
    'data': average_job_revenue
}

# Save data to JSON file
with open('../public/analytics/job-revenue-dist-six-month-average.json', 'w') as file:
    json.dump(data_job_revenue_dist, file, indent=4)  

In [18]:
# Past 12 Month
total_job_revenue = [82700, 51300, 48600, 81300, 155700]

data_job_revenue_dist = {
    'labels': job_types,
    'data': total_job_revenue
}

# Save data to JSON file
with open('../public/analytics/job-revenue-dist-twelve-month-total.json', 'w') as file:
    json.dump(data_job_revenue_dist, file, indent=4)

average_job_revenue = [round(x / 12, 2) for x in total_job_revenue]

data_job_revenue_dist = {
    'labels': job_types,
    'data': average_job_revenue
}

# Save data to JSON file
with open('../public/analytics/job-revenue-dist-twelve-month-average.json', 'w') as file:
    json.dump(data_job_revenue_dist, file, indent=4)  

#### Job Delay Distribution

In [19]:
# Past 1 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] > datetime(2024, 10, 1) - pd.DateOffset(months=1)]
delay_stats = recent_jobs.groupby('category').agg(
    total_delay=('delay', 'sum'),
    frequency=('category', 'count')
).reset_index()
delay_stats['average_delay'] = np.ceil(delay_stats['total_delay'] / delay_stats['frequency'])
delay_stats['category'] = pd.Categorical(delay_stats['category'].str.capitalize(), categories=job_types, ordered=True)
delay_stats = delay_stats.sort_values('category').reset_index(drop=True)
print(delay_stats[['category', 'frequency', 'total_delay', 'average_delay']])

data_job_delay_dist = {
    'labels': job_types,
    'data': delay_stats['average_delay'].tolist()
}

# Save data to JSON file
with open('../public/analytics/job-delay-dist-one-month.json', 'w') as file:
    json.dump(data_job_delay_dist, file, indent=4) 

      category  frequency  total_delay  average_delay
0  Electrician        152         2280           15.0
1  Ventilation         75         3180           43.0
2      Plumber         57         1590           28.0
3     Handyman        146         1140            8.0
4       Aircon        126         7620           61.0


In [20]:
# Past 3 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] > datetime(2024, 10, 1) - pd.DateOffset(months=3)]
delay_stats = recent_jobs.groupby('category').agg(
    total_delay=('delay', 'sum'),
    frequency=('category', 'count')
).reset_index()
delay_stats['average_delay'] = np.ceil(delay_stats['total_delay'] / delay_stats['frequency'])
delay_stats['category'] = pd.Categorical(delay_stats['category'].str.capitalize(), categories=job_types, ordered=True)
delay_stats = delay_stats.sort_values('category').reset_index(drop=True)
print(delay_stats[['category', 'frequency', 'total_delay', 'average_delay']])

data_job_delay_dist = {
    'labels': job_types,
    'data': delay_stats['average_delay'].tolist()
}

# Save data to JSON file
with open('../public/analytics/job-delay-dist-three-month.json', 'w') as file:
    json.dump(data_job_delay_dist, file, indent=4) 

      category  frequency  total_delay  average_delay
0  Electrician        248         3650           15.0
1  Ventilation        121         5020           42.0
2      Plumber        222         6360           29.0
3     Handyman        243         1840            8.0
4       Aircon        204        11820           58.0


In [21]:
# Past 6 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] > datetime(2024, 10, 1) - pd.DateOffset(months=6)]
delay_stats = recent_jobs.groupby('category').agg(
    total_delay=('delay', 'sum'),
    frequency=('category', 'count')
).reset_index()
delay_stats['average_delay'] = np.ceil(delay_stats['total_delay'] / delay_stats['frequency'])
delay_stats['category'] = pd.Categorical(delay_stats['category'].str.capitalize(), categories=job_types, ordered=True)
delay_stats = delay_stats.sort_values('category').reset_index(drop=True)
print(delay_stats[['category', 'frequency', 'total_delay', 'average_delay']])

data_job_delay_dist = {
    'labels': job_types,
    'data': delay_stats['average_delay'].tolist()
}

# Save data to JSON file
with open('../public/analytics/job-delay-dist-six-month.json', 'w') as file:
    json.dump(data_job_delay_dist, file, indent=4) 

      category  frequency  total_delay  average_delay
0  Electrician        461         7130           16.0
1  Ventilation        300        12360           42.0
2      Plumber        396        11670           30.0
3     Handyman        605         4570            8.0
4       Aircon        385        22440           59.0


In [22]:
# Past 12 Month
recent_jobs = ja_jobs_df[ja_jobs_df['date'] > datetime(2024, 10, 1) - pd.DateOffset(months=12)]
delay_stats = recent_jobs.groupby('category').agg(
    total_delay=('delay', 'sum'),
    frequency=('category', 'count')
).reset_index()
delay_stats['average_delay'] = np.ceil(delay_stats['total_delay'] / delay_stats['frequency'])
delay_stats['category'] = pd.Categorical(delay_stats['category'].str.capitalize(), categories=job_types, ordered=True)
delay_stats = delay_stats.sort_values('category').reset_index(drop=True)
print(delay_stats[['category', 'frequency', 'total_delay', 'average_delay']])

data_job_delay_dist = {
    'labels': job_types,
    'data': delay_stats['average_delay'].tolist()
}

# Save data to JSON file
with open('../public/analytics/job-delay-dist-twelve-month.json', 'w') as file:
    json.dump(data_job_delay_dist, file, indent=4) 

      category  frequency  total_delay  average_delay
0  Electrician       1011        15370           16.0
1  Ventilation        619        25500           42.0
2      Plumber        762        22800           30.0
3     Handyman       1250         9260            8.0
4       Aircon        843        49050           59.0


#### Job Revenue Forecasting

In [23]:
# Calculate the date 24 months ago from today
start_date = datetime(2024, 10, 1) - pd.DateOffset(months=24)

def forecast_revenue(df, seasonal_periods, forecast_horizon):
    model = ExponentialSmoothing(df['revenue'], seasonal='add', seasonal_periods=seasonal_periods)
    model_fit = model.fit()
    forecast = model_fit.forecast(forecast_horizon)
    return forecast

In [24]:
# Electrician
historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'revenue': [6300, 6300, 6400, 5500, 7000, 7100, 6200, 5800, 6400, 7500, 7000, 6800,
                8000, 7400, 6900, 7700, 10000, 9000, 7300, 8500, 9300, 10400, 7000, 8000]
}
electrician_df = pd.DataFrame(historical_data)
electrician_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = electrician_df['revenue'][-12:]
monthly_forecast = np.ceil(forecast_revenue(electrician_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-revenue-forecast-electrician-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = electrician_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['revenue'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_revenue(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-revenue-forecast-electrician-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

  self._init_dates(dates, freq)


In [25]:
# Ventilation
historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'revenue': [4600, 4300, 4800, 5600, 5200, 5100, 4700, 4700, 4800, 4100, 5200, 5300, 
                5500, 6400, 7000, 7800, 5200, 6000, 6000, 5500, 5200, 5800, 7500, 6700]
}
ventilation_df = pd.DataFrame(historical_data)
ventilation_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = ventilation_df['revenue'][-12:]
monthly_forecast = np.ceil(forecast_revenue(ventilation_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-revenue-forecast-ventilation-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = ventilation_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['revenue'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_revenue(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-revenue-forecast-ventilation-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

  self._init_dates(dates, freq)


In [26]:
# Plumber
historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'revenue': [5200, 5100, 4700, 4700, 4600, 4300, 4800, 5600, 4800, 4100, 5200, 5300, 
                5200, 6000, 6000, 5500, 5500, 6400, 7000, 7800, 5200, 5800, 7500, 6700]
}
plumber_df = pd.DataFrame(historical_data)
plumber_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = plumber_df['revenue'][-12:]
monthly_forecast = np.ceil(forecast_revenue(plumber_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-revenue-forecast-plumber-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = plumber_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['revenue'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_revenue(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-revenue-forecast-plumber-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

  self._init_dates(dates, freq)


In [27]:
# Handyman
historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'revenue': [6200, 5800, 6400, 7500, 7000, 6800, 6300, 6300, 6400, 5500, 7000, 7100, 
                7300, 8500, 9300, 10400, 7000, 8000, 8000, 7400, 6900, 7700, 10000, 9000]
}
handyman_df = pd.DataFrame(historical_data)
handyman_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = handyman_df['revenue'][-12:]
monthly_forecast = np.ceil(forecast_revenue(handyman_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-revenue-forecast-handyman-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = handyman_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['revenue'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_revenue(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-revenue-forecast-handyman-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

  self._init_dates(dates, freq)


In [28]:
# Aircon
historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'revenue': [12400, 11600, 12800, 11000, 14000, 14020, 12080, 15000, 14000, 13600, 12060, 12060, 
                14600, 17000, 13800, 15040, 20000, 18000, 18060, 20080, 14000, 16000, 16000, 14080]
}
aircon_df = pd.DataFrame(historical_data)
aircon_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = aircon_df['revenue'][-12:]
monthly_forecast = np.ceil(forecast_revenue(aircon_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-revenue-forecast-aircon-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = aircon_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['revenue'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_revenue(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-revenue-forecast-aircon-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

  self._init_dates(dates, freq)


#### Job Demand Forecast

In [29]:
ja_jobs_df['year'] = ja_jobs_df['date'].dt.year
ja_jobs_df['month'] = ja_jobs_df['date'].dt.month

def forecast_demand(df, seasonal_periods, forecast_horizon):
    model = ExponentialSmoothing(df['demand'], seasonal='add', seasonal_periods=seasonal_periods)
    model_fit = model.fit()
    forecast = model_fit.forecast(forecast_horizon)
    return forecast

In [30]:
job_counts = ja_jobs_df.groupby(['year', 'month', 'category']).size().reset_index(name='total_jobs')
print(job_counts)

     year  month     category  total_jobs
0    2022     10       aircon          53
1    2022     10  electrician          95
2    2022     10     handyman          83
3    2022     10      plumber          68
4    2022     10  ventilation          33
..    ...    ...          ...         ...
115  2024      9       aircon         128
116  2024      9  electrician         154
117  2024      9     handyman         154
118  2024      9      plumber          57
119  2024      9  ventilation          76

[120 rows x 4 columns]


     year  month     category  total_jobs
1    2022     10  electrician          73
6    2022     11  electrician          67
11   2022     12  electrician          37
16   2023      1  electrician          22
21   2023      2  electrician          46
26   2023      3  electrician          35
31   2023      4  electrician          68
36   2023      5  electrician          26
41   2023      6  electrician          42
46   2023      7  electrician          52
51   2023      8  electrician          79
56   2023      9  electrician          45
61   2023     10  electrician         111
66   2023     11  electrician         101
71   2023     12  electrician          56
76   2024      1  electrician          34
81   2024      2  electrician          69
86   2024      3  electrician          53
91   2024      4  electrician         103
96   2024      5  electrician          40
101  2024      6  electrician          62
106  2024      7  electrician          78
111  2024      8  electrician     

In [31]:
# Electrician
electrician_counts = job_counts[job_counts['category'] == 'electrician']
print(electrician_counts)

historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'demand': electrician_counts['total_jobs'].tolist()
}
electrician_df = pd.DataFrame(historical_data)
electrician_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = electrician_df['demand'][-12:]
monthly_forecast = np.ceil(forecast_demand(electrician_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-demand-forecast-electrician-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = electrician_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['demand'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_demand(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-demand-forecast-electrician-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 


     year  month     category  total_jobs
1    2022     10  electrician          95
6    2022     11  electrician          86
11   2022     12  electrician          48
16   2023      1  electrician          29
21   2023      2  electrician          59
26   2023      3  electrician          45
31   2023      4  electrician          88
36   2023      5  electrician          34
41   2023      6  electrician          54
46   2023      7  electrician          66
51   2023      8  electrician         102
56   2023      9  electrician          56
61   2023     10  electrician          93
66   2023     11  electrician          69
71   2023     12  electrician          94
76   2024      1  electrician          33
81   2024      2  electrician         131
86   2024      3  electrician         135
91   2024      4  electrician         106
96   2024      5  electrician          25
101  2024      6  electrician          84
106  2024      7  electrician          54
111  2024      8  electrician     

  self._init_dates(dates, freq)


In [32]:
# Ventilation
ventilation_counts = job_counts[job_counts['category'] == 'ventilation']
print(ventilation_counts)

historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'demand': ventilation_counts['total_jobs'].tolist()
}
ventilation_df = pd.DataFrame(historical_data)
ventilation_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = ventilation_df['demand'][-12:]
monthly_forecast = np.ceil(forecast_demand(ventilation_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-demand-forecast-ventilation-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = ventilation_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['demand'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_demand(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-demand-forecast-ventilation-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

     year  month     category  total_jobs
4    2022     10  ventilation          33
9    2022     11  ventilation          24
14   2022     12  ventilation          34
19   2023      1  ventilation          11
24   2023      2  ventilation          46
29   2023      3  ventilation          49
34   2023      4  ventilation          40
39   2023      5  ventilation           9
44   2023      6  ventilation          29
49   2023      7  ventilation          19
54   2023      8  ventilation          14
59   2023      9  ventilation          54
64   2023     10  ventilation          69
69   2023     11  ventilation          53
74   2023     12  ventilation          59
79   2024      1  ventilation          69
84   2024      2  ventilation          19
89   2024      3  ventilation          49
94   2024      4  ventilation          69
99   2024      5  ventilation          69
104  2024      6  ventilation          44
109  2024      7  ventilation          14
114  2024      8  ventilation     

  self._init_dates(dates, freq)


In [33]:
# Plumber
plumber_counts = job_counts[job_counts['category'] == 'plumber']
print(plumber_counts)

historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'demand': plumber_counts['total_jobs'].tolist()
}
plumber_df = pd.DataFrame(historical_data)
plumber_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = plumber_df['demand'][-12:]
monthly_forecast = np.ceil(forecast_demand(plumber_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-demand-forecast-plumber-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = plumber_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['demand'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_demand(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-demand-forecast-plumber-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

     year  month category  total_jobs
3    2022     10  plumber          68
8    2022     11  plumber          53
13   2022     12  plumber          59
18   2023      1  plumber          68
23   2023      2  plumber          19
28   2023      3  plumber          49
33   2023      4  plumber          69
38   2023      5  plumber          68
43   2023      6  plumber          43
48   2023      7  plumber          13
53   2023      8  plumber          31
58   2023      9  plumber          75
63   2023     10  plumber          95
68   2023     11  plumber          87
73   2023     12  plumber          48
78   2024      1  plumber          29
83   2024      2  plumber          60
88   2024      3  plumber          45
93   2024      4  plumber          88
98   2024      5  plumber          34
103  2024      6  plumber          54
108  2024      7  plumber          67
113  2024      8  plumber         103
118  2024      9  plumber          57


  self._init_dates(dates, freq)


In [34]:
# Handyman
handyman_counts = job_counts[job_counts['category'] == 'handyman']
print(handyman_counts)

historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'demand': handyman_counts['total_jobs'].tolist()
}
handyman_df = pd.DataFrame(historical_data)
handyman_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = handyman_df['demand'][-12:]
monthly_forecast = np.ceil(forecast_demand(handyman_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-demand-forecast-handyman-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = handyman_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['demand'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_demand(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-demand-forecast-handyman-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

     year  month  category  total_jobs
2    2022     10  handyman          83
7    2022     11  handyman          62
12   2022     12  handyman          84
17   2023      1  handyman          29
22   2023      2  handyman         117
27   2023      3  handyman         120
32   2023      4  handyman          95
37   2023      5  handyman          23
42   2023      6  handyman          75
47   2023      7  handyman          48
52   2023      8  handyman          36
57   2023      9  handyman         137
62   2023     10  handyman         140
67   2023     11  handyman         107
72   2023     12  handyman         120
77   2024      1  handyman         140
82   2024      2  handyman          40
87   2024      3  handyman          99
92   2024      4  handyman         140
97   2024      5  handyman         137
102  2024      6  handyman          89
107  2024      7  handyman          27
112  2024      8  handyman          63
117  2024      9  handyman         154


  self._init_dates(dates, freq)


In [35]:
# Aircon
aircon_counts = job_counts[job_counts['category'] == 'aircon']
print(aircon_counts)

historical_data = {
    'date': pd.date_range(start=start_date, end=datetime(2024, 10, 1), freq='M'),
    'demand': aircon_counts['total_jobs'].tolist()
}
aircon_df = pd.DataFrame(historical_data)
aircon_df.set_index('date', inplace=True)

forecast_months = 6

# Monthly
historical_demand = aircon_df['demand'][-12:]
monthly_forecast = np.ceil(forecast_demand(aircon_df, seasonal_periods=12, forecast_horizon=forecast_months)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, monthly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(monthly_forecast.index[i].strftime("%Y-%m")) for i in range(len(monthly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_monthly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-demand-forecast-aircon-month.json', 'w') as file:
    json.dump(data_job_demand_forecast_monthly, file, indent=4) 

# Quarterly 
df_quarterly = aircon_df.resample('Q').sum()
historical_demand = np.ceil(df_quarterly['demand'][-6:]).astype(int)
quarterly_forecast = np.ceil(forecast_demand(df_quarterly, seasonal_periods=4, forecast_horizon=forecast_months // 3)).astype(int)
combined_demand_forecast = np.concatenate((historical_demand, quarterly_forecast))

historical_labels = [historical_demand.index[i].strftime("%Y-%m") for i in range(len(historical_demand))]
forecasted_labels = [(quarterly_forecast.index[i].strftime("%Y-%m")) for i in range(len(quarterly_forecast))]
combined_labels = historical_labels + forecasted_labels

data_job_demand_forecast_quarterly = {
    'labels': combined_labels,
    'data': combined_demand_forecast.tolist()
}

with open('../public/analytics/job-demand-forecast-aircon-quarter.json', 'w') as file:
    json.dump(data_job_demand_forecast_quarterly, file, indent=4) 

     year  month category  total_jobs
0    2022     10   aircon          53
5    2022     11   aircon          41
10   2022     12   aircon          46
15   2023      1   aircon          53
20   2023      2   aircon          15
25   2023      3   aircon          38
30   2023      4   aircon          53
35   2023      5   aircon          53
40   2023      6   aircon          34
45   2023      7   aircon          11
50   2023      8   aircon          24
55   2023      9   aircon          59
60   2023     10   aircon          78
65   2023     11   aircon          57
70   2023     12   aircon          78
75   2024      1   aircon          27
80   2024      2   aircon         109
85   2024      3   aircon         113
90   2024      4   aircon          89
95   2024      5   aircon          21
100  2024      6   aircon          70
105  2024      7   aircon          45
110  2024      8   aircon          33
115  2024      9   aircon         128


  self._init_dates(dates, freq)
