# SLSQP OPTIMIZATION



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize

## Late adopters high uptake scenario


In [None]:
# Load the Excel file. Replace the placeholder string below with the path to
# the workbook before running; it is kept as a string literal so the cell
# parses even before the path has been filled in.
file_path_cluster = "#INSERT PATH#"
data_cluster = pd.read_excel(file_path_cluster)

# Keep only the rows labelled cluster 0 and drop the 'ClusterLabel' and
# 'HHindex' columns; the code below reads the remaining columns by position,
# one column per year.
df_cluster_0 = data_cluster[data_cluster['ClusterLabel'] == 0].drop(columns=['ClusterLabel', 'HHindex'])

# Tiers are labelled 1..num_states_cluster0
num_states_cluster0 = 5

# The first 5 years will use real data
num_real_years = 5

# Per-year tier counts, and the same counts expressed as percentages
distributions_cluster0 = []
distributions_percentages_cluster0 = []

for year in range(num_real_years):
    # Count how often each tier occurs in this year's column; tiers that do
    # not occur are filled in with a count of 0 so every year has one entry
    # per tier, in tier order.
    distribution_cluster0 = (
        df_cluster_0.iloc[:, year]
        .value_counts()
        .reindex(range(1, num_states_cluster0 + 1), fill_value=0)
        .values
    )
    distribution_percentage_cluster0 = (distribution_cluster0 / distribution_cluster0.sum()) * 100

    distributions_cluster0.append(distribution_cluster0)
    distributions_percentages_cluster0.append(distribution_percentage_cluster0)

# Logistic target curve for the average tier (high uptake scenario)
def logistic_curve_cluster0(x):
    """Return the target average tier at year ``x``.

    Evaluates a logistic curve with ceiling 5, growth rate 0.308 and
    midpoint 6.545. ``x`` may be a scalar or a NumPy array.
    """
    ceiling = 5
    growth_rate = 0.308
    midpoint = 6.545
    decay = np.exp(-growth_rate * (x - midpoint))
    return ceiling / (1 + decay)

# Weighted mean tier of a distribution
def calculate_average_tier_cluster0(distribution_cluster0):
    """Return the average tier implied by a tier distribution.

    Entry ``i`` of ``distribution_cluster0`` is the weight of tier ``i + 1``.
    The weights may be counts, fractions or percentages: the result is
    divided by their sum.
    """
    tier_labels = np.arange(len(distribution_cluster0)) + 1
    weighted_total = np.dot(distribution_cluster0, tier_labels)
    total_weight = np.sum(distribution_cluster0)
    return weighted_total / total_weight

# Objective minimized by the optimizer for one forecast year
def objective_function_cluster0(next_distribution, target_average_tier, previous_distribution, lambda_smooth=0.02, lambda_balance=0.02):
    """Score a candidate distribution; lower is better.

    The score is the sum of three terms:
      * the squared gap between the candidate's average tier and
        ``target_average_tier``;
      * ``lambda_smooth`` times the squared change from
        ``previous_distribution``;
      * ``lambda_balance`` times the squared deviation of the candidate's
        entries from their own mean.
    """
    tier_gap = calculate_average_tier_cluster0(next_distribution) - target_average_tier
    step_change = next_distribution - previous_distribution
    spread = next_distribution - next_distribution.mean()

    logistic_objective = tier_gap ** 2
    smoothness_penalty = lambda_smooth * np.sum(step_change ** 2)
    balance_penalty = lambda_balance * np.sum(spread ** 2)
    return logistic_objective + smoothness_penalty + balance_penalty

# Optimization function
def optimize_distribution_cluster0(target_average_tier, previous_distribution):
    """Find the next year's tier distribution with SLSQP.

    Minimizes ``objective_function_cluster0`` subject to:
      * the entries sum to 1;
      * every entry lies in [0, 1];
      * the first tier's entry does not exceed its value in
        ``previous_distribution``;
      * the last tier's entry does not fall below its value in
        ``previous_distribution``.

    The number of tiers is taken from ``len(previous_distribution)`` rather
    than a hard-coded index or a module-level constant, so the function stays
    correct if the number of tiers changes.

    Args:
        target_average_tier: Average tier the result should match.
        previous_distribution: 1-D NumPy array with the preceding year's
            distribution. The constraints compare against these values
            directly, so they should be fractions summing to 1.

    Returns:
        The optimizer's final point ``result.x``. It is returned even when
        SLSQP reports failure; in that case a message is printed first.
    """
    num_tiers = len(previous_distribution)

    constraints = [
        {'type': 'eq', 'fun': lambda x: np.sum(x) - 1},  # Sum to 1
        {'type': 'ineq', 'fun': lambda x: previous_distribution[0] - x[0]},  # First tier can only decrease or stay the same
        {'type': 'ineq', 'fun': lambda x: x[-1] - previous_distribution[-1]},  # Last tier can only increase or stay the same
        {'type': 'ineq', 'fun': lambda x: x}  # Values must be non-negative
    ]

    # Start the search from the (re-normalized) previous distribution
    initial_guess = previous_distribution / np.sum(previous_distribution)

    result = minimize(
        objective_function_cluster0,
        initial_guess,
        args=(target_average_tier, previous_distribution),
        bounds=[(0, 1) for _ in range(num_tiers)],
        constraints=constraints,
        method='SLSQP'
    )

    if not result.success:
        print(f"Optimization failed: {result.message}")
    return result.x

# Forecast list: starts with the observed years (as percentages); the
# optimized years are appended below.
predicted_distributions_cluster0 = distributions_percentages_cluster0.copy()  # First 5 years use real data

# Print the real distribution of tiers for the first years
for year, distribution in enumerate(distributions_percentages_cluster0[:5], start=1):
    print(f"Year {year}: {np.round(distribution, 2)}%")

# Forecast Years 6 to 20. Year 6 is optimized against Year 5 (the last real
# data point); every later year is optimized against the forecast of the
# year before it.
for year in range(6, 21):
    target_average_tier = logistic_curve_cluster0(year)
    # Take the most recent entry ([-1]), not a fixed index: the list grows by
    # one entry per iteration, and the monotonicity constraints and the
    # smoothness penalty are meant to apply year over year.
    previous_distribution = np.array(predicted_distributions_cluster0[-1]) / np.sum(predicted_distributions_cluster0[-1])

    next_distribution = optimize_distribution_cluster0(
        target_average_tier,
        previous_distribution
    )

    predicted_distributions_cluster0.append(next_distribution * 100)  # Convert to percentage

    print(f"Predicted Distribution for Year {year}: {np.round(next_distribution * 100, 2)}%")

# Average tier for each of the 20 years (observed, then forecast)
average_tiers = [calculate_average_tier_cluster0(dist) for dist in predicted_distributions_cluster0]

# Logistic target values for Years 1 to 20, for comparison with average_tiers
years = np.arange(1, 21)
logistic_targets = [logistic_curve_cluster0(x) for x in years]




## Late adopters medium uptake scenario

In [None]:
# Start from empty lists so this scenario rebuilds the observed distributions
# (counts and percentages) from scratch.
distributions_cluster0 = []
distributions_percentages_cluster0 = []

# One pass per observed year: count rows per tier, filling absent tiers with 0
for year in range(num_real_years):
    distribution_cluster0 = (
        df_cluster_0.iloc[:, year]
        .value_counts()
        .reindex(range(1, num_states_cluster0 + 1), fill_value=0)
        .values
    )
    distribution_percentage_cluster0 = (distribution_cluster0 / distribution_cluster0.sum()) * 100

    distributions_cluster0.append(distribution_cluster0)
    distributions_percentages_cluster0.append(distribution_percentage_cluster0)

# Logistic target curve for the average tier (medium uptake scenario)
def logistic_curve_cluster0(x):
    """Return the target average tier at year ``x``.

    Logistic curve with ceiling 4.5, growth rate 0.319 and midpoint 5.963.
    Accepts a scalar or a NumPy array.
    """
    exponent = -0.319 * (x - 5.963)
    denominator = 1 + np.exp(exponent)
    return 4.5 / denominator

# Average tier of a distribution (weights need not be normalized)
def calculate_average_tier_cluster0(distribution_cluster0):
    """Return the weighted mean of the tier labels 1..n.

    ``distribution_cluster0[i]`` is used as the weight of tier ``i + 1``;
    the weighted sum is divided by the total weight.
    """
    num_tiers = len(distribution_cluster0)
    tier_values = np.arange(1, num_tiers + 1)
    return np.dot(distribution_cluster0, tier_values) / np.sum(distribution_cluster0)

# Objective: match the logistic target while staying smooth and balanced
def objective_function_cluster0(next_distribution, target_average_tier, previous_distribution, lambda_smooth=0.02, lambda_balance=0.02):
    """Return the value the optimizer minimizes for a candidate distribution.

    Adds together the squared error of the candidate's average tier against
    ``target_average_tier``, a ``lambda_smooth``-weighted squared distance
    from ``previous_distribution``, and a ``lambda_balance``-weighted squared
    deviation of the candidate's entries from their mean.
    """
    # How far the candidate's average tier is from the logistic target
    logistic_objective = (calculate_average_tier_cluster0(next_distribution) - target_average_tier) ** 2

    # How much the candidate moved away from the previous distribution
    change = next_distribution - previous_distribution
    smoothness_penalty = lambda_smooth * np.sum(change ** 2)

    # How uneven the candidate's entries are
    centered = next_distribution - next_distribution.mean()
    balance_penalty = lambda_balance * np.sum(centered ** 2)

    return logistic_objective + smoothness_penalty + balance_penalty

# Optimization function
def optimize_distribution_cluster0(target_average_tier, previous_distribution):
    """Find the next year's tier distribution with SLSQP.

    Minimizes ``objective_function_cluster0`` under these constraints: the
    entries sum to 1, each entry lies in [0, 1], the first tier's entry is no
    larger than in ``previous_distribution``, and the last tier's entry is no
    smaller than in ``previous_distribution``.

    The tier count comes from ``len(previous_distribution)`` instead of a
    hard-coded index 4 and a module-level constant, so the constraints keep
    pointing at the last tier if the number of tiers changes.

    Args:
        target_average_tier: Average tier the result should match.
        previous_distribution: 1-D NumPy array with the preceding year's
            distribution, expected as fractions summing to 1 because the
            constraints compare against it directly.

    Returns:
        ``result.x`` from the optimizer. A failure message is printed when
        SLSQP does not report success, but the point is still returned.
    """
    num_tiers = len(previous_distribution)

    constraints = [
        {'type': 'eq', 'fun': lambda x: np.sum(x) - 1},  # Sum to 1
        {'type': 'ineq', 'fun': lambda x: previous_distribution[0] - x[0]},  # First tier can only decrease or stay the same
        {'type': 'ineq', 'fun': lambda x: x[-1] - previous_distribution[-1]},  # Last tier can only increase or stay the same
        {'type': 'ineq', 'fun': lambda x: x}  # Values must be non-negative
    ]

    # Start the search from the (re-normalized) previous distribution
    initial_guess = previous_distribution / np.sum(previous_distribution)

    result = minimize(
        objective_function_cluster0,
        initial_guess,
        args=(target_average_tier, previous_distribution),
        bounds=[(0, 1) for _ in range(num_tiers)],
        constraints=constraints,
        method='SLSQP'
    )

    if not result.success:
        print(f"Optimization failed: {result.message}")
    return result.x

# Forecast list for the medium uptake scenario: observed years first (as
# percentages), optimized years appended below.
predicted_distributions_cluster0 = distributions_percentages_cluster0.copy()  # First 5 years use real data

# Print the real distribution of tiers for the first years
for year, distribution in enumerate(distributions_percentages_cluster0[:5], start=1):
    print(f"Year {year}: {np.round(distribution, 2)}%")

# Forecast Years 6 to 20, each year optimized against the year before it
# (Year 5, the last real data point, for the first step).
for year in range(6, 21):
    target_average_tier = logistic_curve_cluster0(year)
    # [-1] is the most recent year in the growing list; a fixed index would
    # anchor every forecast year to Year 5 and defeat the year-over-year
    # constraints and smoothness penalty.
    previous_distribution = np.array(predicted_distributions_cluster0[-1]) / np.sum(predicted_distributions_cluster0[-1])

    next_distribution = optimize_distribution_cluster0(
        target_average_tier,
        previous_distribution
    )

    predicted_distributions_cluster0.append(next_distribution * 100)  # Convert to percentage

    print(f"Predicted Distribution for Year {year}: {np.round(next_distribution * 100, 2)}%")

# Average tier for each of the 20 years (observed, then forecast)
average_tiers = [calculate_average_tier_cluster0(dist) for dist in predicted_distributions_cluster0]

# Logistic target values for Years 1 to 20, for comparison with average_tiers
years = np.arange(1, 21)
logistic_targets = [logistic_curve_cluster0(x) for x in years]


## Late adopters low uptake scenario


In [None]:
# Reset the per-year containers for this scenario
distributions_cluster0 = []
distributions_percentages_cluster0 = []

# The first 5 years will use real data
num_real_years = 5

for year in range(num_real_years):
    # Tier counts for this year's column, ordered by tier label, with a 0 for
    # any tier that does not appear.
    distribution_cluster0 = (
        df_cluster_0.iloc[:, year]
        .value_counts()
        .reindex(range(1, num_states_cluster0 + 1), fill_value=0)
        .values
    )
    distributions_cluster0.append(distribution_cluster0)

    # Same counts as a percentage of the year's total
    distribution_percentage_cluster0 = (distribution_cluster0 / distribution_cluster0.sum()) * 100
    distributions_percentages_cluster0.append(distribution_percentage_cluster0)

# Logistic target curve for the average tier (low uptake scenario)
def logistic_curve_cluster0(x):
    """Return the target average tier at year ``x``.

    Logistic curve with ceiling 4, growth rate 0.333 and midpoint 5.315.
    Accepts a scalar or a NumPy array.
    """
    years_from_midpoint = x - 5.315
    return 4 / (1 + np.exp(-0.333 * years_from_midpoint))

# Mean tier under a given distribution
def calculate_average_tier_cluster0(distribution_cluster0):
    """Return the average tier, weighting tier ``i + 1`` by entry ``i``.

    Works for counts, fractions or percentages because the weighted sum is
    normalized by the sum of the weights.
    """
    tiers = np.arange(1, len(distribution_cluster0) + 1)
    numerator = np.dot(distribution_cluster0, tiers)
    denominator = np.sum(distribution_cluster0)
    return numerator / denominator

# Objective function evaluated by SLSQP for each candidate distribution
def objective_function_cluster0(next_distribution, target_average_tier, previous_distribution, lambda_smooth=0.02, lambda_balance=0.02):
    """Return the penalized squared error of a candidate distribution.

    Terms, in the order they are added:
      1. squared difference between the candidate's average tier and
         ``target_average_tier``;
      2. ``lambda_smooth`` * sum of squared changes from
         ``previous_distribution``;
      3. ``lambda_balance`` * sum of squared deviations of the candidate's
         entries from their mean.
    """
    candidate_average = calculate_average_tier_cluster0(next_distribution)
    candidate_mean = next_distribution.mean()

    logistic_objective = (candidate_average - target_average_tier) ** 2
    smoothness_penalty = lambda_smooth * np.sum((next_distribution - previous_distribution) ** 2)
    balance_penalty = lambda_balance * np.sum((next_distribution - candidate_mean) ** 2)

    total = logistic_objective + smoothness_penalty + balance_penalty
    return total

# Optimization function
def optimize_distribution_cluster0(target_average_tier, previous_distribution):
    """Find the next year's tier distribution with SLSQP.

    Minimizes ``objective_function_cluster0`` while requiring that the
    entries sum to 1 and lie in [0, 1], that the first tier's entry does not
    grow relative to ``previous_distribution``, and that the last tier's
    entry does not shrink relative to it.

    The last tier is addressed as ``[-1]`` and the bounds are sized from
    ``len(previous_distribution)``, replacing the hard-coded index 4 and the
    dependency on a module-level tier count.

    Args:
        target_average_tier: Average tier the result should match.
        previous_distribution: 1-D NumPy array with the preceding year's
            distribution, expected as fractions summing to 1 because the
            constraints compare against it directly.

    Returns:
        ``result.x`` from the optimizer. If SLSQP does not report success a
        message is printed and the point is returned anyway.
    """
    num_tiers = len(previous_distribution)

    constraints = [
        {'type': 'eq', 'fun': lambda x: np.sum(x) - 1},  # Sum to 1
        {'type': 'ineq', 'fun': lambda x: previous_distribution[0] - x[0]},  # First tier can only decrease or stay the same
        {'type': 'ineq', 'fun': lambda x: x[-1] - previous_distribution[-1]},  # Last tier can only increase or stay the same
        {'type': 'ineq', 'fun': lambda x: x}  # Values must be non-negative
    ]

    # Start the search from the (re-normalized) previous distribution
    initial_guess = previous_distribution / np.sum(previous_distribution)

    result = minimize(
        objective_function_cluster0,
        initial_guess,
        args=(target_average_tier, previous_distribution),
        bounds=[(0, 1) for _ in range(num_tiers)],
        constraints=constraints,
        method='SLSQP'
    )

    if not result.success:
        print(f"Optimization failed: {result.message}")
    return result.x

# Forecast list for the low uptake scenario: observed years first (as
# percentages), optimized years appended below.
predicted_distributions_cluster0 = distributions_percentages_cluster0.copy()  # First 5 years use real data

# Print the real distribution of tiers for the first years
for year, distribution in enumerate(distributions_percentages_cluster0[:5], start=1):
    print(f"Year {year}: {np.round(distribution, 2)}%")

# Forecast Years 6 to 20. The first step starts from Year 5 (the last real
# data point); each later step starts from the previous forecast.
for year in range(6, 21):
    target_average_tier = logistic_curve_cluster0(year)
    # Always read the latest entry ([-1]) of the growing list so the
    # constraints and smoothness penalty compare consecutive years instead of
    # comparing every year against Year 5.
    previous_distribution = np.array(predicted_distributions_cluster0[-1]) / np.sum(predicted_distributions_cluster0[-1])

    next_distribution = optimize_distribution_cluster0(
        target_average_tier,
        previous_distribution
    )

    predicted_distributions_cluster0.append(next_distribution * 100)  # Convert to percentage

    print(f"Predicted Distribution for Year {year}: {np.round(next_distribution * 100, 2)}%")

# Average tier for each of the 20 years (observed, then forecast)
average_tiers = [calculate_average_tier_cluster0(dist) for dist in predicted_distributions_cluster0]

# Logistic target values for Years 1 to 20, for comparison with average_tiers
years = np.arange(1, 21)
logistic_targets = [logistic_curve_cluster0(x) for x in years]


## Early adopters

In [None]:

# Keep only the rows labelled cluster 1 and drop the 'ClusterLabel' and
# 'HHindex' columns; the remaining columns are read by position below.
df_cluster_1 = (
    data_cluster[data_cluster['ClusterLabel'] == 1]
    .drop(columns=['ClusterLabel', 'HHindex'])
)

# Tiers are labelled 1..num_states_cluster1
num_states_cluster1 = 5

# The first 5 years will use real data
num_real_years = 5

# Per-year tier counts, and the same counts expressed as percentages
distributions_cluster1 = []
distributions_percentages_cluster1 = []

for year in range(num_real_years):
    # Count rows per tier in this year's column; absent tiers get a 0 so each
    # year has exactly one entry per tier, in tier order.
    distribution_cluster1 = (
        df_cluster_1.iloc[:, year]
        .value_counts()
        .reindex(range(1, num_states_cluster1 + 1), fill_value=0)
        .values
    )
    distribution_percentage_cluster1 = (distribution_cluster1 / distribution_cluster1.sum()) * 100

    distributions_cluster1.append(distribution_cluster1)
    distributions_percentages_cluster1.append(distribution_percentage_cluster1)

# Logistic target curve for the average tier (early adopters)
def logistic_curve_cluster1(x):
    """Return the target average tier at year ``x``.

    Logistic curve with ceiling 5, growth rate 0.5641 and midpoint 1.5137.
    Accepts a scalar or a NumPy array.
    """
    ceiling, growth_rate, midpoint = 5, 0.5641, 1.5137
    return ceiling / (1 + np.exp(-growth_rate * (x - midpoint)))

# Weighted average of the tier labels
def calculate_average_tier_cluster1(distribution_cluster1):
    """Return the average tier implied by ``distribution_cluster1``.

    Entry ``i`` is the weight of tier ``i + 1``. The weights can be on any
    scale; the weighted sum is divided by the sum of the weights.
    """
    tier_labels = np.arange(len(distribution_cluster1)) + 1
    return np.dot(distribution_cluster1, tier_labels) / np.sum(distribution_cluster1)

# Objective minimized when forecasting the early-adopter distribution
def objective_function_cluster1(next_distribution, target_average_tier, previous_distribution, lambda_smooth=0.02, lambda_balance=0.02):
    """Score a candidate distribution; lower is better.

    Combines the squared error of the candidate's average tier against
    ``target_average_tier`` with two quadratic penalties: distance from
    ``previous_distribution`` (scaled by ``lambda_smooth``) and deviation of
    the entries from their own mean (scaled by ``lambda_balance``).
    """
    tier_error = calculate_average_tier_cluster1(next_distribution) - target_average_tier
    delta_from_previous = next_distribution - previous_distribution
    delta_from_mean = next_distribution - next_distribution.mean()

    logistic_objective = tier_error ** 2
    smoothness_penalty = lambda_smooth * np.sum(delta_from_previous ** 2)
    balance_penalty = lambda_balance * np.sum(delta_from_mean ** 2)
    return logistic_objective + smoothness_penalty + balance_penalty

# Optimization function
def optimize_distribution_cluster1(target_average_tier, previous_distribution):
    """Find the next year's tier distribution with SLSQP.

    Minimizes ``objective_function_cluster1`` subject to:
      * the entries sum to 1;
      * every entry lies in [0, 1];
      * the first tier's entry does not exceed its value in
        ``previous_distribution``;
      * the last tier's entry does not fall below its value in
        ``previous_distribution``.

    The number of tiers is taken from ``len(previous_distribution)`` rather
    than a hard-coded index or a module-level constant, so the function stays
    correct if the number of tiers changes.

    Args:
        target_average_tier: Average tier the result should match.
        previous_distribution: 1-D NumPy array with the preceding year's
            distribution. The constraints compare against these values
            directly, so they should be fractions summing to 1.

    Returns:
        The optimizer's final point ``result.x``. It is returned even when
        SLSQP reports failure; in that case a message is printed first.
    """
    num_tiers = len(previous_distribution)

    constraints = [
        {'type': 'eq', 'fun': lambda x: np.sum(x) - 1},  # Sum to 1
        {'type': 'ineq', 'fun': lambda x: previous_distribution[0] - x[0]},  # First tier can only decrease or stay the same
        {'type': 'ineq', 'fun': lambda x: x[-1] - previous_distribution[-1]},  # Last tier can only increase or stay the same
        {'type': 'ineq', 'fun': lambda x: x}  # Values must be non-negative
    ]

    # Start the search from the (re-normalized) previous distribution
    initial_guess = previous_distribution / np.sum(previous_distribution)

    result = minimize(
        objective_function_cluster1,
        initial_guess,
        args=(target_average_tier, previous_distribution),
        bounds=[(0, 1) for _ in range(num_tiers)],
        constraints=constraints,
        method='SLSQP'
    )

    if not result.success:
        print(f"Optimization failed: {result.message}")
    return result.x

# Forecast list for the early adopters: observed years first (as
# percentages), optimized years appended below.
predicted_distributions_cluster1 = distributions_percentages_cluster1.copy()  # First 5 years use real data

# Print the real distribution of tiers for the first years
for year, distribution in enumerate(distributions_percentages_cluster1[:5], start=1):
    print(f"Year {year}: {np.round(distribution, 2)}%")

# Forecast Years 6 to 20. Year 6 is optimized against Year 5 (the last real
# data point); every later year is optimized against the forecast of the
# year before it.
for year in range(6, 21):
    target_average_tier = logistic_curve_cluster1(year)
    # Take the most recent entry ([-1]), not a fixed index: the list grows by
    # one entry per iteration, and the monotonicity constraints and the
    # smoothness penalty are meant to apply year over year.
    previous_distribution = np.array(predicted_distributions_cluster1[-1]) / np.sum(predicted_distributions_cluster1[-1])

    next_distribution = optimize_distribution_cluster1(
        target_average_tier,
        previous_distribution
    )

    predicted_distributions_cluster1.append(next_distribution * 100)  # Convert to percentage

    print(f"Predicted Distribution for Year {year}: {np.round(next_distribution * 100, 2)}%")

# Average tier for each of the 20 years (observed, then forecast)
average_tiers = [calculate_average_tier_cluster1(dist) for dist in predicted_distributions_cluster1]

# Logistic target values for Years 1 to 20, for comparison with average_tiers
years = np.arange(1, 21)
logistic_targets = [logistic_curve_cluster1(x) for x in years]
