In [55]:
import os
import numpy as np
import pandas as pd
from itertools import product
from sklearn.preprocessing import PolynomialFeatures

# Ensure the `data` directory exists
os.makedirs("data", exist_ok=True)

# Study parameters
n_samples_per_combination = 10000  # Samples for each treatment combination
n_treatments = 7  # Number of binary treatments (t1, t2, ..., tn)
sigma = 5.0  # Standard deviation of the noise

print("Generating random polynomial g(t1, ..., tn)...")

# Generate a random polynomial of order 2 for g(t1, ..., tn)
np.random.seed(42)
poly = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)
treatment_combinations = np.array(list(product([0, 1], repeat=n_treatments)))
poly_features = poly.fit_transform(treatment_combinations)
coefficients = np.random.uniform(-10, 10, poly_features.shape[1])

def g(t):
    poly_t = poly.fit_transform(np.array(t).reshape(1, -1))
    return np.dot(poly_t, coefficients).item()

print(f"Polynomial coefficients: {coefficients}")

# Generate treatments and outcomes for each combination
print("Generating treatments and outcomes for all possible combinations...")
all_treatments = []
all_outcomes = []

for t_comb in treatment_combinations:
    print(t_comb)
    treatments = np.tile(t_comb, (n_samples_per_combination, 1))
    noise = np.random.normal(0, sigma, size=n_samples_per_combination)
    outcomes = np.array([g(t) for t in treatments]) + noise
    all_treatments.append(treatments)
    all_outcomes.append(outcomes)

# Combine all data
all_treatments = np.vstack(all_treatments)
all_outcomes = np.hstack(all_outcomes)

# Construct the dataset
print("Constructing the dataset...")
data = pd.DataFrame(all_treatments, columns=[f"t{i+1}" for i in range(n_treatments)])
data["Y"] = all_outcomes

# Save the dataset to a CSV file
file_name = "data/simulation_data_with_combinations.csv"
data.to_csv(file_name, index=False)
print(f"Dataset saved to {file_name}")
print("Sample of the dataset:")
print(data.head())

# Compute E(Y | t1, ..., tn) for all combinations of t1, ..., tn
print("Computing E(Y | t1, ..., tn) for all combinations...")
conditional_expectations = []

for t_comb in treatment_combinations:
    mask = (all_treatments == t_comb).all(axis=1)
    if mask.sum() > 0:
        conditional_expectations.append(all_outcomes[mask].mean())
    else:
        conditional_expectations.append(float("-inf"))  # Penalize unobserved combinations

# Optimize: Find the combination of treatments to maximize E(Y | t1, ..., tn)
print("Finding the optimal treatment combination...")
best_treatment_index = np.argmax(conditional_expectations)
best_treatment = treatment_combinations[best_treatment_index]
max_expected_y = conditional_expectations[best_treatment_index]

# Solver results
print("\nSolver Results:")
print(f"Best Treatment Combination: {best_treatment}")
print(f"Maximum Expected Y: {max_expected_y}")

Generating random polynomial g(t1, ..., tn)...
Polynomial coefficients: [-2.50919762  9.01428613  4.63987884  1.97316968 -6.87962719 -6.88010959
 -8.83832776  7.32352292  2.02230023  4.16145156 -9.58831011  9.39819704
  6.64885282 -5.75321779 -6.36350066 -6.3319098  -3.91515514  0.49512863
 -1.36109963 -4.1754172   2.23705789 -7.21012279 -4.15710703 -2.67276313
 -0.87860032  5.70351923 -6.00652436  0.28468877  1.84829138 -9.07099175
  2.15089704 -6.58951753 -8.69896814  8.97771075  9.31264066]
Generating treatments and outcomes for all possible combinations...
[0 0 0 0 0 0 0]
[0 0 0 0 0 0 1]
[0 0 0 0 0 1 0]
[0 0 0 0 0 1 1]
[0 0 0 0 1 0 0]
[0 0 0 0 1 0 1]
[0 0 0 0 1 1 0]
[0 0 0 0 1 1 1]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 1]
[0 0 0 1 0 1 0]
[0 0 0 1 0 1 1]
[0 0 0 1 1 0 0]
[0 0 0 1 1 0 1]
[0 0 0 1 1 1 0]
[0 0 0 1 1 1 1]
[0 0 1 0 0 0 0]
[0 0 1 0 0 0 1]
[0 0 1 0 0 1 0]
[0 0 1 0 0 1 1]
[0 0 1 0 1 0 0]
[0 0 1 0 1 0 1]
[0 0 1 0 1 1 0]
[0 0 1 0 1 1 1]
[0 0 1 1 0 0 0]
[0 0 1 1 0 0 1]
[0 0 1 1 0 1 0]
[

In [49]:
# Study parameters for the sample dataset
n_sampled_points = 100  # Number of samples to create for the smaller dataset

print("Creating a smaller sample dataset using the same DGP...")

# Generate treatments randomly for the smaller sample
sampled_treatments = np.random.randint(0, 2, size=(n_sampled_points, n_treatments))

# Generate noise for the smaller sample
sampled_e = np.random.normal(0, sigma, size=n_sampled_points)

# Generate outcomes Y for the smaller sample
sampled_Y = np.array([g(t) for t in sampled_treatments]) + sampled_e

# Construct the smaller dataset
sampled_data = pd.DataFrame(sampled_treatments, columns=[f"t{i+1}" for i in range(n_treatments)])
sampled_data["Y"] = sampled_Y

# Save the smaller dataset
sample_file_name = "data/sampled_simulation_data.csv"
sampled_data.to_csv(sample_file_name, index=False)
print(f"Sample dataset saved to {sample_file_name}")
print("Sample of the smaller dataset:")
print(sampled_data.head())

Creating a smaller sample dataset using the same DGP...
Sample dataset saved to data/sampled_simulation_data.csv
Sample of the smaller dataset:
   t1  t2  t3  t4  t5  t6  t7          Y
0   0   1   1   0   0   0   1  -2.146140
1   0   0   0   1   0   1   1   1.403147
2   0   0   0   1   1   0   0 -15.020936
3   0   1   0   1   0   1   1  -3.479224
4   1   0   1   1   1   0   0 -11.294713


In [50]:
from sklearn.linear_model import LinearRegression

print("Estimating g(t1, ..., tn) using simple linear regression...")

# Split features and target from the smaller dataset
X_sample = sampled_data[[f"t{i+1}" for i in range(n_treatments)]]
y_sample = sampled_data["Y"]

# Fit a linear regression model
linear_model = LinearRegression()
linear_model.fit(X_sample, y_sample)

# Coefficients of the linear regression model
print("Linear regression coefficients:")
for i, coef in enumerate(linear_model.coef_):
    print(f"t{i+1}: {coef:.4f}")
print(f"Intercept: {linear_model.intercept_:.4f}")

# Predict E(Y | t1, ..., tn) for all treatment combinations using the fitted model
print("Predicting E(Y | t1, ..., tn) for all combinations...")
predicted_expectations = linear_model.predict(treatment_combinations)

# Solve for the optimal treatment mix using integer programming
print("Finding the optimal treatment combination using linear regression...")
optimal_treatment_index = np.argmax(predicted_expectations)
optimal_treatment = treatment_combinations[optimal_treatment_index]
optimal_expected_y = predicted_expectations[optimal_treatment_index]

# Display solver results
print("\nSolver Results using Linear Regression:")
print(f"Optimal Treatment Combination: {optimal_treatment}")
print(f"Maximum Predicted E(Y): {optimal_expected_y}")

Estimating g(t1, ..., tn) using simple linear regression...
Linear regression coefficients:
t1: 9.4701
t2: -4.1692
t3: -4.3443
t4: -3.4666
t5: -18.8161
t6: -7.3785
t7: -0.0177
Intercept: 3.8740
Predicting E(Y | t1, ..., tn) for all combinations...
Finding the optimal treatment combination using linear regression...

Solver Results using Linear Regression:
Optimal Treatment Combination: [1 0 0 0 0 0 0]
Maximum Predicted E(Y): 13.34412668087277




In [54]:
from sklearn.linear_model import Lasso
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

print("Estimating g(t1, ..., tn) using polynomial Lasso regression...")

# Set up a pipeline for polynomial features and Lasso regression
lasso_alpha = 0.01  # Strong penalty (tunable parameter)
degree = 3  # Polynomial degree

lasso_model = Pipeline([
    ('poly', PolynomialFeatures(degree=degree, include_bias=False)),
    ('lasso', Lasso(alpha=lasso_alpha, max_iter=10000))
])

# Fit the Lasso regression model
lasso_model.fit(X_sample, y_sample)

# Coefficients of the Lasso regression model
lasso_coefficients = lasso_model.named_steps['lasso'].coef_
lasso_intercept = lasso_model.named_steps['lasso'].intercept_
print("Lasso regression coefficients (non-zero terms):")
for i, coef in enumerate(lasso_coefficients):
    if coef != 0:
        print(f"Feature {i+1}: {coef:.4f}")
print(f"Intercept: {lasso_intercept:.4f}")

# Predict E(Y | t1, ..., tn) for all treatment combinations using the fitted Lasso model
print("Predicting E(Y | t1, ..., tn) for all combinations using Lasso regression...")
predicted_expectations_lasso = lasso_model.predict(treatment_combinations)

# Solve for the optimal treatment mix using integer programming
print("Finding the optimal treatment combination using Lasso regression...")
optimal_treatment_index_lasso = np.argmax(predicted_expectations_lasso)
optimal_treatment_lasso = treatment_combinations[optimal_treatment_index_lasso]
optimal_expected_y_lasso = predicted_expectations_lasso[optimal_treatment_index_lasso]

# Display solver results
print("\nSolver Results using Polynomial Lasso Regression:")
print(f"Optimal Treatment Combination: {optimal_treatment_lasso}")
print(f"Maximum Predicted E(Y): {optimal_expected_y_lasso}")

Estimating g(t1, ..., tn) using polynomial Lasso regression...
Lasso regression coefficients (non-zero terms):
Feature 1: 2.8411
Feature 5: -16.2384
Feature 6: -11.3262
Feature 8: 0.4395
Feature 9: 1.7554
Feature 10: 0.9675
Feature 11: -3.6590
Feature 12: 6.9005
Feature 13: 5.9959
Feature 14: -2.5860
Feature 15: 0.0816
Feature 16: -4.5500
Feature 17: -0.1987
Feature 20: -1.2368
Feature 22: -5.2311
Feature 23: -1.7287
Feature 24: -0.3725
Feature 26: 0.9717
Feature 27: -5.0833
Feature 29: 1.7109
Feature 30: -0.2989
Feature 31: 0.0856
Feature 32: -4.6204
Feature 33: -2.0527
Feature 34: 5.1453
Feature 36: 0.0612
Feature 38: 1.8395
Feature 39: -3.2927
Feature 40: 1.2632
Feature 41: 0.8463
Feature 42: -1.4088
Feature 45: -0.5576
Feature 46: 0.7643
Feature 49: 1.4316
Feature 51: 0.1297
Feature 54: -0.9008
Feature 57: -1.1862
Feature 58: 0.0891
Feature 59: 1.5550
Feature 63: -0.3162
Feature 65: -0.0447
Feature 69: -1.9866
Feature 70: -0.0240
Feature 71: -1.4063
Feature 74: -0.5812
Feature 76: 

