In [None]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global RNG so every run draws the same synthetic rows.
np.random.seed(0)

# Number of synthetic rows to generate.
num_samples = 100

# Hypothetical weight of each factor in the discount score (the weights sum to 1.0).
weights = {
    'spring_precip': 0.15,
    'winter_precip': 0.15,
    'spring_area': 0.1,
    'winter_area': 0.1,
    'surface_soil_moisture': 0.2,
    'root_zone_soil_moisture': 0.2,
    'sunlight_intensity': 0.1,
}

# Ideal value for each continuous factor. Both soil-moisture columns are
# compared against the single 'soil_moisture' entry.
ideal_conditions = {
    'spring_precip': 1328.38,
    'winter_precip': 1312.48,
    'soil_moisture': 0.25,
    'sunlight_intensity': 400,
}

# Denominators that scale the absolute deviation from the ideal. The
# precipitation terms divide by their own ideal value instead.
# NOTE(review): 0.1 matches the width of the sampled soil-moisture range and
# 400 matches both the sampled sunlight range width and the sunlight ideal --
# confirm which meaning is intended before changing either.
SOIL_MOISTURE_SCALE = 0.1
SUNLIGHT_SCALE = 400

# Upper bound applied to the score to obtain the discount rate.
MAX_DISCOUNT_RATE = 0.3


def _closeness(values, ideal, scale):
    """Return ``1 - |ideal - values| / scale`` element-wise for a Series."""
    return 1 - (ideal - values).abs() / scale


# Generate synthetic data. The columns are drawn in this exact order; reordering
# them would change which random numbers each column receives.
df = pd.DataFrame({
    'spring_precip': np.random.uniform(1200, 1400, num_samples),
    'winter_precip': np.random.uniform(1200, 1400, num_samples),
    'spring_area': np.random.choice([0, 1], num_samples, p=[0.05, 0.95]),
    'winter_area': np.random.choice([0, 1], num_samples, p=[0.05, 0.95]),
    'surface_soil_moisture': np.random.uniform(0.2, 0.3, num_samples),
    'root_zone_soil_moisture': np.random.uniform(0.2, 0.3, num_samples),
    'sunlight_intensity': np.random.uniform(200, 600, num_samples),
})

# Calculate the hypothetical discount score: a weighted sum of per-factor
# closeness-to-ideal terms, plus the two binary area flags taken as-is.
df['discount_score'] = (
    weights['spring_precip'] * _closeness(
        df['spring_precip'],
        ideal_conditions['spring_precip'],
        ideal_conditions['spring_precip'],
    )
    + weights['winter_precip'] * _closeness(
        df['winter_precip'],
        ideal_conditions['winter_precip'],
        ideal_conditions['winter_precip'],
    )
    + weights['spring_area'] * df['spring_area']
    + weights['winter_area'] * df['winter_area']
    + weights['surface_soil_moisture'] * _closeness(
        df['surface_soil_moisture'],
        ideal_conditions['soil_moisture'],
        SOIL_MOISTURE_SCALE,
    )
    + weights['root_zone_soil_moisture'] * _closeness(
        df['root_zone_soil_moisture'],
        ideal_conditions['soil_moisture'],
        SOIL_MOISTURE_SCALE,
    )
    + weights['sunlight_intensity'] * _closeness(
        df['sunlight_intensity'],
        ideal_conditions['sunlight_intensity'],
        SUNLIGHT_SCALE,
    )
)

# Cap the score at MAX_DISCOUNT_RATE to obtain the discount rate (vectorised
# equivalent of applying min(x, 0.3) to every row).
# NOTE(review): with the ranges sampled above the lowest attainable score is
# about 0.52, so the cap always triggers and every row gets exactly 0.3.
# If the rate is meant to vary with the score, this formula needs revisiting.
df['discount_rate'] = df['discount_score'].clip(upper=MAX_DISCOUNT_RATE)

# Show a short preview of the generated frame as a sanity check.
preview_rows = df.head()
print(preview_rows)

# Write the full frame to an Excel workbook, leaving out the index column.
output_file_path = 'synthetic_discount_score_dataset.xlsx'
df.to_excel(output_file_path, index=False)

# Report how many rows were written and where.
completion_message = f"Synthetic dataset with {num_samples} data points has been generated and saved as {output_file_path}."
print(completion_message)


   spring_precip  winter_precip  spring_area  winter_area  \
0    1309.762701    1335.563307            1            1   
1    1343.037873    1254.001595            1            1   
2    1320.552675    1347.038804            1            1   
3    1308.976637    1392.437709            1            1   
4    1284.730960    1249.750629            0            1   

   surface_soil_moisture  root_zone_soil_moisture  sunlight_intensity  \
0               0.240126                 0.231038          269.863354   
1               0.292929                 0.237303          331.195200   
2               0.209961                 0.252497          472.139466   
3               0.294530                 0.275060          225.283047   
4               0.286949                 0.233351          442.899750   

   discount_score  discount_rate  
0        0.905054            0.3  
1        0.863209            0.3  
2        0.892061            0.3  
3        0.805812            0.3  
4        0.769981  