In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Read the source dataset from its CSV file into a DataFrame
dataset_path = "C:/JPEM_Git_Main/JPEM/JPEM_SAIT/Winter2025_PROJ406/Team 7/data/foodfusion.csv"
df = pd.read_csv(dataset_path)

# Simulate fictitious columns

# Generate random satisfaction scores
np.random.seed(42)  # Fixed seed so the simulated columns are reproducible
base_satisfaction = np.random.uniform(1, 5, size=len(df))  # One baseline score in [1, 5) per row

# Introduce bias for retained customers: rows where is_retained equals 1/True
# get a boost drawn from [0.5, 1.5); every other row gets noise from [-0.5, 0.5).
# The per-row bounds are handed to one vectorized draw rather than calling the
# generator once per row through a Python-level lambda.
retained_mask = (df['is_retained'] == 1).to_numpy()
boost_low = np.where(retained_mask, 0.5, -0.5)
boost_high = np.where(retained_mask, 1.5, 0.5)
retention_boost = pd.Series(np.random.uniform(boost_low, boost_high), index=df.index)

# Add the retention bias to the baseline and clamp so scores stay within 1 to 5
df['SatisfactionScore'] = np.clip(base_satisfaction + retention_boost, 1, 5)

# Preview the dataset
print(df[['is_retained', 'SatisfactionScore']].head())
# 1. Amount Spent (randomly generated, scaled by SatisfactionScore)
# Re-seeding with the same value used for the satisfaction baseline (42) would
# replay the identical sequence of uniform draws, turning the raw spend into an
# exact linear function of that baseline. A distinct seed keeps this step
# reproducible while giving it its own draws.
np.random.seed(43)
raw_spend = np.random.uniform(20, 150, size=len(df))  # Raw spend in [20, 150) per row
# Scale by SatisfactionScore / 5 so higher scores spend more; round to cents
df['amount_spent'] = np.round(raw_spend * (df['SatisfactionScore'] / 5), 2)

# 2. Frequency of Meals Purchased (based on age group)
# Each age group maps to the (low, high) bounds passed to np.random.randint,
# so `high` is exclusive.
age_to_frequency = {
    '0-18 years': (1, 5),
    '19-24 years': (3, 10),
    '25-30 years': (5, 12),
    '31-36 years': (4, 10),
    '37-45 years': (3, 8),
    '46+ years': (2, 6),
}

# Draw a separate frequency for every row. Calling randint inside the dict
# literal yields a single value per age group, shared by all of its rows.
meal_frequency = pd.Series(np.nan, index=df.index, dtype='float64')
for age_group, (low, high) in age_to_frequency.items():
    in_group = (df['age_group'] == age_group).to_numpy()
    meal_frequency.loc[in_group] = np.random.randint(low, high, size=int(in_group.sum()))

# Rows whose age group is not listed stay NaN (the same outcome Series.map
# gives for unmapped keys); cast to integers only when every row was assigned.
if meal_frequency.notna().all():
    meal_frequency = meal_frequency.astype('int64')
df['meal_frequency'] = meal_frequency

# 3. Average Order Value (amount spent divided by number of meals purchased)
# Divide by the actual frequency: adding 1 to the denominator understates
# every average. Non-positive frequencies are masked to NaN instead, so the
# division cannot produce inf.
meal_counts = df['meal_frequency'].where(df['meal_frequency'] > 0)
df['average_order_value'] = np.round(df['amount_spent'] / meal_counts, 2)

# 4. Discount Usage Rate: one independent uniform draw per row,
#    ranging from 0 (low usage) up to 1 (high usage)
df['discount_usage_rate'] = np.random.uniform(low=0, high=1, size=df.shape[0])

# 5. Meal Variety Index: simulated count of distinct meal types ordered,
#    from 1 up to the number of entries in meal_types
meal_types = ['Breakfast', 'Lunch', 'Dinner', 'Snack']
variety_upper_bound = len(meal_types) + 1  # randint excludes its upper bound
df['meal_variety_index'] = np.random.randint(
    low=1,
    high=variety_upper_bound,
    size=df.shape[0],
)

# 6. Days Active (random whole number of days within the past year, 1 to 365)
# No start date is actually computed; the value is drawn directly.
# randint's upper bound is exclusive, so 366 is required for 365 to be attainable.
df['days_active'] = np.random.randint(1, 366, size=len(df))

# Write the DataFrame, including the simulated columns, to a CSV file
# (the index is not written)
output_path = "updated_dataset_with_fictitious_columns.csv"
df.to_csv(output_path, index=False)

   is_retained  SatisfactionScore
0        False           2.371801
1        False           4.635769
2         True           4.604130
3         True           4.501901
4         True           2.600699
