In [47]:
import random
import pandas as pd
import numpy as np

# Constants for data generation
num_households = 3000
noms = [
    "Traoré", "Koné", "Kouassi", "Coulibaly", "Diabaté", "Ouattara", "Koffi", "N'Guessan",
    "Kouamé", "Yao", "Touré", "Fofana", "Bakayoko", "Kassi", "Kacou", "Brou", "Adou", "Aka",
    "Doumbia", "Sangaré", "Soro", "Dogbo", "Essis", "Kolo", "Blé", "Yao", "Gnagne", "Anoman",
    "Ehui", "Diarra", "Zézé", "Zadi", "Amani", "Ekra", "Obrou", "Gon", "Goua", "Adja", "Dago",
    "Dagri", "Adjé", "Ahoussou", "Aké", "Krou", "Kablan", "Bédié", "Gbato", "Doffou", "Attié",
    "Ayé", "Anoma", "Akissi", "Bahi", "Kouadio", "Diawara", "Abouo", "Dongo", "Atse", "Dahi",
    "Diomandé", "Bli", "Abié", "Boli", "Allou", "Armand", "Akaffou", "Adjinan", "Bamba", "Béné",
    "Bakou", "Angora", "Agnan", "Brissa", "Chiekh", "Daho", "Diallo", "Djanhan", "Gnahoré", 
    "Gnagne", "Koudou", "Krasso", "Loba", "Mambo", "N’da", "N'goran", "N’Dri", "N'Zi", "Nandé",
]
prenoms = [
    "Awa", "Kouadio", "Fatou", "Bakari", "Aya", "Aboubacar", "Koffi", "Adjoua", "Alima",
    "Assouan", "Akissi", "Bamba", "Belange", "Biaka", "Baoua", "Ble", "Danho", "Djeneba",
    "Ebene", "Edy", "Emmanuelle", "Faty", "Francine", "Fousseni", "Fofana", "Gbessi", "Gbato",
    "Hamed", "Habiba", "Jean-Baptiste", "Jacqueline", "Karidja", "Kady", "Kamal", "Kadjo",
    "Lamine", "Loukou", "Lago", "Mariama", "Madi", "Mariatou", "Moussa", "Mamadou", "Manuella",
    "Nafissatou", "Naby", "Nada", "Oumar", "Odette", "Patricia", "Paulo", "Rokia", "Rosine",
    "Saadia", "Salimata", "Solange", "Séraphin", "Téné", "Yacouba", "Yao", "Yasmina", "Zoumana",
]

cities = ["Abidjan", "Bouaké", "Abengourou", "Daloa", "San-Pédro", "Korhogo", "Yamoussoukro", "Gagnoa"]
localities = {
    "Abidjan": ["Yopougon", "Cocody", "Treichville", "Marcory", "Plateau", "Abobo"],
    "Bouaké": ["Dar-es-Salam", "Belleville", "Koko", "Zone Industrielle", "Ahougnansou"],
    "Daloa": ["Lobia", "Gbokora", "Orly", "Garage", "Commerce", "Gadougou"],
    "Korhogo": ["Soba", "Quartier du Plateau", "Sinistré", "Tchologo", "Kassirimé"],
    "Abengourou": ["Cocoteraie", "Campement", "Sous-préfecture", "Ebilassokro", "Habitat"],
    "Yamoussoukro": ["Kokrenou", "N'zuessi", "Assabou", "Dioulabougou", "Morofé"],
    "Gagnoa": ["Garahio", "Libreville", "Barreau", "Babré", "Brégbo"],
    "San-Pédro": ["Bardot", "Seweke", "Balmer", "Cité", "Lac"]
}

household_sizes = ["1-3", "4-6", "7-9", "10-12", "12+"]
income_ranges = ["-200k", "201-500k", "501-1M", "1-3M", "3M+"]
housing_types = ["Studio", "Appartement", "Duplex", "Triplex"]
months = ["Janvier", "Février", "Mars", "Avril", "Mai", "Juin", "Juillet", "Août", "Septembre", "Octobre", "Novembre", "Décembre"]
monthly_order = {
    "Décembre": 1.3, "Janvier": 1.2, "Juillet": 1.15, "Août": 1.15,
    "Juin": 1.1, "Novembre": 1.05, "Septembre": 1.0, "Octobre": 0.95,
    "Février": 0.9, "Mars": 0.85, "Avril": 0.8, "Mai": 0.75
}

# Generate household data
data = []

for household_id in range(1, num_households + 1):
    city = random.choices(cities, weights=[50, 10, 8, 8, 8, 8, 4, 4], k=1)[0]
    locality = random.choice(localities[city])

    size = random.choice(household_sizes)
    # Adjust income range based on household size
    if size == "1-3":
        income = random.choices(income_ranges, weights=[35, 35, 24, 5, 1], k=1)[0]
    elif size == "4-6":
        income = random.choices(income_ranges, weights=[25, 25, 30, 15, 5], k=1)[0]
    elif size == "7-9":
        income = random.choices(income_ranges, weights=[15, 25, 35, 10, 10], k=1)[0]
    else:  # Ménages de grande taille
        income = random.choices(income_ranges, weights=[5, 15, 30, 30, 20], k=1)[0]

    # Select random name and first name
    nom = random.choice(noms)
    prenom = random.choice(prenoms)

    # Determine housing type based on household size
    if size == "1-3":
        housing_type = random.choices(housing_types, weights=[60, 30, 5, 5], k=1)[0]
    elif size == "4-6":
        housing_type = random.choices(housing_types, weights=[45, 45, 5, 5], k=1)[0]
    elif size == "7-9":
        housing_type = random.choices(housing_types, weights=[40, 25, 25, 10], k=1)[0]
    elif size == "10-12":
        housing_type = random.choices(housing_types, weights=[30, 25, 25, 20], k=1)[0]
    else:  # Ménages très grands
        housing_type = random.choices(housing_types, weights=[20, 10, 40, 30], k=1)[0]

    num_infants = random.randint(0, 1) if size == "1-3" else random.randint(0, 2)
    num_special_diet = random.randint(0, 1) if size in ["1-3", "4-6"] else random.randint(1, 2)

    for month in months:
        # Base bottle consumption influenced by household size and income
        b6 = max(random.randint(1, 2), 1)
        b12 = max(random.randint(1, 2), 1)
        if size in ["7-9", "10-12"]:
            b6 += max(random.randint(1, 2), 1)
        if size == "12+":
            b12 += max(random.randint(1, 2), 1)
        if income in ["1-3M", "3M+"]:
            b6 += max(random.randint(1, 2), 1)

        # Adjust for high consumption months
        b6 = int(b6 * monthly_order.get(month, 1))
        b12 = int(b12 * monthly_order.get(month, 1))

        data.append({
            "ID_Ménage": household_id,
            "Nom": nom,
            "Prénom": prenom,
            "Ville": city,
            "Localité": locality,
            "Type_d'habitation": housing_type,
            "Taille de ménage": size,
            "Nbre_nourisson": num_infants,
            "Pers_Reg": num_special_diet,
            "revenu": income,
            "Mois": month,
            "B6": b6,
            "B12": b12,
            "Total": b6*12 + b12*23
        })

df = pd.DataFrame(data)
df.head()
file_path = r"C:/Users/Levi/Desktop/HCKT_donnee_final_true.csv"
df.to_csv(file_path, index=False)

In [48]:
df.head()

Unnamed: 0,ID_Ménage,Nom,Prénom,Ville,Localité,Type_d'habitation,Taille de ménage,Nbre_nourisson,Pers_Reg,revenu,Mois,B6,B12,Total
0,1,Diarra,Awa,Gagnoa,Babré,Studio,12+,1,1,501-1M,Janvier,2,4,116
1,1,Diarra,Awa,Gagnoa,Babré,Studio,12+,1,1,501-1M,Février,1,2,58
2,1,Diarra,Awa,Gagnoa,Babré,Studio,12+,1,1,501-1M,Mars,0,2,46
3,1,Diarra,Awa,Gagnoa,Babré,Studio,12+,1,1,501-1M,Avril,0,2,46
4,1,Diarra,Awa,Gagnoa,Babré,Studio,12+,1,1,501-1M,Mai,1,1,35


In [35]:
# Créer les mappings
taille_mapping = {"1-3": 0, "4-6": 1, "7-9": 2, "10-12": 3, "12+": 4}
revenu_mapping = {"-200k": 0, "201-500k": 1, "501-1M": 2, "1-3M": 3, "3M+": 4}
habitation_mapping = {"Studio": 0, "Appartement": 1, "Duplex": 2, "Triplex": 3}
mois_mapping = {
    "Janvier": 1, "Février": 2, "Mars": 3, "Avril": 4,
    "Mai": 5, "Juin": 6, "Juillet": 7, "Août": 8,
    "Septembre": 9, "Octobre": 10, "Novembre": 11, "Décembre": 12

}

ville_mapping = {
    "Abengourou": 0,
    "Abidjan": 1,
    "Bouaké": 2,
    "Daloa": 3,
    "Gagnoa": 4,
    "Korhogo": 5,
    "San-Pédro": 6,
    "Yamoussoukro": 7
}

In [36]:

# Application des mappings
df['Taille de ménage'] = df['Taille de ménage'].map(taille_mapping)
df['revenu'] = df['revenu'].map(revenu_mapping)
df['Type_d\'habitation'] = df['Type_d\'habitation'].map(habitation_mapping)
df['Mois'] = df['Mois'].map(mois_mapping)
df['Ville']=df['Ville'].map(ville_mapping)

In [37]:
df.head()

Unnamed: 0,ID_Ménage,Nom,Prénom,Ville,Localité,Type_d'habitation,Taille de ménage,Nbre_nourisson,Pers_Reg,revenu,Mois,B6,B12,Total
0,1,Bahi,Mariatou,1,Marcory,0,1,1,1,2,1,2,2,70
1,1,Bahi,Mariatou,1,Marcory,0,1,1,1,2,2,1,1,35
2,1,Bahi,Mariatou,1,Marcory,0,1,1,1,2,3,1,0,12
3,1,Bahi,Mariatou,1,Marcory,0,1,1,1,2,4,1,0,12
4,1,Bahi,Mariatou,1,Marcory,0,1,1,1,2,5,1,0,12


In [38]:
X = df[['Ville', 'Taille de ménage', "Type_d'habitation",'Mois']]
y = df['Total']


In [39]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model training (Random Forest Regressor)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [40]:
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 309.58857185052216


In [42]:
X_test

Unnamed: 0,Ville,Taille de ménage,Type_d'habitation,Mois
16461,1,3,1,10
23579,1,3,2,12
23640,6,4,3,1
25635,1,1,0,4
8840,0,0,0,9
...,...,...,...,...
28051,1,4,0,8
8206,1,4,0,11
9370,7,1,1,11
8094,1,0,0,7


In [44]:
import numpy as np

# Ensure X_2 is a 2D array
X_2 = np.array([[1, 2, 3, 11]])

# Make a prediction
y_pred = model.predict(X_2)

print("Prediction:", y_pred)

Prediction: [71.73697799]


