In [1]:
import pandas as pd
import numpy as np
import joblib

In [2]:
# 1. Synthetic data generation (500 buildings used for calibration).
# The legacy global NumPy RNG is seeded once; every draw below happens in a
# fixed order, so re-running this cell reproduces exactly the same frame.
n_samples = 500
np.random.seed(42)

# Floor area: gamma-distributed with a 50-unit minimum offset.
# (Original note: building footprints sourced from OSM, ~1.5 million buildings.)
surface = 50 + np.random.gamma(shape=2, scale=100, size=n_samples)

# Building usage typology: residential / commercial / public, drawn 70/20/10.
usages = np.random.choice(
    a=['Residentiel', 'Commercial', 'Public'],
    size=n_samples,
    p=[0.7, 0.2, 0.1],
)

# Construction quality.
# (Original note: shares taken from RGPH 2022 - 42% traditional,
# 31% semi-modern, 27% modern.)
qualite = np.random.choice(
    a=['Traditionnel', 'Semi-moderne', 'Moderne'],
    size=n_samples,
    p=[0.42, 0.31, 0.27],
)

# Number of floors: one draw per building. randint's upper bound is exclusive,
# so traditional buildings get 1-2 floors and all others get 1-4.
etages = [
    np.random.randint(1, 3 if q == 'Traditionnel' else 5)
    for q in qualite
]

# Targets (energy and CO2).
# Energy: 0.5 per unit of floor area per floor, with ~10% multiplicative
# Gaussian noise.
# (Original note: calibrated on Togo's low electricity share, 4%.)
conso_base = surface * etages * 0.5
conso_noise = np.random.normal(1, 0.1, n_samples)
conso_elec = conso_base * conso_noise

# CO2: a flat 0.005 factor applied to the energy figure.
# (Original note: buildings estimated at 12.2% of fuel-combustion emissions.)
co2_emissions = 0.005 * conso_elec

# Assemble the frame using the same column names as the Seattle dataset.
lome_columns = {
    'PrimaryPropertyType': usages,
    'PropertyGFATotal': surface,
    'NumberofFloors': etages,
    'BuildingQuality': qualite,
    'SiteEnergyUse(kBtu)': conso_elec,
    'TotalGHGEmissions': co2_emissions,
}
df_lome = pd.DataFrame(lome_columns)

In [3]:
# Load the processed reference dataset from the mounted Drive folder and
# preview its first rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which suggests the
# CSV was written with its index; consider index_col=0 if nothing downstream
# relies on that column - confirm before changing.
processed_csv_path = '/content/drive/MyDrive/D-CLIC/projet_energisight/data/df_processed_final.csv'
df = pd.read_csv(processed_csv_path)
df.head()

Unnamed: 0,osebuildingid,datayear,buildingtype,primarypropertytype,propertyname,address,city,state,zipcode,taxparcelidentificationnumber,...,cat_Hospitality,cat_Other,cat_Public_Assembly,cat_Residential,cat_Storage,is_public_building,star_score_percentile,renovation_potential_index,surface_exposure_index,structural_complexity_score
0,1,2016,NonResidential,Hotel,Mayflower park hotel,405 Olive way,Seattle,WA,98101.0,659000030,...,True,False,False,False,False,0,0.255988,35.6,6802.615385,10.137121
1,2,2016,NonResidential,Hotel,Paramount Hotel,724 Pine street,Seattle,WA,98101.0,659000220,...,True,False,False,False,False,0,0.271814,7.8,8630.5,6.928784
2,3,2016,NonResidential,Hotel,5673-The Westin Seattle,1900 5th Avenue,Seattle,WA,98101.0,659000475,...,True,False,False,False,False,0,0.148417,26.79,22764.52381,6.472196
3,5,2016,NonResidential,Hotel,HOTEL MAX,620 STEWART ST,Seattle,WA,98101.0,659000640,...,True,False,False,False,False,0,0.221985,39.6,5574.545455,9.92149
4,8,2016,NonResidential,Hotel,WARWICK SEATTLE HOTEL (ID8),401 LENORA ST,Seattle,WA,98121.0,659000970,...,True,False,False,False,False,0,0.5787,9.0,9241.052632,13.041924


In [4]:


# 1. Reload the previously exported energy and CO2 models.
# joblib.load unpickles arbitrary objects: only load files you produced
# yourself or otherwise trust.
energy_model_path = '/content/drive/MyDrive/D-CLIC/projet_energisight/data/greensight_energy_model.pkl'
co2_model_path = '/content/drive/MyDrive/D-CLIC/projet_energisight/data/greensight_co2_model.pkl'
energy_model = joblib.load(energy_model_path)
co2_model = joblib.load(co2_model_path)

# Feature columns selected from the synthetic Lomé frame for modelling.
features_communes = [
    'PropertyGFATotal',
    'NumberofFloors',
    'PrimaryPropertyType',
    'BuildingQuality',
]

# Feature matrix restricted to those columns (all rows kept).
X_lome = df_lome.loc[:, features_communes]

In [7]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score

# 1. Columns available for Lomé (original note: sourced from OSM and
# RGPH 2022). 'datayear' and the other Seattle-only columns are not used.
# The numeric/categorical split is declared once and reused by the
# preprocessor so the two lists cannot drift apart.
numeric_features_lome = ['PropertyGFATotal', 'NumberofFloors']
categorical_features_lome = ['PrimaryPropertyType', 'BuildingQuality']
features_lome = numeric_features_lome + categorical_features_lome

# 2. Simplified preprocessor: standardise the numeric columns and one-hot
# encode the categorical ones. Categories unseen at fit time are encoded as
# all-zeros instead of raising (handle_unknown='ignore').
preprocessor_lome = ColumnTransformer(transformers=[
    ('num', StandardScaler(), numeric_features_lome),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features_lome)
])

# 3. GreenSight-Lomé pipeline: preprocessing followed by an XGBoost regressor.
# NOTE(review): these hyper-parameters are hard-coded here; the original
# comment says they mirror the previous XGBoost model - confirm.
lome_model = Pipeline(steps=[
    ('preprocessor', preprocessor_lome),
    ('regressor', xgb.XGBRegressor(n_estimators=100, learning_rate=0.05, random_state=42))
])

# 4. Train once on the small Lomé dataset (df_lome must already exist from
# the data-generation cell).
X_lome_fit = df_lome[features_lome]
y_lome_fit = df_lome['SiteEnergyUse(kBtu)']
lome_model.fit(X_lome_fit, y_lome_fit)

print("Mod√®le adapt√© avec succ√®s ! L'erreur 'datayear' est r√©solue.")

# 5. Score check. The pipeline is already fitted above, so predict directly
# instead of calling fit() a second time on the same data.
# NOTE(review): this R² is computed on the very rows used for training, so it
# is an in-sample figure and will overstate generalisation; use a held-out
# split or cross-validation before quoting it as model quality.
y_pred = lome_model.predict(X_lome_fit)
print(f"Score R¬≤ sur Lom√© : {r2_score(y_lome_fit, y_pred):.4f}")

Mod√®le adapt√© avec succ√®s ! L'erreur 'datayear' est r√©solue.
Score R¬≤ sur Lom√© : 0.9937


In [9]:
import joblib
import xgboost as xgb
from sklearn.pipeline import Pipeline

# Local import: clone() gives this pipeline its own unfitted preprocessor.
from sklearn.base import clone

# 1. Recover the hyper-parameters of the regressor used in the Seattle model.
# get_params() lists nested estimator parameters as '<component>__<param>'.
# When the 'regressor' step is a wrapper around the actual model, the useful
# hyper-parameters are the 'regressor__*' entries: keep exactly those and
# strip the prefix. The previous filter dropped every key starting with
# 'regressor', which discarded those entries and let the wrapper's own
# settings through to XGBRegressor instead.
# NOTE(review): whether the saved step is a wrapper or a bare XGBRegressor is
# not visible in this notebook - both cases are handled below; confirm.
raw_params = energy_model.named_steps['regressor'].get_params()
nested_prefix = 'regressor__'
nested_params = {
    name[len(nested_prefix):]: value
    for name, value in raw_params.items()
    # Skip deeper nesting levels ('regressor__a__b'): not constructor arguments.
    if name.startswith(nested_prefix) and '__' not in name[len(nested_prefix):]
}
# Bare estimator (no nested keys): its own parameters are used unchanged,
# which matches what the earlier filter produced in that case.
seattle_params_cleaned = nested_params if nested_params else dict(raw_params)

# 2. New Lomé pipeline using the Seattle hyper-parameters. The preprocessor is
# cloned so that fitting this pipeline does not refit the instance already
# embedded in lome_model.
lome_model_final = Pipeline(steps=[
    ('preprocessor', clone(preprocessor_lome)),
    ('regressor', xgb.XGBRegressor(**seattle_params_cleaned))
])

# 3. Fit on the Lomé data.
# NOTE(review): only hyper-parameters are carried over from the Seattle model;
# the regressor is trained from scratch here, so no learned trees or weights
# are transferred.
lome_model_final.fit(df_lome[features_lome], df_lome['SiteEnergyUse(kBtu)'])

print("üöÄ Transfer Learning FINALIS√â : Sans Warning et avec l'intelligence de Seattle !")

