# 02_Feature_Engineering.ipynb – FarmPulse

## 1. Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import joblib

## 2. Load Dataset & Preprocessing Pipeline

In [2]:
# Resolve every input path from one project root so the notebook is not tied
# to a single machine. The default is the original Codespaces checkout; set
# the FARMPULSE_ROOT environment variable to run it from another location.
PROJECT_ROOT = os.environ.get("FARMPULSE_ROOT", "/workspaces/FarmPulse-Smart-Agriculture-App")
RAW_DATA_PATH = os.path.join(PROJECT_ROOT, "Data", "raw", "FarmPulse_data.csv")
PIPELINE_PATH = os.path.join(PROJECT_ROOT, "models", "preprocessing_pipeline.pkl")

# Raw dataset; the feature-engineering cell adds its new columns to this frame.
df = pd.read_csv(RAW_DATA_PATH)

# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load pipeline artifacts that were produced by this project.
full_pipeline = joblib.load(PIPELINE_PATH)

print("Dataset shape:", df.shape)

Dataset shape: (9986, 20)


## 3. Feature Engineering

In [3]:
# Categorical composites: each engineered label is built by casting its source
# columns to text and joining them, in the listed order, with an underscore.
#   Soil_Score         <- Soil_Health + Soil_pH_Category + Fertilizer_Needs
#   Climate_Risk_Score <- Climate_Stress + Drought_Risk
composite_sources = {
    'Soil_Score': ['Soil_Health', 'Soil_pH_Category', 'Fertilizer_Needs'],
    'Climate_Risk_Score': ['Climate_Stress', 'Drought_Risk'],
}
for target_name, source_names in composite_sources.items():
    text_parts = [df[name].astype(str) for name in source_names]
    label = text_parts[0]
    for part in text_parts[1:]:
        label = label + '_' + part
    df[target_name] = label

# Temperature amplitude (Temp_Max_Moy minus Temp_Min_Moy) is only computed
# when the dataset does not already ship with that column.
if 'Temp_Amplitude' not in df:
    df['Temp_Amplitude'] = df['Temp_Max_Moy'].sub(df['Temp_Min_Moy'])

# Quick look at the first rows of the engineered columns
engineered_columns = ['Soil_Score', 'Climate_Risk_Score', 'Temp_Amplitude']
print(df[engineered_columns].head())

                     Soil_Score Climate_Risk_Score  Temp_Amplitude
0  Moderate_Neutral_Medium Need            Low_Low            14.2
1  Moderate_Neutral_Medium Need            Low_Low            14.2
2      Good_Neutral_Medium Need            Low_Low            14.2
3      Good_Neutral_Medium Need            Low_Low            14.2
4      Good_Neutral_Medium Need            Low_Low            14.2


## 4. Visualizations – Save in reports/figures

In [9]:
# Make sure the output folder exists before any figure is written.
# (os and plt are already imported in the import cell at the top of the notebook.)
os.makedirs("reports/figures", exist_ok=True)

# One histogram per engineered numeric feature, saved as hist_<column>.png
new_numeric_features = ['Temp_Amplitude']
for col in new_numeric_features:
    fig, ax = plt.subplots(figsize=(6, 4))
    df[col].hist(bins=20, ax=ax)
    ax.set_title(col)
    # Label both axes so the saved image can be read on its own.
    ax.set_xlabel(col)
    ax.set_ylabel("Count")
    fig.savefig(f"reports/figures/hist_{col}.png")
    # Close explicitly so figures do not accumulate in memory across iterations.
    plt.close(fig)


In [10]:
# Count plots for the engineered categorical features, with bars ordered from
# the most to the least frequent category; saved as count_<column>.png
new_categorical_features = ['Soil_Score','Climate_Risk_Score']
for col in new_categorical_features:
    fig, ax = plt.subplots(figsize=(10, 4))
    sns.countplot(data=df, x=col, order=df[col].value_counts().index, ax=ax)
    # Rotate the long composite labels and right-align them so that each
    # label ends at the tick it belongs to.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    ax.set_title(f"Countplot of {col}")
    # bbox_inches="tight" grows the saved area to include the rotated labels,
    # which can otherwise run past the bottom edge of the figure.
    fig.savefig(f"reports/figures/count_{col}.png", bbox_inches="tight")
    # Close explicitly so figures do not accumulate in memory across iterations.
    plt.close(fig)

In [11]:
# Annotated correlation heatmap across every numeric column of df, which now
# includes the numeric features engineered earlier in the notebook.
numeric_cols = list(df.select_dtypes(include="number").columns)
corr_matrix = df.loc[:, numeric_cols].corr()

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", ax=ax)
ax.set_title("Correlation Heatmap – After Feature Engineering")
fig.savefig("reports/figures/corr_heatmap_features.png")
plt.close(fig)

In [13]:
# Write the feature-engineered dataset under <project root>/Data/processed.
# The root defaults to the original Codespaces checkout and can be overridden
# with the FARMPULSE_ROOT environment variable.
featured_data_path = os.path.join(
    os.environ.get("FARMPULSE_ROOT", "/workspaces/FarmPulse-Smart-Agriculture-App"),
    "Data", "processed", "FarmPulse_featured.csv",
)
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs(os.path.dirname(featured_data_path), exist_ok=True)
df.to_csv(featured_data_path, index=False)
# Report the path that was actually written; the previous message named a
# relative, lowercase "data/processed" location that did not match it.
print(f" Processed Dataset saved: {featured_data_path}")

 Processed Dataset saved: data/processed/FarmPulse_featured.csv
