## Imports

In [1]:
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

## Chargement

In [2]:
# Read the crop recommendation dataset into a DataFrame.
df = pd.read_csv("data/Crop_recommendation.csv")

## Séparation Features/Target

In [3]:
# The 'label' column is the prediction target; every other column is a feature.
y = df['label']
X = df.drop(columns='label')

# Quick sanity check on the dimensions and on the distinct target values.
print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")
print(f"\nCultures disponibles: {y.unique()}")

## Encodage du target

In [4]:
# Convert the crop names into integer class ids. The id assigned to a crop is
# its position in `le.classes_`, which the loop below prints for reference.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

print("\n Mapping des labels:")
for i, crop in enumerate(le.classes_):
    print(f"{i}: {crop}")

# joblib.dump does not create missing parent directories, so make sure the
# output folder exists before persisting the fitted encoder (otherwise a fresh
# checkout fails here with FileNotFoundError).
os.makedirs('models', exist_ok=True)
joblib.dump(le, 'models/label_encoder.pkl')

## Split Train/Test

In [5]:
# Hold out 20% of the rows as a test set. Stratifying on the encoded labels
# keeps the class proportions equal in both subsets, and the fixed seed makes
# the split reproducible across runs.
split_options = dict(test_size=0.2, random_state=42, stratify=y_encoded)
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, **split_options)

print(f"Train set: {X_train.shape}")
print(f"Test set: {X_test.shape}")

## Vérification de la distribution

In [6]:
# Count the training samples per class and report them under the crop name
# (class ids are mapped back to names through the fitted encoder).
unique, counts = np.unique(y_train, return_counts=True)
print(f"\n Distribution Train:")
for crop_name, n_samples in zip(le.classes_[unique], counts):
    print(f"  {crop_name}: {n_samples}")

## Standardisation

In [7]:
# Fit the scaler on the training split only, then apply the same fitted
# transformation to both splits so that no test-set statistics leak into it.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Sauvegarde du scaler

In [8]:
# joblib.dump does not create missing parent directories, so make sure the
# output folder exists before persisting the fitted scaler (otherwise a fresh
# checkout fails here with FileNotFoundError).
os.makedirs('models', exist_ok=True)
joblib.dump(scaler, 'models/scaler.pkl')

print("Standardisation terminée")
# Sanity check on the scaled training features: per-column mean and standard
# deviation, expected to be close to 0 and 1 respectively.
print(f"Mean après scaling: {X_train_scaled.mean(axis=0)}")
print(f"Std après scaling: {X_train_scaled.std(axis=0)}")

## Sauvegarde des données préparées

In [9]:
# Write the scaled feature matrices and the encoded targets to disk as .npy files.
for path, array in (
    ('data/X_train_scaled.npy', X_train_scaled),
    ('data/X_test_scaled.npy', X_test_scaled),
    ('data/y_train.npy', y_train),
    ('data/y_test.npy', y_test),
):
    np.save(path, array)

print("\n Données sauvegardées avec succès!")

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=37cd5642-eb00-4cb1-969e-a9bc85cf5e83' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>