In [1]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (2.0 kB)
Downloading xgboost-2.1.3-py3-none-manylinux2014_x86_64.whl (4.5 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.5/4.5 MB 64.7 MB/s eta 0:00:00
Installing collected packages: xgboost
Successfully installed xgboost-2.1.3


In [2]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from joblib import dump



Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a recent Linux distro with glibc 2.28+, and install the 'manylinux_2_28' variant.


In [8]:
# Read the two pre-split datasets from CSV files in the working directory.
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# Print a heading followed by the first five rows of each split, as a quick
# check that both files share the same columns.
for heading, frame in (("Training data :", train_df), ("Test data :", test_df)):
    print(heading)
    print(frame.head())


Training data :
   Soil Moisture  Temperature  Soil Humidity  Status
0             25           36           64.0       0
1              5           20           35.0       0
2             27           41           70.0       0
3             90           20           35.0       1
4             81           41           26.0       0
Test data :
   Soil Moisture  Temperature  Soil Humidity  Status
0             58           30           60.0       1
1             46           25           63.0       1
2             54           24           34.0       1
3             41           33           41.0       0
4             43            6           54.0       1


In [4]:
# Split each frame into its label column ('Status') and the remaining
# feature columns. drop() returns a new frame, so train_df / test_df are
# left unmodified.
y_train = train_df['Status']
X_train = train_df.drop(columns=['Status'])

y_test = test_df['Status']
X_test = test_df.drop(columns=['Status'])

# Wrap features and labels in DMatrix objects, the input type taken by
# xgb.train and Booster.predict.
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dtest = xgb.DMatrix(data=X_test, label=y_test)

# Booster hyperparameters for the binary 'Status' classifier.
params = {
    'objective': 'binary:logistic',  # binary classification; predictions are probabilities
    'eval_metric': 'logloss',
    'eta': 0.1,                      # learning rate (step-size shrinkage)
    'max_depth': 6,                  # maximum depth of each tree
    'subsample': 0.8,                # fraction of training rows sampled per tree
    'colsample_bytree': 0.8,         # fraction of feature columns sampled per tree
    # Row and column subsampling are random, so state the seed explicitly to
    # make the run reproducible. 0 is XGBoost's documented default, which
    # keeps the results consistent with the metrics recorded in this notebook.
    'seed': 0,
}

# Number of boosting rounds (trees) to fit.
num_round = 100
model = xgb.train(params, dtrain, num_boost_round=num_round)

In [5]:
# Score the test DMatrix with the trained booster.
y_pred_prob = model.predict(dtest)

# Convert scores to hard 0/1 labels: a row is labelled 1 only when its score
# is strictly greater than 0.5.
above_threshold = y_pred_prob > 0.5
y_pred = above_threshold.astype(int)



In [6]:
# Print per-class precision, recall and F1 for the thresholded predictions
# against the true test labels.
print("Classification Report:")
report_text = classification_report(y_test, y_pred)
print(report_text)

Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.79      0.77     28769
           1       0.74      0.70      0.72     24885

    accuracy                           0.75     53654
   macro avg       0.74      0.74      0.74     53654
weighted avg       0.75      0.75      0.74     53654



In [8]:
# Serialize the trained booster to a file in the working directory.
# NOTE(review): joblib pickles the Python object; XGBoost's own save_model
# format is presumably more portable across library versions — confirm what
# the consumer of this artifact expects before changing the format.
dump(model, filename='xgboost_model.joblib')

print("Model trained and saved 'xgboost_model.joblib'.")


Model trained and saved 'xgboost_model.joblib'.


In [7]:
import boto3
from joblib import dump

# Destination bucket and object key for the serialized model.
bucket_name = 'projectkafkabucket'
model_key = 'models/xgboost_model.joblib'

# No credentials or region are passed explicitly here; the client is created
# with boto3's defaults.
s3 = boto3.client('s3')

# Upload the local model file to s3://<bucket_name>/<model_key>.
s3.upload_file(
    Filename='xgboost_model.joblib',
    Bucket=bucket_name,
    Key=model_key,
)

print(f"Model saved successfuly in S3 bucket : {bucket_name}/{model_key}")


Model saved successfuly in S3 bucket : projectkafkabucket/models/xgboost_model.joblib
