## **7. Algorithm: XGBoost**  
### **Type: Supervised**

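XGBoost (eXtreme Gradient Boosting) builds decision trees sequentially: each new tree is trained to correct the errors made by the trees before it, a technique known as gradient boosting. It is a powerful tool that is widely used for both regression and classification tasks.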


In [0]:
# Install xgboost if it's not already installed.
# The import below is only an availability probe: if it succeeds, nothing else
# in this cell runs.
try:
    import xgboost
except ImportError:
    # %pip (rather than !pip) targets the environment backing this notebook's kernel.
    # NOTE(review): the version is not pinned, so the installed release may vary between runs.
    %pip install xgboost
    # `dbutils` is not defined in this notebook; presumably it is the utility
    # object provided by the Databricks runtime, and this call restarts the
    # Python process so the freshly installed package can be imported.
    # TODO confirm the target runtime -- outside such an environment this line
    # would raise NameError.
    dbutils.library.restartPython()

In [0]:
# Assemble the small housing dataset used to train the model
import pandas as pd

# One tuple per house, in column order:
#   size of the house in square feet, number of rooms,
#   city tier category, house price
house_records = [
    (1200, 3, 1, 275000),
    (500, 1, 3, 150000),
    (1500, 4, 1, 350000),
    (750, 2, 2, 200000),
    (1000, 3, 2, 250000),
]

training_data = pd.DataFrame(
    house_records,
    columns=["size_sqft", "num_rooms", "city_tier", "price"],
)


In [0]:
# Fit an XGBoost regression booster on the housing data
import xgboost as xgb

# Split the frame into the feature matrix and the target column
feature_columns = ["size_sqft", "num_rooms", "city_tier"]
X = training_data.loc[:, feature_columns]
y = training_data.loc[:, "price"]

# The native training API takes its input wrapped in a DMatrix
dtrain = xgb.DMatrix(data=X, label=y)

# No parameter overrides are passed (empty dict); run 100 boosting rounds
model = xgb.train(params={}, dtrain=dtrain, num_boost_round=100)


In [0]:
# Score a single unseen house with the trained XGBoost model
new_data = pd.DataFrame(
    [{"size_sqft": 950, "num_rooms": 2, "city_tier": 2}]
)

# Wrap the features in a DMatrix before handing them to the booster
dnew = xgb.DMatrix(data=new_data)
predictions = model.predict(dnew)

# Pull out the single row's size and prediction for the report line
house_size = new_data["size_sqft"][0]
predicted_price = predictions[0]
print(f"Predicted price for a {house_size} sqft house: ${predicted_price:,.2f}")


In [0]:
# XGBoost also provides a classifier, XGBClassifier, for classification use cases.
# The snippet below is an illustrative sketch only and is left commented out:
# `email_data` is not defined in the visible cells of this notebook, so it
# would need to be supplied before the code could run.

# from xgboost import XGBClassifier

# X = email_data[["from_company_domain", "num_risky_keywords", "num_spelling_mistakes"]]
# y = email_data["spam_or_not"]

# classifier = XGBClassifier(n_estimators=100)
# classifier.fit(X, y)