In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [4]:
# Fetch the Iris dataset and unpack its feature matrix and class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a depth-limited decision tree and train it in one chained call
# (fit() hands back the fitted estimator)
clf = DecisionTreeClassifier(max_depth=3, random_state=42).fit(X_train, y_train)

# Classify the held-out samples
y_pred = clf.predict(X_test)

# Report how well the predictions match the held-out labels
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

Accuracy: 1.0


In [6]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

In [7]:
# Fetch the California housing dataset and unpack features and target values
california = fetch_california_housing()
X, y = california.data, california.target

# Hold out 20% of the samples for evaluation (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a 100-tree random forest and train it in one chained call
# (fit() hands back the fitted estimator)
regr = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Estimate the target for every held-out sample
y_pred = regr.predict(X_test)

# Report the error as RMSE: the square root of the mean squared error
print(f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred))}")

RMSE: 0.5053399773665033


In [9]:
import xgboost as xgb
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [10]:
# Load data
diabetes = load_diabetes()
X = diabetes.data

# Convert the continuous target into binary class labels:
# 1 when the value is above 140, otherwise 0.
# A vectorized comparison replaces the per-element Python loop and keeps
# the labels as a NumPy integer array.
y = (diabetes.target > 140).astype(int)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert the dataset into DMatrix, the optimized data structure that XGBoost trains on
D_train = xgb.DMatrix(X_train, label=y_train)
D_test = xgb.DMatrix(X_test, label=y_test)

# Define the parameters for the XGBoost classifier.
# 'multi:softprob' makes predict() return one probability per class for each row,
# so 'num_class' must match the number of distinct labels (2 here).
# NOTE(review): a two-class problem could use 'binary:logistic' instead; the
# objective is left unchanged so the reported accuracy stays comparable.
param = {
    'eta': 0.3,
    'max_depth': 3,
    'objective': 'multi:softprob',
    'num_class': 2,
}

steps = 20  # The number of training iterations (boosting rounds)

# Train the model
model = xgb.train(param, D_train, num_boost_round=steps)

# Predict the labels of the test set: pick the most probable class in each row.
# Using the array's own argmax avoids a Python-level loop and does not rely on
# `np` having been imported by an earlier, unrelated cell.
preds = model.predict(D_test)
best_preds = preds.argmax(axis=1)

# Print the accuracy of the classifier
print(f"Accuracy: {accuracy_score(y_test, best_preds)}")

Accuracy: 0.7528089887640449
