In [1]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.datasets import load_iris  # Import dataset sebagai contoh

# Load dataset Iris
iris = load_iris()
X, y = iris.data, iris.target  # Mendefinisikan fitur dan label

# Contoh K-Fold Cross-Validation
model = RandomForestClassifier()
scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
print(f"Accuracy: {np.mean(scores):.2f} (+/- {np.std(scores):.2f})")

Accuracy: 0.97 (+/- 0.02)


In [2]:
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import confusion_matrix

# Load dataset Iris
iris = load_iris()
X, y = iris.data, iris.target  # Mendefinisikan fitur dan label

# Membagi data menjadi training dan testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Contoh K-Fold Cross-Validation
model = RandomForestClassifier()
model.fit(X_train, y_train)  # Melatih model

scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
print(f"Accuracy: {np.mean(scores):.2f} (+/- {np.std(scores):.2f})")

# Prediksi dan Confusion Matrix
y_pred = model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)

Accuracy: 0.96 (+/- 0.02)
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


In [3]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Definisikan model dan parameter grid
model = RandomForestClassifier()
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30]
}

# Grid Search
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)

Best Parameters: {'max_depth': 20, 'n_estimators': 50}
Best Score: 0.9583333333333334


In [4]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier

# Definisikan model dan parameter distribusi
model = RandomForestClassifier()
param_dist = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'bootstrap': [True, False]
}

# Random Search
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_dist, n_iter=10, cv=5, scoring='accuracy')
random_search.fit(X_train, y_train)

print("Best Parameters:", random_search.best_params_)
print("Best Score:", random_search.best_score_)

Best Parameters: {'n_estimators': 200, 'max_depth': None, 'bootstrap': True}
Best Score: 0.95


In [10]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# 1️⃣ Load dataset (contoh dataset sederhana)
data = {'text': ["Gratis hadiah!", "Peningkatan akun segera", "Meeting jam 10", "Beli sekarang diskon 50%", "Mari bertemu di kantor"],
        'label': [1, 1, 0, 1, 0]}  # 1 = Spam, 0 = Non-Spam
df = pd.DataFrame(data)

# 2️⃣ Preprocessing
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['text'])
y = df['label']

# 3️⃣ Split Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4️⃣ Grid Search untuk Naive Bayes
param_grid_nb = {'alpha': [0.1, 0.5, 1.0, 5]}
nb = GridSearchCV(MultinomialNB(), param_grid_nb, cv=2) # Adjust cv to 2 or less
nb.fit(X_train, y_train)

# 5️⃣ Grid Search untuk SVM
param_grid_svm = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}
# Similar change for SVM GridSearchCV
svm = GridSearchCV(SVC(), param_grid_svm, cv=2)  # Adjust cv to 2 or less
svm.fit(X_train, y_train)

# 6️⃣ Evaluasi
best_nb = nb.best_estimator_
best_svm = svm.best_estimator_

y_pred_nb = best_nb.predict(X_test)
y_pred_svm = best_svm.predict(X_test)

print("Naive Bayes Accuracy:", accuracy_score(y_test, y_pred_nb))
print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))
print("Best Naive Bayes Parameters:", nb.best_params_)
print("Best SVM Parameters:", svm.best_params_)

Naive Bayes Accuracy: 1.0
SVM Accuracy: 1.0
Best Naive Bayes Parameters: {'alpha': 0.1}
Best SVM Parameters: {'C': 0.1, 'kernel': 'linear'}


In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

# 1️⃣ Load dataset (contoh dataset sederhana)
data = {'size': [50, 100, 150, 200, 250],
        'location': [1, 2, 3, 2, 1],  # 1: Suburban, 2: Urban, 3: City Center
        'year_built': [2000, 1995, 2010, 2005, 2020],
        'price': [100000, 150000, 250000, 200000, 300000]}
df = pd.DataFrame(data)

# 2️⃣ Split Data
X = df[['size', 'location', 'year_built']]
y = df['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 3️⃣ Random Search untuk Random Forest
param_dist_rf = {'n_estimators': [50, 100, 200], 'max_depth': [None, 5, 10], 'min_samples_split': [2, 5, 10]}
rf = RandomizedSearchCV(RandomForestRegressor(), param_distributions=param_dist_rf, n_iter=5, cv=3, random_state=42)
rf.fit(X_train, y_train)

# 4️⃣ Random Search untuk Gradient Boosting
param_dist_gb = {'n_estimators': [50, 100, 200], 'learning_rate': [0.01, 0.1, 0.2], 'max_depth': [3, 5, 7]}
gb = RandomizedSearchCV(GradientBoostingRegressor(), param_distributions=param_dist_gb, n_iter=5, cv=3, random_state=42)
gb.fit(X_train, y_train)

# 5️⃣ Evaluasi Model
best_rf = rf.best_estimator_
best_gb = gb.best_estimator_

y_pred_rf = best_rf.predict(X_test)
y_pred_gb = best_gb.predict(X_test)

print("Random Forest MAE:", mean_absolute_error(y_test, y_pred_rf))
print("Gradient Boosting MAE:", mean_absolute_error(y_test, y_pred_gb))
print("Best Random Forest Parameters:", rf.best_params_)
print("Best Gradient Boosting Parameters:", gb.best_params_)



Random Forest MAE: 64000.0
Gradient Boosting MAE: 34927.28657847928
Best Random Forest Parameters: {'n_estimators': 200, 'min_samples_split': 10, 'max_depth': None}
Best Gradient Boosting Parameters: {'n_estimators': 200, 'max_depth': 7, 'learning_rate': 0.01}
