# 1. Import Library

In [17]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt


# Load Dataset

In [18]:
# Read the weather records from the CSV file (path is relative to the
# working directory) into a DataFrame used by every later cell.
weather_data = pd.read_csv(filepath_or_buffer='Weather.csv')

# 2. EDA

In [None]:
# Initial data exploration: show the column names, dtypes and non-null counts.
print("Initial Data Information:")
# DataFrame.info() writes its summary straight to stdout and returns None,
# so wrapping it in print() would emit a stray "None" line after the summary.
weather_data.info()

In [None]:
# Report how many missing (NaN/None) entries each column contains.
print("\nMissing Values:\n", weather_data.isna().sum())

In [None]:
# Report how many rows are exact repeats of an earlier row
# (the first occurrence of each row is not counted as a duplicate).
print("\nDuplicated Rows:", weather_data.duplicated(keep='first').sum())

In [19]:
# Remove the 'No' column before modelling.
# NOTE(review): presumably a row-number identifier with no predictive
# value -- verify against the CSV.
weather_data = weather_data.drop('No', axis=1)


In [20]:
# One-hot encode the categorical feature columns.
# pd.get_dummies() turns each listed categorical column into 0/1 indicator
# (dummy) columns; `columns=[...]` selects which columns are encoded, and
# `drop_first=True` drops the first level of each column so that k categories
# yield k-1 indicators (avoids perfectly collinear dummy columns).
weather_data_encoded = pd.get_dummies(
    data=weather_data,
    columns=['Outlook', 'Temperature', 'Humidity', 'Windy'],
    drop_first=True,
)


# 3. Split Data into Training Set and Testing Set (70% / 30%)

In [21]:
# Separate the target from the predictors:
# 'Play' is the label to predict; every remaining encoded column is a feature.
y_encoded = weather_data_encoded['Play']
X_encoded = weather_data_encoded.drop('Play', axis=1)



In [22]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible between runs.
(
    X_train_encoded,
    X_test_encoded,
    y_train_encoded,
    y_test_encoded,
) = train_test_split(
    X_encoded,
    y_encoded,
    test_size=0.3,
    random_state=42,
)


# 4. Model Decision Tree

In [None]:
# Build a decision tree classifier with default hyper-parameters; the seed
# fixes any random tie-breaking so repeated runs grow the same tree.
dt_model_encoded = DecisionTreeClassifier(random_state=42)

# Fit the tree on the training portion only.
dt_model_encoded.fit(X=X_train_encoded, y=y_train_encoded)


In [24]:
# Use the fitted tree to predict the 'Play' label for each held-out test row.
y_pred_encoded = dt_model_encoded.predict(X=X_test_encoded)



# 5. Evaluation Metrics

In [25]:
# Compare the predictions with the true test labels.
# Confusion matrix: rows are true classes, columns are predicted classes.
confusion_mat_encoded = confusion_matrix(y_true=y_test_encoded, y_pred=y_pred_encoded)
# Per-class precision / recall / F1; target_names only relabels the classes
# (in sorted label order) for display.
classification_rep_encoded = classification_report(
    y_true=y_test_encoded,
    y_pred=y_pred_encoded,
    target_names=["No", "Yes"],
)
# Fraction of test rows that were predicted correctly.
accuracy_encoded = accuracy_score(y_true=y_test_encoded, y_pred=y_pred_encoded)



In [None]:
# Show each evaluation result under its heading, in a fixed order:
# accuracy, then the classification report, then the confusion matrix.
for heading, result in (
    ("Accuracy:", accuracy_encoded),
    ("\nClassification Report:\n", classification_rep_encoded),
    ("\nConfusion Matrix:\n", confusion_mat_encoded),
):
    print(heading, result)



# 6. Visualization

In [None]:
# Draw the fitted decision tree.
plt.figure(figsize=(12, 8))
plot_tree(
    dt_model_encoded,
    # plot_tree documents feature_names as a list of str; releases that
    # validate parameters strictly reject a pandas Index, so pass a plain list.
    feature_names=list(X_encoded.columns),
    # Display names for the classes, in ascending label order.
    class_names=["No", "Yes"],
    filled=True,   # colour each node by its majority class
    rounded=True,  # rounded node boxes for readability
)
plt.title("Decision Tree Visualization")
plt.show()