In [None]:
# Import libraries
import pandas
import io
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import warnings

# Hide UserWarning messages so they do not clutter the notebook output
warnings.filterwarnings('ignore', category=UserWarning)

# Make Google Drive available under /content/gdrive; the CSV files read
# further down live on that drive
from google.colab import drive
drive.mount('/content/gdrive')

# =============================
# 1. Load and Prepare Training Data
# =============================

# Load the training dataset.
# AlertLevel is read explicitly as text: the label mapping below is keyed by
# strings ('-1', 'l', 'm', 'h'), so the column must never be type-inferred as
# numeric. If pandas parsed '-1' as the integer -1, the lookup would miss the
# '-1' key and .map() would silently turn those labels into NaN.
df = pandas.read_csv(
    "gdrive/MyDrive/Data /BrisbaneWeatherTrainingData.csv",
    dtype={'AlertLevel': str},
)

# Map the alert levels to numerical values for model training
# These represent the forest fire alert categories:
# '-1' = No Alert, 'l' = Low, 'm' = Medium, 'h' = High
# The model needs numeric labels, so we map them as follows:
# '-1' → 0, 'l' → 1, 'm' → 2, 'h' → 3
# NOTE: any label that is not a key of this dict becomes NaN after .map().
d = {'-1': 0, 'l': 1, 'm': 2, 'h': 3}
df['AlertLevel'] = df['AlertLevel'].map(d)

# Select the features (independent variables).
# The order of this list is the column order the model is trained on.
features = ['MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine']
X = df[features]

# Select the label (dependent variable)
y = df['AlertLevel']

# =============================
# 2. Train the Decision Tree Classifier
# =============================

# Build a decision tree with default settings and train it on the training
# features (X) and labels (y) in one step; fit() hands back the fitted
# estimator, which is kept as `dtree` for the evaluation sections below.
dtree = DecisionTreeClassifier().fit(X, y)

# =============================
# 3. Load and Prepare Test Data
# =============================

# Load the test dataset with required columns.
# AlertLevel is pinned to text so it always matches the string keys of the
# label mapping `d`; without this, a file whose labels all look numeric
# (e.g. only '-1') would be inferred as integers and map to NaN.
# Note that `usecols` only filters columns; it does not reorder them.
df1 = pandas.read_csv(
    "gdrive/MyDrive/Data /BrisbaneWeatherTestData.csv",
    usecols=['Date','AlertLevel','MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine'],
    dtype={'AlertLevel': str},
)

# Map alert levels in the test set to numeric values (same as training)
df1['AlertLevel'] = df1['AlertLevel'].map(d)

# =============================
# 4. Evaluate Model on Training Data
# =============================

# Re-read the training data (just in case) for evaluation.
# AlertLevel is pinned to text so it always matches the string keys of the
# label mapping `d`.
df_training_accuracy = pandas.read_csv(
    "gdrive/MyDrive/Data /BrisbaneWeatherTrainingData.csv",
    usecols=['Date','AlertLevel','MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine'],
    dtype={'AlertLevel': str},
)
df_training_accuracy['AlertLevel'] = df_training_accuracy['AlertLevel'].map(d)

# True labels, selected by column name. `usecols` keeps the file's own column
# order, so positional indexing would pick the wrong columns if the CSV
# layout ever differed from the order listed above.
training_true_y = list(df_training_accuracy['AlertLevel'])

# Predict every row in a single call instead of one predict() per row.
# Indexing with `features` guarantees the same column order (and names) the
# model was fitted with.
training_pred_y = list(dtree.predict(df_training_accuracy[features]))

# Calculate and print training accuracy
training_accuracy = metrics.accuracy_score(training_true_y, training_pred_y)
print("Model Training Accuracy is: ", training_accuracy)

# Print training confusion matrix (rows = true class, columns = predicted)
train_confusion_matrix = metrics.confusion_matrix(training_true_y, training_pred_y)
print("Training Confusion Matrix is:")
print(train_confusion_matrix)

# =============================
# 5. Evaluate Model on Test Data
# =============================

# Number of leading test rows used for evaluation.
TEST_ROW_LIMIT = 100

# head() returns at most TEST_ROW_LIMIT rows, so a test file with fewer rows
# is evaluated in full instead of failing with an IndexError.
test_rows = df1.head(TEST_ROW_LIMIT)

# True labels, selected by column name rather than by position (the column
# order of df1 follows the CSV file, not the `usecols` list).
true_y = list(test_rows['AlertLevel'])

# Predict all selected rows in one call; indexing with `features` keeps the
# column order identical to the one the model was trained on.
pred_y = list(dtree.predict(test_rows[features]))

# Calculate and print test accuracy
Accuracy = metrics.accuracy_score(true_y, pred_y)
print("Model Accuracy is: ", Accuracy)

# Print confusion matrix for test predictions (rows = true, columns = predicted).
# The result is stored under its own name: assigning it to `confusion_matrix`
# would overwrite the function of that name imported from sklearn.metrics.
test_confusion_matrix = metrics.confusion_matrix(true_y, pred_y)
print("Confusion Matrix is:")
print(test_confusion_matrix)

Mounted at /content/gdrive
Model Training Accuracy is:  1.0
Training Confusion Matrix is:
[[1090    0    0    0]
 [   0   91    0    0]
 [   0    0  427    0]
 [   0    0    0   49]]
Model Accuracy is:  0.72
Confusion Matrix is:
[[23  3  7  0]
 [ 0 34  0  0]
 [14  0  7  4]
 [ 0  0  0  8]]
