In [None]:
# Import essential libraries
import pandas
import io
from sklearn import tree
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
import warnings

# Ignore UserWarnings to keep output clean
warnings.filterwarnings(action='ignore', category=UserWarning)

# Mount Google Drive to access dataset files
from google.colab import drive
drive.mount('/content/gdrive')

# =============================
# 1. Load and Prepare Training Data
# =============================

# Read the training data CSV file
df = pandas.read_csv("gdrive/MyDrive/Data /BrisbaneWeatherTrainingData.csv")

# Convert categorical alert levels to numerical values for classification
# Original meanings:
# '-1' = No Alert, 'l' = Low, 'm' = Medium, 'h' = High
# Mapping to integers for the model:
# '-1' → 0, 'l' → 1, 'm' → 2, 'h' → 3
d = {'-1': 0, 'l': 1, 'm': 2, 'h': 3}
df['AlertLevel'] = df['AlertLevel'].map(d)

# Select relevant meteorological features
features = ['MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine']
X = df[features]         # Features matrix
y = df['AlertLevel']     # Labels (target)

# =============================
# 2. Train Random Forest Classifier
# =============================

# Initialize and train the Random Forest classifier
rForest = RandomForestClassifier()
rForest = rForest.fit(X, y)

# =============================
# 3. Load and Prepare Test Data
# =============================

# Load the test dataset with selected columns
df1 = pandas.read_csv(
    "gdrive/MyDrive/Data /BrisbaneWeatherTestData.csv", 
    usecols=['Date','AlertLevel','MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine']
)

# Convert categorical alert levels to numeric format
df1['AlertLevel'] = df1['AlertLevel'].map(d)

# =============================
# 4. Evaluate Model on Training Data
# =============================

# Re-load training data for evaluation purposes
df_training_accuracy = pandas.read_csv(
    "gdrive/MyDrive/Data /BrisbaneWeatherTrainingData.csv", 
    usecols=['Date','AlertLevel','MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine']
)
df_training_accuracy['AlertLevel'] = df_training_accuracy['AlertLevel'].map(d)

# Initialize lists to store predictions and actual values
training_true_y = []
training_pred_y = []

# Predict labels for each training sample
for num in range(len(df_training_accuracy)):
   training_true_y.append(df_training_accuracy.iloc[num,1])  # Actual
   training_pred_y.append(
       rForest.predict([[df_training_accuracy.iloc[num,2], df_training_accuracy.iloc[num,3],
                         df_training_accuracy.iloc[num,4], df_training_accuracy.iloc[num,5],
                         df_training_accuracy.iloc[num,6]]])
   )  # Predicted

# Calculate and print training accuracy
training_accuracy = metrics.accuracy_score(training_true_y, training_pred_y)
print("Model Training Accuracy is: ", training_accuracy)

# Display confusion matrix for training data
train_confusion_matrix = metrics.confusion_matrix(training_true_y, training_pred_y)
print("Training Confusion Matrix is:")
print(train_confusion_matrix)

# =============================
# 5. Evaluate Model on Test Data
# =============================

# Initialize lists for test predictions
true_y = []
pred_y = []

# Predict the first 100 test samples
for num in range(0, 100):
   true_y.append(df1.iloc[num,1])  # Actual value
   pred_y.append(
       rForest.predict([[df1.iloc[num,2], df1.iloc[num,3],
                         df1.iloc[num,4], df1.iloc[num,5],
                         df1.iloc[num,6]]])
   )  # Model prediction

# Calculate and print test accuracy
Accuracy = metrics.accuracy_score(true_y, pred_y)
print("Model Accuracy is: ", Accuracy)

# Display confusion matrix for test predictions
confusion_matrix = metrics.confusion_matrix(true_y, pred_y)
print("Confusion Matrix is:")
print(confusion_matrix)

Mounted at /content/gdrive
Model Training Accuracy is:  1.0
Training Confusion Matrix is:
[[1090    0    0    0]
 [   0   91    0    0]
 [   0    0  427    0]
 [   0    0    0   49]]
Model Accuracy is:  0.8
Confusion Matrix is:
[[32  0  1  0]
 [ 0 34  0  0]
 [16  0  6  3]
 [ 0  0  0  8]]


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
importances = rForest.feature_importances_

In [None]:
importances

array([0.25929333, 0.24034309, 0.07808078, 0.19663964, 0.22564316])