In [None]:
# Import necessary libraries
import pandas
import io
from sklearn import tree
from sklearn import linear_model
import warnings
from sklearn import metrics
from sklearn.metrics import confusion_matrix 

# Silence every UserWarning so the notebook output stays uncluttered.
# NOTE(review): scikit-learn warnings that derive from UserWarning
# (ConvergenceWarning, for instance) are hidden as well - lift this
# filter when debugging the model.
warnings.filterwarnings('ignore', category=UserWarning)

# Attach Google Drive to the Colab filesystem; the CSV files are read from it
from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

# =============================
# 1. Load and Prepare Training Data
# =============================

# Text alert labels and the integer class each one is encoded as.
# Series.map() turns any label that is missing from this table into NaN.
d = {
    '-1': 0,  # No Alert
    'l': 1,   # Low
    'm': 2,   # Medium
    'h': 3,   # High
}

# Columns used as model inputs
features = ['MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine']

# Training CSV stored on the mounted Google Drive
df = pandas.read_csv("gdrive/MyDrive/Data /BrisbaneWeatherTrainingData.csv")

# Replace the text labels with their numeric classes
df['AlertLevel'] = df['AlertLevel'].map(d)

# X: feature matrix, y: target labels
X, y = df[features], df['AlertLevel']

# =============================
# 2. Train Logistic Regression Model
# =============================

# Build a logistic-regression classifier with scikit-learn's default settings
# and fit it on the training data. fit() returns the estimator itself, so
# construction and training are chained into a single expression.
logr = linear_model.LogisticRegression().fit(X, y)

# =============================
# 3. Load and Prepare Test Data
# =============================

# Only the date, the target and the five model inputs are read from the test CSV
test_columns = [
    'Date', 'AlertLevel',
    'MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine',
]
df1 = pandas.read_csv(
    "gdrive/MyDrive/Data /BrisbaneWeatherTestData.csv",
    usecols=test_columns,
)

# Encode the test labels with the same dictionary used for the training labels
df1['AlertLevel'] = df1['AlertLevel'].map(d)

# =============================
# 4. Make Predictions on Test Data
# =============================

# Evaluate on (at most) the first 100 rows of the test set. head() returns
# fewer rows when the file is shorter, whereas indexing a fixed
# range(0, 100) with iloc would raise IndexError.
eval_rows = df1.head(100)

# Actual classes, selected by column name so the result does not depend on
# the order in which the columns appear in the CSV file.
true_y = eval_rows['AlertLevel'].tolist()

# Predicted classes for all rows in a single vectorised call. Passing a
# DataFrame with the same named feature columns the model was fitted on
# (instead of one bare list per row) keeps the feature order explicit and
# yields one scalar class per row rather than a list of 1-element arrays.
pred_y = logr.predict(eval_rows[features]).tolist()

# =============================
# 5. Evaluate Model Performance
# =============================

# Fraction of the evaluated rows whose predicted class equals the actual class
Accuracy = metrics.accuracy_score(true_y, pred_y)
print("Model Accuracy is: ", Accuracy)

# Confusion matrix: rows are actual classes, columns are predicted classes.
# The result is stored under its own name so that it does not rebind (and
# thereby shadow) the confusion_matrix function imported from sklearn.metrics
# at the top of the file.
conf_mat = metrics.confusion_matrix(true_y, pred_y)
print("Confusion Matrix is: ")
print(conf_mat)

Mounted at /content/gdrive
Model Accuracy is:  0.39
Confusion Matrix is: 
[[33  0  0  0]
 [34  0  0  0]
 [18  0  6  1]
 [ 5  0  3  0]]
