In [3]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Load the dataset
data = pd.read_csv('weatherAUS.csv')


In [4]:
# Select the features and target variable
features = data.iloc[:, [1, 2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]].values
target = data.iloc[:, -1].values





In [5]:
# Impute missing values with most frequent value
imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
features = imputer.fit_transform(features)




In [6]:
# Encode categorical features
encoder1 = LabelEncoder()
features[:, 0] = encoder1.fit_transform(features[:, 0])
encoder2 = LabelEncoder()
features[:, 4] = encoder2.fit_transform(features[:, 4])
encoder3 = LabelEncoder()
features[:, 6] = encoder3.fit_transform(features[:, 6])
encoder4 = LabelEncoder()
features[:, 7] = encoder4.fit_transform(features[:, 7])
encoder5 = LabelEncoder()
features[:, -1] = encoder5.fit_transform(features[:, -1])

# Encode target variable
encoder6 = LabelEncoder()
target = encoder6.fit_transform(target)



In [7]:
# Scale the features
scaler = StandardScaler()
features = scaler.fit_transform(features)



In [8]:
# Split the data into training and testing sets
train_features, test_features, train_target, test_target = train_test_split(features, target, test_size=0.2, random_state=0)



In [2]:
'''# Train a random forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=0)
rf_classifier.fit(train_features, train_target)
'''

'# Train a random forest classifier\nrf_classifier = RandomForestClassifier(n_estimators=100, random_state=0)\nrf_classifier.fit(train_features, train_target)\n'

In [9]:
# Train and evaluate the Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier
dtc = DecisionTreeClassifier(random_state=0)
dtc.fit(train_features, train_target)
dtc_y_pred = dtc.predict(test_features)
dtc_accuracy = accuracy_score(test_target, dtc_y_pred)

# Train and evaluate the Logistic Regression Classifier
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(random_state=0)
lr.fit(train_features, train_target)
lr_y_pred = lr.predict(test_features)
lr_accuracy = accuracy_score(test_target, lr_y_pred)

# Train and evaluate the KNN Classifier
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(train_features, train_target)
knn_y_pred = knn.predict(test_features)
knn_accuracy = accuracy_score(test_target, knn_y_pred)

# Train and evaluate the Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier(n_estimators=100,random_state=0)
rfc.fit(train_features, train_target)
rfc_y_pred = rfc.predict(test_features)
rfc_accuracy = accuracy_score(test_target, rfc_y_pred)

# Print the accuracies
print("Decision Tree Accuracy:", dtc_accuracy)
print("Logistic Regression Accuracy:", lr_accuracy)
print("KNN Accuracy:", knn_accuracy)
print("Random Forest Accuracy:", rfc_accuracy)

Decision Tree Accuracy: 0.7437551848096599
Logistic Regression Accuracy: 0.8108581436077058
KNN Accuracy: 0.8032998433035303
Random Forest Accuracy: 0.8258825698221034


In [10]:
# Ask the user for input
print('Please provide the following information:')
location = input('Location: ')
min_temp = float(input('Minimum temperature (in degrees Celsius): '))
max_temp = float(input('Maximum temperature (in degrees Celsius): '))
rainfall = float(input('Rainfall (in mm): '))
wind_gust_dir = input('Wind gust direction: ')
wind_gust_speed = float(input('Wind gust speed (in km/h): '))
wind_dir_9am = input('Wind direction at 9am: ')
wind_speed_9am = float(input('Wind speed at 9am (in km/h): '))
humidity_9am = float(input('Humidity at 9am: '))
pressure_9am = float(input('Pressure at 9am (in hPa): '))
cloud_9am = float(input('Cloud at 9am: '))
temp_9am = float(input('Temperature at 9am (in degrees Celsius): '))
humidity_3pm = float(input('Humidity at 3pm: '))
pressure_3pm = float(input('Pressure at 3pm (in hPa): '))
cloud_3pm = float(input('Cloud at 3pm: '))
temp_3pm = float(input('Temperature at 3pm (in degrees Celsius): '))
rain_today = input('Did it rain today? (yes or no): ')



Please provide the following information:
Location: Albury
Minimum temperature (in degrees Celsius): 15
Maximum temperature (in degrees Celsius): 27
Rainfall (in mm): 1.1
Wind gust direction: NE
Wind gust speed (in km/h): 45
Wind direction at 9am: N
Wind speed at 9am (in km/h): 6
Humidity at 9am: 80
Pressure at 9am (in hPa): 1011
Cloud at 9am: 8
Temperature at 9am (in degrees Celsius): 18
Humidity at 3pm: 83
Pressure at 3pm (in hPa): 1008
Cloud at 3pm: 1
Temperature at 3pm (in degrees Celsius): 21
Did it rain today? (yes or no): No


In [11]:
# Encode the user input
location_encoded = encoder1.transform([location])[0]
wind_gust_dir_encoded = encoder2.transform([wind_gust_dir])[0]
wind_dir_9am_encoded = encoder3.transform([wind_dir_9am])[0]
rain_today_encoded = encoder5.transform([rain_today])[0]  # Fix incorrect encoder index
user_input = np.array([[location_encoded, min_temp, max_temp, rainfall, wind_gust_dir_encoded, wind_gust_speed, wind_dir_9am_encoded, wind_speed_9am, humidity_9am, pressure_9am, cloud_9am, temp_9am, humidity_3pm, pressure_3pm, cloud_3pm, temp_3pm, rain_today_encoded, 0, 0]])  # Add missing columns and set dummy values
user_input = scaler.transform(user_input)
user_input = user_input.reshape(1, -1)  # Convert to 2D array
prediction = rfc.predict(user_input)
decoded_prediction = encoder6.inverse_transform(prediction)
print('The predicted rainfall for tomorrow is:', decoded_prediction)


The predicted rainfall for tomorrow is: ['No']
