# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error

# Load the dataset
url = "https://raw.githubusercontent.com/dsrscientist/dataset3/main/weatherAUS.csv"
data = pd.read_csv(url)

# Display basic information about the dataset.
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would only add a stray "None" line to the output.
data.info()
print(data.head())

# Drop columns that might not be relevant for prediction
data = data.drop(columns=['Date', 'Location'])

# Remember which rows have an observed target *before* anything is imputed.
# A model must not be trained or scored on a label that was filled in
# (a mean-imputed 'Rainfall', or a placeholder 'RainTomorrow' category).
has_class_target = data['RainTomorrow'].notna()
has_reg_target = data['Rainfall'].notna()

# Preprocessing
# Label-encode every column that is still non-numeric, not just the wind
# directions: the random forests need an all-numeric feature matrix, and
# 'RainTomorrow' is itself a feature of the regression task below.
categorical_cols = list(data.select_dtypes(exclude='number').columns)
label_encoders = {}  # one fitted encoder per column, so each mapping can be inverted
for col in categorical_cols:
    label_encoder = LabelEncoder()
    # Missing values become their own 'N/A' category; astype(str) keeps the
    # values homogeneous so the encoder can sort them.
    data[col] = label_encoder.fit_transform(data[col].fillna('N/A').astype(str))
    label_encoders[col] = label_encoder

# Fill missing numerical values with the column mean.
# numeric_only=True keeps this working even if a non-numeric column slips
# through (DataFrame.mean() raises TypeError on object columns in pandas >= 2).
# NOTE: the means are computed over all rows, i.e. before the train/test split.
data = data.fillna(data.mean(numeric_only=True))

# Split the data into features (X) and target (y) for both tasks, keeping only
# the rows whose target was actually observed.
# NOTE(review): each task uses the other task's target as a feature
# ('Rainfall' for classification, 'RainTomorrow' for regression), and any
# column derived from 'Rainfall' would leak the regression target -- confirm
# this is intended.
classification_rows = data.loc[has_class_target]
X_classification = classification_rows.drop(columns='RainTomorrow')
y_classification = classification_rows['RainTomorrow']

regression_rows = data.loc[has_reg_target]
X_regression = regression_rows.drop(columns='Rainfall')
y_regression = regression_rows['Rainfall']

# Split the data into training and testing sets for both tasks
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X_classification, y_classification, test_size=0.2, random_state=42
)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_regression, y_regression, test_size=0.2, random_state=42
)

# Train a RandomForestClassifier for classification task
clf_model = RandomForestClassifier(random_state=42)
clf_model.fit(X_train_class, y_train_class)
y_pred_class = clf_model.predict(X_test_class)
accuracy = accuracy_score(y_test_class, y_pred_class)
print("Classification Accuracy:", accuracy)

# Train a RandomForestRegressor for regression task
reg_model = RandomForestRegressor(random_state=42)
reg_model.fit(X_train_reg, y_train_reg)
y_pred_reg = reg_model.predict(X_test_reg)
mse = mean_squared_error(y_test_reg, y_pred_reg)
print("Regression Mean Squared Error:", mse)
