In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset
file_path = 'rainfall_data.csv'
rainfall_data = pd.read_csv(file_path, encoding='ISO-8859-1')

# Columns used as predictors and the column to predict
feature_columns = ['Minimum temperature (°C)', 'Maximum temperature (°C)', 'Rainfall (mm)']
target_column = '9am Temperature (°C)'  # Assume we want to predict 9am Temperature

# Drop rows with missing values *jointly* across predictors and target.
# Calling dropna() on features and target separately can remove different
# rows from each, which leaves the two either with different lengths
# (train_test_split then raises) or with rows that no longer correspond.
model_data = rainfall_data[feature_columns + [target_column]].dropna()
features = model_data[feature_columns]
target = model_data[target_column]

# Sanity check: every feature row must have exactly one matching target value
assert features.index.equals(target.index), "features and target are misaligned"

# Split the data (80% train / 20% test, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

print("Training and testing data split successfully.")


Training and testing data split successfully.


In [15]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fit a linear regression model on the training split
# (fit() returns the estimator itself, so construction and training chain)
linear_regressor = LinearRegression().fit(X_train, y_train)

# Predict the target for the held-out test rows
y_pred = linear_regressor.predict(X_test)

# Score the predictions against the true test targets
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f"Linear Regression MSE: {mse}, R^2: {r2}")


Linear Regression MSE: 2.0506722964850606, R^2: 0.7965210367038886


In [17]:
# Summarise the regression metrics computed so far as a one-row table.
# Only the linear regression metrics (mse, r2) are included here.
report_rows = [
    {"Model": "Linear Regression", "Mean Squared Error": mse, "R^2 Score": r2},
]
regression_report = pd.DataFrame(
    report_rows,
    columns=["Model", "Mean Squared Error", "R^2 Score"],
)

print(regression_report)


               Model  Mean Squared Error  R^2 Score
0  Linear Regression            2.050672   0.796521


In [19]:
from sklearn.neighbors import KNeighborsRegressor

# Fit a k-nearest-neighbours regressor (k = 5) on the training split
knn_regressor = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train)

# Predict the target for the held-out test rows
y_pred_knn = knn_regressor.predict(X_test)

# Score the predictions against the true test targets
mse_knn = mean_squared_error(y_true=y_test, y_pred=y_pred_knn)
r2_knn = r2_score(y_true=y_test, y_pred=y_pred_knn)

print(f"KNN Regression MSE: {mse_knn}, R^2: {r2_knn}")


KNN Regression MSE: 2.4486666666666665, R^2: 0.757029850334886


In [21]:
from sklearn.tree import DecisionTreeRegressor

# Fit a decision tree regressor on the training split
# (random_state is fixed so the fitted tree is reproducible)
tree_regressor = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Predict the target for the held-out test rows
y_pred_tree = tree_regressor.predict(X_test)

# Score the predictions against the true test targets
mse_tree = mean_squared_error(y_true=y_test, y_pred=y_pred_tree)
r2_tree = r2_score(y_true=y_test, y_pred=y_pred_tree)

print(f"Decision Tree Regression MSE: {mse_tree}, R^2: {r2_tree}")


Decision Tree Regression MSE: 1.1500000000000006, R^2: 0.8858906865852649


In [23]:
from sklearn.linear_model import LogisticRegression

# Convert target variable to binary (classification task):
# 1 where the target is above its mean, 0 otherwise.
# NOTE(review): the mean is taken over the full target, including rows that
# end up in the test split — confirm this is acceptable for the analysis.
binary_target = (target > target.mean()).astype(int)

# Split the data into classification-specific variables.
# Dedicated *_bin names are used so the regression split (X_train / X_test)
# is not rebound by this cell; regression cells then do not depend on
# whether this cell has been executed.
X_train_bin, X_test_bin, y_train_bin, y_test_bin = train_test_split(
    features, binary_target, test_size=0.2, random_state=42
)

# Initialize and train the model (max_iter raised to 1000 for the solver)
logistic_regressor = LogisticRegression(max_iter=1000)
logistic_regressor.fit(X_train_bin, y_train_bin)

# Make predictions
y_pred_logistic = logistic_regressor.predict(X_test_bin)

# Calculate evaluation metrics (score() reports mean accuracy on the test split)
accuracy = logistic_regressor.score(X_test_bin, y_test_bin)

print(f"Logistic Regression Accuracy: {accuracy}")


Logistic Regression Accuracy: 0.8333333333333334


In [25]:
from sklearn.svm import SVR

# Fit a support vector regressor with default hyperparameters
svm_regressor = SVR().fit(X_train, y_train)

# Predict the target for the held-out test rows
y_pred_svm = svm_regressor.predict(X_test)

# Score the predictions against the true test targets
mse_svm = mean_squared_error(y_true=y_test, y_pred=y_pred_svm)
r2_svm = r2_score(y_true=y_test, y_pred=y_pred_svm)

print(f"SVM Regression MSE: {mse_svm}, R^2: {r2_svm}")


SVM Regression MSE: 4.676888468856471, R^2: 0.5359334503491278


In [27]:
from sklearn.metrics import classification_report

# Creating a final classification report for the logistic regression model.
# labels is passed explicitly so both classes (0 = below mean, 1 = above mean)
# are always reported; without it, a test split containing only one class
# would make the two-entry target_names list mismatch and raise a ValueError.
class_report = classification_report(
    y_test_bin,
    y_pred_logistic,
    labels=[0, 1],
    target_names=['Below Mean', 'Above Mean'],
)

print(class_report)


              precision    recall  f1-score   support

  Below Mean       0.80      1.00      0.89         4
  Above Mean       1.00      0.50      0.67         2

    accuracy                           0.83         6
   macro avg       0.90      0.75      0.78         6
weighted avg       0.87      0.83      0.81         6

