In [None]:
!pip install -qy pandas==1.3.4 numpy==1.21.4 seaborn==0.9.0 matplotlib==3.5.0 scikit-learn==0.20.1

In [None]:
import warnings


def warn(*args, **kwargs):
    """Accept any positional/keyword arguments, do nothing, and return None."""
    return None


# Rebind the module-level hook: anything that calls `warnings.warn(...)` from
# here on reaches the no-op above instead of emitting a warning.
warnings.warn = warn

In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn import preprocessing
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.metrics import jaccard_score
from sklearn.metrics import f1_score
from sklearn.metrics import log_loss
from sklearn.metrics import confusion_matrix, accuracy_score
import sklearn.metrics as metrics

### Importing the Dataset


In [None]:
from pyodide.http import pyfetch


async def download(url, filename):
    """Fetch ``url`` with ``pyfetch`` and write the response body to ``filename``.

    Args:
        url: Address passed to ``pyfetch``.
        filename: Path of the local file to create (opened in binary write mode).

    Raises:
        RuntimeError: If the response status is not 200. Previously such a
            response was ignored and no file was written, so the failure only
            showed up later when the file was read.
    """
    response = await pyfetch(url)
    if response.status != 200:
        raise RuntimeError(
            f"Download of {url!r} failed with HTTP status {response.status}"
        )
    # Read the whole body before opening the target so a failed read does not
    # leave an empty file behind.
    payload = await response.bytes()
    with open(filename, "wb") as f:
        f.write(payload)

In [None]:
# Fetch the dataset and store it next to the notebook as "Weather_Data.csv".
# NOTE(review): `path` (the dataset URL) is not assigned in any cell visible in
# this notebook; on a fresh kernel this line raises NameError unless it is
# defined first — confirm the intended URL and add it above.
await download(path, "Weather_Data.csv")
# NOTE(review): `filename` is not referenced by the later visible cells, which
# repeat the literal file name instead.
filename = "Weather_Data.csv"

In [None]:
# Load the downloaded CSV into a DataFrame.
df = pd.read_csv("Weather_Data.csv")

In [None]:
# Preview the first rows to sanity-check the load.
df.head()

### Data Preprocessing


#### One Hot Encoding


In [None]:
# Expand the listed categorical columns into one indicator column per category;
# every other column of `df` is carried over unchanged.
categorical_columns = ["RainToday", "WindGustDir", "WindDir9am", "WindDir3pm"]
df_sydney_processed = pd.get_dummies(df, columns=categorical_columns)

In [None]:
# Map the remaining "No"/"Yes" strings to 0/1, modifying the frame in place.
df_sydney_processed.replace(to_replace=["No", "Yes"], value=[0, 1], inplace=True)

### Training Data and Test Data


In [None]:
# Remove the "Date" column before the frame is cast to float.
# Rebinding the name (instead of `inplace=True`) together with errors="ignore"
# keeps this cell re-runnable: the original raised KeyError on a second
# execution because "Date" was already gone.
df_sydney_processed = df_sydney_processed.drop(columns=["Date"], errors="ignore")

In [None]:
# Cast every column to float so the features and the target share one numeric dtype.
df_sydney_processed = df_sydney_processed.astype(float)

In [None]:
# Target: the "RainTomorrow" column. Features: every other column.
Y = df_sydney_processed["RainTomorrow"]
features = df_sydney_processed.drop(columns=["RainTomorrow"])

### Linear Regression


Use the `train_test_split` function to split the `features` and `Y` dataframes with a `test_size` of `0.2` and the `random_state` set to `10`.


In [None]:
# 80/20 train/test split shared by the Linear Regression, KNN and Decision Tree
# sections. The seed is 10, as stated in the instructions directly above this
# cell (the code previously used 20, contradicting them).
train_X, test_X, train_y, test_y = train_test_split(
    features, Y, test_size=0.2, random_state=10
)

# Report the split sizes as a quick sanity check.
print("Train X dimensions: ", train_X.shape)
print("Train y size      : ", train_y.size)
print("Test X dimensions : ", test_X.shape)
print("Test y size       : ", test_y.size)

#### Training a Linear Regression


In [None]:
# Fit an ordinary linear regression on the training split, then display the estimator.
LinearReg = LinearRegression()
LinearReg.fit(train_X, train_y)
LinearReg

In [None]:
# Predict on the held-out features; these outputs are scored in the next cell.
predictions = LinearReg.predict(test_X)

In [None]:
# Per-sample error between the regression output and the true target.
residuals = predictions - test_y

LinearRegression_MAE = np.mean(np.abs(residuals))  # mean absolute error
LinearRegression_MSE = np.mean(residuals ** 2)  # mean squared error
LinearRegression_R2 = metrics.r2_score(test_y, predictions)

#### MAE, MSE, and R2.


In [None]:
# Pair each metric label with its value, then tabulate with values rounded to 2 decimals.
linear_scores = [
    ("MAE  ", LinearRegression_MAE),
    ("MSE  ", LinearRegression_MSE),
    ("R2   ", LinearRegression_R2),
]
Report = pd.DataFrame(
    {
        "Metric": [name for name, _ in linear_scores],
        "Values": [round(score, 2) for _, score in linear_scores],
    }
)
print(Report)

### KNN


In [None]:
# Fit a 4-nearest-neighbours classifier on the training split.
KNN = KNeighborsClassifier(n_neighbors=4).fit(train_X, train_y)

In [None]:
# Class predictions for the held-out features (rebinds `predictions`).
predictions = KNN.predict(test_X)

In [None]:
# Score the KNN predictions against the held-out labels.
KNN_Accuracy_Score = accuracy_score(test_y, predictions)
# NOTE(review): the Jaccard index is computed with pos_label=0 while f1_score is
# called without pos_label — confirm that scoring different classes is intended.
KNN_JaccardIndex = jaccard_score(test_y, predictions, pos_label=0)
KNN_F1_Score = f1_score(test_y, predictions)

# The accuracy line previously read `round(x), 2`: it rounded to an integer and
# printed a stray 2. The precision argument now sits inside round().
print("Accuracy Score : ", round(KNN_Accuracy_Score, 2))
print("Jaccard Index  : ", round(KNN_JaccardIndex, 2))
print("F1 Score       : ", round(KNN_F1_Score, 2))

### Decision Tree


In [None]:
# Entropy-criterion decision tree capped at depth 10, trained on the same split as KNN.
# NOTE(review): no random_state is passed — confirm whether run-to-run
# reproducibility of the fitted tree matters here.
Tree = DecisionTreeClassifier(max_depth=10, criterion="entropy")
Tree.fit(train_X, train_y)
Tree

In [None]:
# Class predictions for the held-out features (rebinds `predictions`).
predictions = Tree.predict(test_X)

In [None]:
# Score the decision-tree predictions against the held-out labels.
Tree_Accuracy_Score = accuracy_score(test_y, predictions)
# NOTE(review): the Jaccard index is computed with pos_label=0 while f1_score is
# called without pos_label — confirm that scoring different classes is intended.
Tree_JaccardIndex = jaccard_score(test_y, predictions, pos_label=0)
Tree_F1_Score = f1_score(test_y, predictions)

# The accuracy line previously read `round(x), 2`: it rounded to an integer and
# printed a stray 2. The precision argument now sits inside round().
print("Accuracy Score : ", round(Tree_Accuracy_Score, 2))
print("Jaccard Index  : ", round(Tree_JaccardIndex, 2))
print("F1 Score       : ", round(Tree_F1_Score, 2))

### Logistic Regression


In [None]:
# Second 80/20 split (seed 1), used by the Logistic Regression and SVM sections.
x_train, x_test, y_train, y_test = train_test_split(
    features, Y, random_state=1, test_size=0.2
)

# Report the split sizes as a quick sanity check.
for label, size in (
    ("Train X dimensions: ", x_train.shape),
    ("Train y size      : ", y_train.size),
    ("Test X dimensions : ", x_test.shape),
    ("Test y size       : ", y_test.size),
):
    print(label, size)

In [None]:
# Logistic regression (liblinear solver, C=0.01) fitted on the seed-1 training split.
LR = LogisticRegression(solver="liblinear", C=0.01)
LR.fit(x_train, y_train)
LR

In [None]:
# Class predictions for the held-out features (rebinds `predictions`).
predictions = LR.predict(x_test)

In [None]:
# Per-class probability estimates; the log-loss metric below consumes these.
predict_proba = LR.predict_proba(x_test)

In [None]:
# Score the logistic-regression predictions against the held-out labels.
LR_Accuracy_Score = accuracy_score(y_test, predictions)
# NOTE(review): the Jaccard index is computed with pos_label=0 while f1_score is
# called without pos_label — confirm that scoring different classes is intended.
LR_JaccardIndex = jaccard_score(y_test, predictions, pos_label=0)
LR_F1_Score = f1_score(y_test, predictions)
# Log loss is computed from the probability estimates, not the hard labels.
LR_Log_Loss = log_loss(y_test, predict_proba)

# The accuracy line previously read `round(x), 2`: it rounded to an integer and
# printed a stray 2. The precision argument now sits inside round().
print("Accuracy Score : ", round(LR_Accuracy_Score, 2))
print("Jaccard Index  : ", round(LR_JaccardIndex, 2))
print("F1 Score       : ", round(LR_F1_Score, 2))
print("Log Loss       : ", round(LR_Log_Loss, 2))

### SVM


In [None]:
# RBF-kernel support vector classifier fitted on the seed-1 training split.
SVM = svm.SVC(kernel="rbf")
SVM.fit(x_train, y_train)
SVM

In [None]:
# Class predictions for the held-out features (rebinds `predictions`).
predictions = SVM.predict(x_test)

In [None]:
# Score the SVM predictions against the held-out labels.
SVM_Accuracy_Score = accuracy_score(y_test, predictions)
# NOTE(review): the Jaccard index is computed with pos_label=0 while f1_score is
# called without pos_label — confirm that scoring different classes is intended.
SVM_JaccardIndex = jaccard_score(y_test, predictions, pos_label=0)
SVM_F1_Score = f1_score(y_test, predictions)

# The accuracy line previously read `round(x), 2`: it rounded to an integer and
# printed a stray 2. The precision argument now sits inside round().
print("Accuracy Score : ", round(SVM_Accuracy_Score, 2))
print("Jaccard Index  : ", round(SVM_JaccardIndex, 2))
print("F1 Score       : ", round(SVM_F1_Score, 2))

### Report


In [None]:
# One row per classifier, one column per metric. Only logistic regression has a
# log-loss value; the other rows carry None in that column.
summary_columns = ["Accuracy Score", "Jaccard Index", "F1 Score", "Log Loss"]
model_scores = {
    "KNNeighbors": (KNN_Accuracy_Score, KNN_JaccardIndex, KNN_F1_Score, None),
    "DecisionsTree": (Tree_Accuracy_Score, Tree_JaccardIndex, Tree_F1_Score, None),
    "Logistic Regression": (
        LR_Accuracy_Score,
        LR_JaccardIndex,
        LR_F1_Score,
        LR_Log_Loss,
    ),
    "SupportVectorMachine": (
        SVM_Accuracy_Score,
        SVM_JaccardIndex,
        SVM_F1_Score,
        None,
    ),
}

# Round every available score to two decimals, leaving missing entries as None.
Report = pd.DataFrame(
    [
        [None if score is None else round(score, 2) for score in scores]
        for scores in model_scores.values()
    ],
    index=list(model_scores),
    columns=summary_columns,
)

print(Report)