In [121]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [337]:
def sigmoid(z):
    return 1/(1 + np.exp(-z))

In [338]:
def forward_propagate(X, w, b):
    Z = np.dot(w.T, X) + b
    A = sigmoid(Z)
    return A

In [339]:
def backward_propagate(w, b, X, Y, A, m, n0, n1, learning_rate):
    dz = n0/n1 * (Y - Y * A) + A - Y * A
    dw = 1/m * np.dot(X, dz.T)
    db = 1/m * np.sum(dz)
    return dw, db

In [355]:
def initiate_params(n, m):
    w = np.zeros((n, 1))
    b = np.zeros((1, 1))
    return w, b

In [370]:
def logistic_regression(X, Y, learning_rate=0.1, num_iterations=10000):
    n, m = X.shape
    n0 = np.count_nonzero(Y == 0)
    n1 = np.count_nonzero(Y != 10)
    w, b = initiate_params(n, m)
    
    for i in range(num_iterations):
        A = forward_propagate(X, w, b)
        dw, db = backward_propagate(w, b, X, Y, A, m, n0, n1, learning_rate)
        w = w - learning_rate * dw
        b = b - learning_rate * db
        L = - n0/n1 * (Y * A + (1-Y) * np.log(1-A)) # loss
        cost = 1/m * np.sum(L)
        if i % 50 == 0:
            print(cost)
    return w, b, cost

In [395]:
def load_data():
    df = pd.read_excel('df_rain_erosion_daily.xlsx')
    df = df.dropna()
    rain_df = df[['plowing_2008', 'other_basic_cultivation_2008']]
    erosion_df = df[['raindrop_erosion_2008']]
    X_train, X_test, y_train, y_test = train_test_split(rain_df, erosion_df, test_size=0.33)
    
    return X_train.transpose().to_numpy(), X_test.transpose().to_numpy(), y_train.transpose().to_numpy(), y_test.transpose().to_numpy()
X_train, X_test, y_train, y_test = load_data()

In [402]:
w, b, cost = logistic_regression(X_train, y_train, 0.004, 10000)

0.2068787347333571
0.18808738065927244
0.1713849260275193
0.156533717845503
0.14331227104646743
0.13151895950372702
0.12097365547942647
0.11151777632101785
0.10301322498429605
0.09534066129209995
0.08839745166482808
0.08209554662377012
0.07635944664023478
0.07112434660584183
0.06633449886962056
0.061941801987303695
0.05790460308515386
0.05418669203247736
0.050756462074567445
0.047586211685688745
0.04465156443563335
0.041930986526205506
0.03940538469890402
0.0370577700953079
0.034872976207821475
0.032837421240475444
0.030938907016248265
0.029166448055603558
0.027510125657413793
0.02596096278537187
0.024510816343424113
0.023152284049775013
0.021878623621498835
0.020683682385889503
0.019561835760571737
0.018507933308141388
0.017517251285391697
0.016585450782032524
0.015708540687108093
0.014882844839283822
0.014104972814717419
0.013371793887241173
0.012680413763151266
0.0120281537494885
0.011412532062276673
0.01083124702134297
0.01028216191236323
0.009763291325688163
0.00927278880616537
0.

In [403]:
def predict(X, w, b):
    A_predicted = forward_propagate(X, w, b)
    return A_predicted

In [404]:
predict(X_train, w, b)

array([[1.67520078e-07, 7.18197188e-05, 4.38930249e-08, 1.67520078e-07,
        1.02373506e-10, 1.67520078e-07, 4.38930249e-08, 4.38930249e-08,
        1.67520078e-07, 4.38930249e-08, 4.38930249e-08, 1.02373506e-10,
        4.38930249e-08, 7.18197188e-05, 4.38930249e-08, 1.02373506e-10,
        7.18197188e-05, 7.18197188e-05, 1.67520078e-07, 1.67520078e-07,
        4.38930249e-08, 7.18197188e-05, 4.38930249e-08, 1.02373506e-10,
        1.02373506e-10, 1.02373506e-10, 7.18197188e-05, 1.67520078e-07,
        1.02373506e-10, 1.02373506e-10, 1.67520078e-07, 7.18197188e-05,
        4.38930249e-08, 1.67520078e-07, 1.67520078e-07, 1.67520078e-07,
        4.38930249e-08]])