In mathematical notation, a dataset with m rows and n features can be represented as 
a matrix X with dimensions (m x n), where each element X[i, j] is 
the value of the j-th feature for the i-th row.

In [2]:
import numpy as np

def step_gradient(points, learning_rate, theta):
    """Perform one batch gradient-descent update for least-squares regression.

    Each row of ``points`` holds the input features followed by the target
    value in the last column. For the mean squared error
    ``(1/N) * sum((y - theta . x) ** 2)`` the gradient with respect to
    ``theta`` is ``(-2/N) * sum((y - theta . x) * x)``; this function moves
    ``theta`` one step against that gradient.

    Args:
        points: 2-D array; every column but the last is a feature, the last
            column is the target.
        learning_rate: Step size multiplied into the gradient.
        theta: 1-D sequence of current coefficients, one per feature column.

    Returns:
        A new array holding the updated coefficients; ``theta`` itself is
        not modified.
    """
    points = np.asarray(points)
    theta = np.asarray(theta)
    n_rows = len(points)
    if n_rows == 0:
        # With no rows the gradient is all zeros, so theta comes back
        # unchanged (and we avoid dividing by zero below).
        return theta - learning_rate * np.zeros(len(theta))

    features = points[:, :-1]  # Input features (all but the last column)
    targets = points[:, -1]    # Target variable (last column)

    # Prediction errors for every row at once; computed a single time
    # instead of once per feature.
    residuals = targets - features @ theta
    theta_slope = (-2 / n_rows) * (features.T @ residuals)
    return theta - learning_rate * theta_slope

# data.shape         ---> (rows, columns)
# data.shape[0]      ---> number of rows
# data.shape[1]      ---> number of columns
# data.shape[1] - 1  ---> number of feature columns (the last column is the target)
# theta = np.zeros(num_features) ---> [0, 0, ..., 0]

def gradient_descent(data, learning_rate, num_iterations):
    """Run repeated gradient steps from an all-zero start and report the cost.

    Args:
        data: 2-D array whose last column is the target; the remaining
            columns are the features.
        learning_rate: Step size forwarded to ``step_gradient``.
        num_iterations: How many update steps to perform.

    Returns:
        The coefficient array after the final step.
    """
    # One coefficient per feature column (every column except the target).
    coefficients = np.zeros(data.shape[1] - 1)
    for iteration in range(num_iterations):
        coefficients = step_gradient(data, learning_rate, coefficients)
        current_cost = cost(data, coefficients)
        print(iteration, "Cost :", current_cost)
    return coefficients

def cost(data, theta):
    """Return the mean squared prediction error of ``theta`` over ``data``.

    Args:
        data: 2-D array; each row is the feature values followed by the
            target in the last column.
        theta: Coefficients, one per feature column.

    Returns:
        The average of ``(target - theta . features) ** 2`` across all rows,
        or ``0`` when ``data`` has no rows.
    """
    row_count = len(data)
    mean_squared_error = 0
    for row in data:
        features, target = row[:-1], row[-1]
        residual = target - np.dot(theta, features)
        # Each row contributes its squared error weighted by 1/row_count.
        mean_squared_error += (1/row_count) * residual ** 2
    return mean_squared_error

def run(data_path="data.csv", learning_rate=0.0001, num_iterations=10):
    """Load a comma-delimited dataset, fit it by gradient descent, print theta.

    Args:
        data_path: File read with ``np.loadtxt``; the last column is used as
            the target by ``gradient_descent``.
        learning_rate: Step size passed to ``gradient_descent``.
        num_iterations: Number of update steps passed to ``gradient_descent``.

    Returns:
        None. The fitted coefficients are printed, not returned.
    """
    # ndmin=2 keeps the array two-dimensional even when the file has a single
    # row; otherwise data.shape[1] in gradient_descent would fail.
    data = np.loadtxt(data_path, delimiter=",", ndmin=2)
    theta = gradient_descent(data, learning_rate, num_iterations)
    print("Theta:", theta)

# Execute the demo only when this code runs as the main program (a script or
# a notebook cell), not when it is imported by another module.
if __name__ == "__main__":
    run()


0 Cost : 1485.652621595884
1 Cost : 458.4008018325014
2 Cost : 199.7274628352812
3 Cost : 134.590662860195
4 Cost : 118.18849815531217
5 Cost : 114.05825216366071
6 Cost : 113.01821066557271
7 Cost : 112.7563167445008
8 Cost : 112.69036896643561
9 Cost : 112.67376258850246
Theta: [1.47799182]
