### Example: source link
https://medium.com/machine-learning-with-python/linear-regression-implementation-in-python-2de514d3a34e

This example predicts sales from the amount of money spent on TV advertising using simple linear regression. There is only one independent variable (the money spent on TV marketing) and one dependent variable (sales), which is the value to be predicted.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import math
from sklearn.model_selection import train_test_split
import csv

In [2]:
# Load the advertising data; the relative path is resolved against the current working directory
dataset = pd.read_csv('advertising.csv')

In [3]:
# Preview the first rows to check which columns were loaded
dataset.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9


In [4]:
# Drop the columns this single-feature example does not use.
# The result is reassigned instead of using inplace=True, and errors='ignore'
# is passed, so the cell is idempotent: re-running it after the columns are
# already gone no longer raises a KeyError.
dataset = dataset.drop(columns=['Radio', 'Newspaper'], errors='ignore')

In [5]:
# Preview the frame again to check which columns remain
dataset.head()

Unnamed: 0,TV,Sales
0,230.1,22.1
1,44.5,10.4
2,17.2,12.0
3,151.5,16.5
4,180.8,17.9


In [6]:
# Feature (TV advertising spend) and target (sales), both kept as pandas Series
x_train, y_train = dataset['TV'], dataset['Sales']


# x = dataset[['TV']]
# y = dataset['Sales']

In [7]:
# UNQ_C1
# GRADED FUNCTION: compute_cost

def compute_cost(x, y, w, b):
    """
    Computes the squared-error cost function for single-feature linear regression.

    The model is f_wb(x) = w * x + b and the cost is
    J(w, b) = (1 / (2 * m)) * sum((f_wb(x_i) - y_i) ** 2).

    Args:
        x (array-like): Shape (m,) Input to the model (e.g. TV advertising spend).
            Lists, ndarrays and pandas Series are accepted; a Series is read
            by position, so its index labels do not matter.
        y (array-like): Shape (m,) Label (e.g. actual sales)
        w, b (scalar): Parameters of the model

    Returns
        total_cost (float): The cost of using w,b as the parameters for linear regression
               to fit the data points in x and y

    Raises
        ValueError: If x is empty or x and y hold a different number of examples
    """
    # Work on flat float arrays so that elements are paired by position.
    # Indexing a pandas Series with x[i] is label-based and raises a KeyError
    # as soon as the index is not 0..m-1 (e.g. after a filter or a
    # train/test split).
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()

    # number of training examples
    m = x.shape[0]
    if m == 0:
        raise ValueError("x must contain at least one training example")
    if y.shape[0] != m:
        raise ValueError("x and y must contain the same number of examples")

    # Prediction error of every example, computed in one vectorized step
    errors = w * x + b - y

    # dot(errors, errors) is the sum of squared errors
    total_cost = np.dot(errors, errors) / (2 * m)

    return total_cost

In [8]:
# Evaluate the cost once with hand-picked initial values for parameters w, b
initial_w, initial_b = 2, 1
cost = compute_cost(x_train, y_train, w=initial_w, b=initial_b)

print(type(cost))
print(f'Cost at initial w: {cost:.3f}')

# The public tests (from public_tests import *) are disabled here;
# only the function object itself is echoed.
print(compute_cost)

<class 'numpy.float64'>
Cost at initial w: 53055.773
<function compute_cost at 0x000002CA79E18FE0>


In [9]:
# <!-- # UNQ_C2 -->
# <!-- # GRADED FUNCTION: compute_gradient
def compute_gradient(x, y, w, b):
    """
    Computes the gradient of the squared-error cost for single-feature linear regression.

    With f_wb(x) = w * x + b the partial derivatives are
    dJ/dw = (1 / m) * sum((f_wb(x_i) - y_i) * x_i) and
    dJ/db = (1 / m) * sum(f_wb(x_i) - y_i).

    Args:
      x (array-like): Shape (m,) Input to the model (e.g. TV advertising spend).
          Lists, ndarrays and pandas Series are accepted; a Series is read
          by position, so its index labels do not matter.
      y (array-like): Shape (m,) Label (e.g. actual sales)
      w, b (scalar): Parameters of the model
    Returns
      dj_dw (scalar): The gradient of the cost w.r.t. the parameter w
      dj_db (scalar): The gradient of the cost w.r.t. the parameter b
    Raises
      ValueError: If x is empty or x and y hold a different number of examples
    """
    # Work on flat float arrays so that elements are paired by position.
    # Indexing a pandas Series with x[i] is label-based and raises a KeyError
    # as soon as the index is not 0..m-1 (e.g. after a filter or a
    # train/test split).
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()

    # Number of training examples
    m = x.shape[0]
    if m == 0:
        raise ValueError("x must contain at least one training example")
    if y.shape[0] != m:
        raise ValueError("x and y must contain the same number of examples")

    # Prediction error of every example, computed in one vectorized step
    errors = w * x + b - y

    dj_dw = np.dot(errors, x) / m
    dj_db = np.sum(errors) / m

    return dj_dw, dj_db

In [10]:

# Evaluate the gradient at the origin (w = 0, b = 0) and display it
initial_w = initial_b = 0
tmp_dj_dw, tmp_dj_db = compute_gradient(x_train, y_train, w=initial_w, b=initial_b)
print('Gradient at initial w, b (zeros):', tmp_dj_dw, tmp_dj_db)

# Echo the function object itself
print(compute_gradient)

Gradient at initial w, b (zeros): -2631.610449999999 -15.130500000000005
<function compute_gradient at 0x000002CA79E78220>


In [11]:

# Evaluate and display the gradient at a non-zero point (w = b = 0.2)
test_w = test_b = 0.2
tmp_dj_dw, tmp_dj_db = compute_gradient(x_train, y_train, w=test_w, b=test_b)
print('Gradient at test w, b:', tmp_dj_dw, tmp_dj_db)

Gradient at test w, b: 3188.9164399999995 14.477999999999994


In [12]:
def gradient_descent(x, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    """
    Performs batch gradient descent to learn w and b. Updates the parameters by
    taking num_iters gradient steps with learning rate alpha.

    Args:
      x :    (ndarray): Shape (m,)
      y :    (ndarray): Shape (m,)
      w_in, b_in : (scalar) Initial values of parameters of the model
      cost_function: function to compute cost, called as cost_function(x, y, w, b)
      gradient_function: function to compute the gradient, called as
          gradient_function(x, y, w, b) and returning (dj_dw, dj_db)
      alpha : (float) Learning rate
      num_iters : (int) number of iterations to run gradient descent
    Returns
      w : Updated value of parameter w after running gradient descent
      b : Updated value of parameter b after running gradient descent
      J_history : (list) Cost after each iteration (first 100000 iterations only)
      w_history : (list) Value of w at every iteration whose cost was printed
    """
    # Costs are only stored for this many iterations to prevent resource exhaustion
    max_history = 100000

    # Histories of cost J and of w, primarily for graphing later
    J_history = []
    w_history = []
    w = copy.deepcopy(w_in)  # avoid modifying the caller's w within the function
    b = b_in

    # Loop-invariant: report progress about 10 times, or every iteration if num_iters < 10
    print_every = math.ceil(num_iters / 10)

    for i in range(num_iters):

        # Calculate the gradient and update the parameters
        dj_dw, dj_db = gradient_function(x, y, w, b)

        # Update Parameters using w, b, alpha and gradient
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        # Save cost J at each iteration
        if i < max_history:
            cost = cost_function(x, y, w, b)
            J_history.append(cost)

        # Print the cost at regular intervals
        if i % print_every == 0:
            w_history.append(w)
            if i >= max_history:
                # Beyond the history cap J_history[-1] would be stale, so
                # evaluate the current cost just for the report.
                cost = cost_function(x, y, w, b)
            print(f"Iteration {i:4}: Cost {float(cost):8.2f}   ")

    return w, b, J_history, w_history  # return w, b and the J, w histories for graphing

In [13]:

# # Normalize the feature
# x_train_norm = (x_train - x_train.mean()) / x_train.std()


# # initialize fitting parameters. Recall that the shape of w is (n,)
# initial_w = 0.
# initial_b = 0.

# # some gradient descent settings
# iterations = 1000
# alpha = 0.01

# w, b, _, _ = gradient_descent(x_train_norm, y_train, initial_w, initial_b, 
#                               compute_cost, compute_gradient, alpha, iterations)

# print("w,b found by gradient descent (normalized x):", w, b)



In [18]:
# Step 1: z-score the training feature and keep the statistics so new inputs
# can be scaled the same way at prediction time.
# Note: pandas Series.std() defaults to the sample standard deviation (ddof=1).
x_mean, x_std = x_train.mean(), x_train.std()
x_train_norm = (x_train - x_mean) / x_std

# Step 2: starting point and gradient descent settings
initial_w = initial_b = 0.
iterations, alpha = 1500, 0.1  # alpha can be tuned now that the feature is normalized

# Step 3: fit w and b on the normalized feature
w, b, J_history, w_history = gradient_descent(
    x_train_norm, y_train,
    w_in=initial_w, b_in=initial_b,
    cost_function=compute_cost, gradient_function=compute_gradient,
    alpha=alpha, num_iters=iterations,
)

print("Trained parameters:")
print(f"w = {w:.4f}, b = {b:.4f}")

# Step 4: Define prediction function using normalization stats
def predict(x, w, b, x_mean, x_std):
    """
    Predicts the target for raw (un-normalized) feature values.

    Normalizes x with the supplied mean and standard deviation and then
    evaluates the linear model w * x_norm + b.

    Args:
      x (scalar or array-like): Raw feature value(s). Lists and tuples are
          converted to ndarrays so several values can be predicted in one call;
          ndarrays and pandas Series are used as they are.
      w, b (scalar): Parameters of the model
      x_mean, x_std (scalar): Statistics used to normalize x
    Returns
      The prediction(s): a scalar for scalar input, otherwise an array-like
      with the same shape as x
    Raises
      ValueError: If x_std is zero, because the normalization would divide by zero
    """
    # A zero standard deviation would silently yield inf/nan predictions
    if np.any(np.asarray(x_std) == 0):
        raise ValueError("x_std must be non-zero to normalize x")

    # Plain Python sequences do not support arithmetic with scalars
    if isinstance(x, (list, tuple)):
        x = np.asarray(x, dtype=float)

    x_norm = (x - x_mean) / x_std
    return w * x_norm + b


Iteration    0: Cost   104.47   
Iteration  150: Cost     2.61   
Iteration  300: Cost     2.61   
Iteration  450: Cost     2.61   
Iteration  600: Cost     2.61   
Iteration  750: Cost     2.61   
Iteration  900: Cost     2.61   
Iteration 1050: Cost     2.61   
Iteration 1200: Cost     2.61   
Iteration 1350: Cost     2.61   
Trained parameters:
w = 4.7619, b = 15.1305


In [19]:

# # Example prediction on raw TV ad spend
# example_tv_spend = [[230.1,44.5,17.2,151.5]]
# predicted_sales = predict(example_tv_spend, w, b, x_mean, x_std)
# print(f"Predicted sales for TV ad spend {example_tv_spend}: {predicted_sales:.2f}")


In [20]:
# Raw TV ad spends for which sales should be predicted
tv_spend_list = [230.1, 44.5, 17.2, 151.5]

# Score every spend first, then report each (spend, prediction) pair
sales_predictions = [predict(spend, w, b, x_mean, x_std) for spend in tv_spend_list]
for tv_spend, predicted_sales in zip(tv_spend_list, sales_predictions):
    print(f"Predicted sales for TV ad spend {tv_spend}: {predicted_sales:.2f}")


Predicted sales for TV ad spend 230.1: 19.74
Predicted sales for TV ad spend 44.5: 9.44
Predicted sales for TV ad spend 17.2: 7.93
Predicted sales for TV ad spend 151.5: 15.38
