Consider the data below on dollars spent on advertising (in thousands) vs. sales revenue (in millions), which was collected by a separate business.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Observed data: advertising spend (x, in thousands) paired with the
# sales revenue (y, in millions) recorded at that spend level.
dollars_spent = [1, 2, 3, 4, 5]
sales_revenue = [5.8, 6.7, 10.3, 14.4, 12.5]

# Draw each (x, y) observation as a blue dot.
plt.scatter(
    dollars_spent,
    sales_revenue,
    color="blue",
    marker="o",
    label="(x,y)- Observed Data",
)

# Fix the viewing window so that both axes start at zero.
plt.xlim([0, 5.5])
plt.ylim([0, 15])
plt.grid(True)

# Axis labels and the legend entry for the scatter points.
plt.xlabel("Dollars spent on advertising (thousands)")
plt.ylabel("Sales revenue (millions)")
plt.legend()

# Write the figure to disk first, then display it.
plt.savefig("new_data.png", bbox_inches="tight", dpi=300)
plt.show()

- Use Wikki Stix to trace out the predictive model you think best fits this data.  
- What does the mathematical equation for your model look like? Write it down.  
- Do you think we can use the same approach for this data that we used for the other dataset last week?  
  What would you keep the same and what would you change?

Run the code cell below to see the Cost vs. Slope graph for the model $\hat{y} = mx$.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# =============================================
# Data: advertising dollars (x) and sales revenue (y)
# =============================================
dollars_spent = [1, 2, 3, 4, 5]
sales_revenue = [5.8, 6.7, 10.3, 14.4, 12.5]

# =============================================
# Step 1: Candidate slopes (the model parameter we are scanning)
# =============================================
# Slopes 0, 0.5, 1.0, ..., 5.0
slope_choices = np.arange(0, 5.5, 0.5)

# =============================================
# Step 2: Predictions of the model ŷ = m * x for every candidate slope
# =============================================
# Turning the slopes into a column lets broadcasting build a table with
# one row per slope and one column per data point.
predictions = slope_choices.reshape(-1, 1) * dollars_spent

# =============================================
# Step 3: Cost of every candidate slope
# =============================================
# Mean Squared Error (MSE):
#     Cost = (1/n) * Σ (y - ŷ)²
# The sum runs along each row, i.e. over the data points of one slope.
squared_errors = (sales_revenue - predictions) ** 2
cost = (1 / len(sales_revenue)) * squared_errors.sum(axis=1)

# =============================================
# Step 4: Plot cost against slope
# =============================================
plt.plot(slope_choices, cost, marker="o", label="Cost")
plt.xlim([-0.5, 5.5])
plt.ylim([0, 120])
plt.xlabel("Slope value (m)")
plt.ylabel("Cost (Mean Squared Error)")
plt.grid(True)
plt.legend()
plt.show()

Pick some test values for $\texttt{init\_val}$ in the model $\hat{y} = mx + \texttt{init\_val}$ and see how each one changes the plot of Cost vs. Slope.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# ---------------------------------------------
# Observed data: advertising dollars vs. sales revenue
# ---------------------------------------------
dollars_spent = [1,2,3,4,5]
sales_revenue = [5.8,6.7,10.3,14.4,12.5]

# ---------------------------------------------
# Step 1: Create a range of possible slope values (our model parameters)
# ---------------------------------------------
# We'll test slope values from 0 to 5 in steps of 0.5
slope_choices = np.arange(0, 5.5, 0.5)
# Initial value (intercept) added to every prediction.
# Change this number and re-run the cell to see how the cost curve moves.
init_val = 1
# ---------------------------------------------
# Step 2: Generate predictions for each slope
# ---------------------------------------------
# Each slope defines a line:  ŷ = m * x + init_val
# We compute predicted y-values for each slope and each data point
# (one row per slope, one column per data point)
predictions = slope_choices[:, np.newaxis] * dollars_spent + init_val

# ---------------------------------------------
# Step 3: Compute the cost for each slope
# ---------------------------------------------
# We'll use Mean Squared Error (MSE):
#     Cost = (1/n) * Σ (y - ŷ)²
# axis=1 sums over the data points, leaving one cost value per slope
cost = (1 / len(sales_revenue)) * np.sum((sales_revenue - predictions) ** 2, axis=1)

# ---------------------------------------------
# Step 4: Plot the cost function
# ---------------------------------------------
plt.plot(slope_choices, cost, marker="o", label="Cost")
plt.xlabel("Slope value (m)")
plt.ylabel("Cost (Mean Squared Error)")
plt.xlim([-.5, 5.5])
plt.ylim([0, 120])
plt.grid(True)
plt.legend()
plt.show()

What we really want to do is optimize $m$ and $\texttt{init\_val}$ at the same time.

How do you propose we do that? For example, how do you think the cost function should change?

Let's write out the cost function term-by-term.

In [None]:
import sympy as sp

# Exercise template: replace every ??? before running this cell
# (as written it is not valid Python).
# Declare the symbols that appear in the cost function,
# e.g.  m, b = sp.symbols('m b')
??? = sp.symbols('???')

# Write the cost function out term by term using those symbols.
expr = (???)

# Ask SymPy to simplify the expression and print the result.
print("Simplified:", sp.simplify(expr))

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# ---------------------------------------------
# Observed data: advertising dollars vs. sales revenue
# ---------------------------------------------
dollars_spent = np.array([1,2,3,4,5])
sales_revenue = np.array([5.8,6.7,10.3,14.4,12.5])

# ---------------------------------------------
# Step 1: Create a range of possible slope values (our model parameters)
# ---------------------------------------------
# We'll test slope values from 0 to 5 in steps of 0.5
slope_step = 0.5
slopes = np.arange(0, 5.5, slope_step)

# ---------------------------------------------
# Step 2: Create a range of possible initial values (our model parameters)
# ---------------------------------------------
# We'll test init_vals from -2 up to (but not including) 10 in steps of 0.5,
# so the largest value tested is 9.5
init_step = 0.5
init_vals = np.arange(-2, 10, init_step)

# ---------------------------------------------
# Step 3: Compute the cost for each (init_val, slope) pair
# ---------------------------------------------
# We'll use Mean Squared Error (MSE):
#     Cost = (1/n) * Σ (y - ŷ)²   with   ŷ = m * x + init_val
# mse[i, j] is the cost for init_vals[i] (row) and slopes[j] (column)
mse = np.zeros((len(init_vals), len(slopes)))

for i, init_val in enumerate(init_vals):
    for j, m in enumerate(slopes):
        y_pred = m * dollars_spent + init_val
        mse[i, j] = np.mean((sales_revenue - y_pred) ** 2)

# ---------------------------------------------
# Step 4: Plot the cost as a heatmap
# ---------------------------------------------
plt.figure(figsize=(10, 8))
# imshow spreads the cells evenly across `extent`. Padding the extent by half
# a step on every side puts the CENTER of each cell exactly on its
# (slope, init_val) pair, so the tick values and the labels below line up
# with the cells they describe.
plt.imshow(
    mse,
    extent=[
        slopes[0] - slope_step / 2,
        slopes[-1] + slope_step / 2,
        init_vals[0] - init_step / 2,
        init_vals[-1] + init_step / 2,
    ],
    origin='lower',  # row 0 (smallest init_val) is drawn at the bottom
    aspect='auto',
    cmap='viridis'
)

# Add value labels, centered on the cell they belong to
for i, init_val in enumerate(init_vals):
    for j, m in enumerate(slopes):
        plt.text(m, init_val, f"{mse[i, j]:.1f}", ha='center', va='center', color='white', fontsize=8)

plt.colorbar(label='Mean Squared Error')
plt.xlabel('Slope')
plt.ylabel('Init Value')
plt.show()

We have some feeling the derivative is involved, but how?

$\textbf{Partial Derivatives}$. Partial derivatives measure how sensitive a model’s output is to small changes in each input variable while keeping the others constant. In other words, they tell us which parameters or inputs have the greatest influence on the model’s predictions. A large partial derivative means the model is highly sensitive to that variable, while a small one means it has little effect.

Great, so how do we figure out what the partial derivatives are?

In [None]:
#import packages
import sympy as sp
import numpy as np

# Define the symbol we will differentiate with respect to
m = sp.Symbol('m')

# Define the cost function C
# (exercise: replace ??? with your expression before running this cell;
#  any other symbol used in C, such as b, must be defined first)
C = ???
print("Function:", C)

# Take the derivative of C with respect to m
C_prime_m = sp.diff(C, m)
print("Derivative with respect to m:", C_prime_m)

In [None]:
#import packages
import sympy as sp
import numpy as np

# Define the symbol we will differentiate with respect to
b = sp.Symbol('b')

# Define the cost function C
# (exercise: replace ??? with your expression before running this cell;
#  any other symbol used in C, such as m, must be defined first)
C = ???
print("Function:", C)

# Take the derivative of C with respect to b
C_prime_b = sp.diff(C, b)
print("Derivative with respect to b:", C_prime_b)

Next let's use Gradient Descent to find the best values for $m$ and $\texttt{init\_val}$.

But what does the update look like?

In [None]:
# Exercise: replace ??? with the gradient of the cost with respect to m
def gradient_func_m(m):
    return ???
#choice of learning rate (scales the size of each update step)
eta = .05
#current m value
curr_m = .75
#gradient at curr_m
gradient_m = gradient_func_m(curr_m)
print("The gradient at m = {} is {}.".format(curr_m,gradient_m))
#next m: step from curr_m against the gradient, scaled by eta
next_m = curr_m-(gradient_m*eta)
print("The next guess for the slope is {}.".format(next_m))

In [None]:
# Exercise: replace ??? with the gradient of the cost with respect to b
def gradient_func_b(b):
    return ???
#choice of learning rate (scales the size of each update step)
eta = .05
#current b value
curr_b = .75
#gradient at curr_b
gradient_b = gradient_func_b(curr_b)
print("The gradient at b = {} is {}.".format(curr_b,gradient_b))
#next b: step from curr_b against the gradient, scaled by eta
next_b = curr_b-(gradient_b*eta)
print("The next guess for the initial value is {}.".format(next_b))

In [None]:
# make a function to do the dirty work for us
def cost_func(m,init_val,input_vals,output_vals):
    """Return the mean squared error of the line ŷ = m * x + init_val.

    Cost = (1/n) * Σ (y - ŷ)²

    Args:
        m: Slope of the line.
        init_val: Initial value (intercept) of the line.
        input_vals: Observed x values (array or plain list of numbers).
        output_vals: Observed y values, same length as ``input_vals``.

    Returns:
        The mean of the squared differences between ``output_vals`` and
        the model's predictions.
    """
    # Convert up front so plain Python lists work as well as NumPy arrays.
    input_vals = np.asarray(input_vals, dtype=float)
    output_vals = np.asarray(output_vals, dtype=float)
    # Build the full prediction first: the residual is y - (m*x + init_val).
    # Writing y - m*x + init_val would ADD the intercept to the residual.
    predictions = m * input_vals + init_val
    return np.mean((output_vals - predictions) ** 2)
# Observed data: the inputs are the advertising dollars and the outputs
# are the sales revenue.
sales_revenue = np.array([5.8, 6.7, 10.3, 14.4, 12.5])
dollars_spent = np.array([1, 2, 3, 4, 5])

# Pick one candidate line: slope m and initial value b.
curr_m, curr_b = 0.75, 0.75

# Evaluate cost_func for this (m, b) pair on the observed data and keep
# the result in curr_cost.
curr_cost = cost_func(
    m=curr_m,
    init_val=curr_b,
    input_vals=dollars_spent,
    output_vals=sales_revenue,
)

# Show the result.
print(curr_cost)