## Minimal example of Ordinary Least Squares regression


### Import the packages we need

In [3]:
'''
---------------------------------------------------------------------
Minimal example of Ordinary Least Squares regression

                                           Roderick Brown, 31/3/2020
 --------------------------------------------------------------------
'''
# This line enables interactive plots
%matplotlib notebook

# The line below is not an ordinary comment: it declares the source encoding (UTF-8)
# -*- coding: utf-8 -*-

# Import required modules
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt


### Create some data

Use the `numpy.array` routine to create arrays containing the x values and the y values. We have set the data type to _float_ using `dtype=float`, which means the values are stored as decimal (floating-point) numbers.

In [16]:
# Sample data set: paired observations held in two float arrays.
# x holds the independent variable, y the dependent variable.
x = np.array([1.1, 2.3, 3.1, 3.8, 5.1], dtype=float)
y = np.array([3.2, 6.5, 6.8, 9.2, 10.9], dtype=float)

# Echo both arrays so the values can be checked by eye
print('x values:', x)
print('y values:', y)

x values: [1.1 2.3 3.1 3.8 5.1]
y values: [ 3.2  6.5  6.8  9.2 10.9]


### Plot the data on a graph

In [17]:
# Scatter plot of the raw data
fig = plt.figure(1)   # figure number 1 is the object we draw into
ax = fig.gca()        # axes of that figure (created if it has none yet)
ax.scatter(x, y)      # draw one marker per (x, y) pair on those axes
plt.show()

<IPython.core.display.Javascript object>

### Do the regression stuff...

In [18]:
# Fit the straight line y = slope*x + intercept by ordinary least squares.
# scipy.stats.linregress also returns the correlation coefficient (r_value),
# a p-value and the standard error of the slope.
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)

# Print some of the returned values of interest.
# r_value is the correlation coefficient R; it is squared here, so the
# number printed is R-squared and is labelled as such.
print('OLS slope: ', slope)
print('OLS intercept: ', intercept)
print('R-squared: ', r_value**2)


OLS slope:  1.9031551270815075
OLS intercept:  1.4582822085889573
R:  0.964795461295084


### Plot the data and the fitted straight line

In [43]:
# Create a new plot with the x and y data and the fitted line
fig = plt.figure(2)
ax = fig.gca()  # axes of figure 2 (created if the figure has none yet)

ax.set_xlim(0, 6)   # Set the limits of the x axis
ax.set_ylim(0, 12)  # Set the limits of the y axis

ax.scatter(x, y, label='data')  # Plot the data

# End points of the fitted line extended from x=0 to the largest x value.
# x.max() is used rather than x[-1] (the last element) so that this still
# works when the data are not sorted by x.
x_max = x.max()
xline = [0, x_max]
yline = [intercept, (x_max*slope + intercept)]

ax.plot(xline, yline, 'r--', label='OLS regression extended')  # Plot the extended line

# Fitted line over the data only, evaluated at every x using the fitted parameters
ax.plot(x, ((x*slope) + intercept), 'r-', label='OLS regression line')

ax.set_ylabel('Y values')
ax.set_xlabel('X values')

ax.legend(loc='upper left')

# Optionally write the plot to a file. With no explicit format argument,
# matplotlib chooses the file format from the extension, so change
# '.pdf' to '.png' or '.tif' as required.
# fig.savefig('ols_example.pdf')

# Show the plot on screen
plt.show()


<IPython.core.display.Javascript object>