This notebook requires numpy, scikit-learn, matplotlib, and pandas.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# train_test_split splits a dataset into train and test subsets (commonly a 70:30 or 80:20 ratio)
from sklearn.model_selection import train_test_split
# sklearn.datasets is used to generate a dataset when none is provided.
from sklearn import datasets


The linear regression class needs a learning rate, a number of iterations, a bias value, and a set of weights.

In [None]:
# define the class LinearRegression


class LinearRegression:
  """Linear regression trained with batch gradient descent.

  The model is ``y_hat = X @ weights + bias``. ``fit`` starts from all-zero
  parameters and repeatedly steps them against the gradient of the mean
  squared error.

  Attributes:
    lr: step size (alpha) applied to every gradient update.
    n_iters: maximum number of gradient descent iterations.
    weights: 1-D array with one coefficient per feature; ``None`` until
      ``fit`` has been called.
    bias: scalar intercept; ``None`` until ``fit`` has been called.
  """

  def __init__(self, learning_rate=0.001, number_of_iterations=100):
    """Store the hyper-parameters; the model parameters are set by ``fit``."""
    self.lr = learning_rate  # alpha
    self.n_iters = number_of_iterations
    self.weights = None
    self.bias = None

  def fit(self, X, y):
    """Estimate ``weights`` and ``bias`` from training data.

    Args:
      X: array-like of shape (n_samples, n_features).
      y: array-like of shape (n_samples,) holding the target values.

    The parameters are reset to zero on every call, so calling ``fit``
    again retrains from scratch.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)

    # rows are samples (records), columns are features (one theta each)
    n_samples, n_features = X.shape
    self.bias = 0.0
    self.weights = np.zeros(n_features)

    for _ in range(self.n_iters):
      # Residuals of the current model on the training set.
      residuals = np.dot(X, self.weights) + self.bias - y

      # Gradients of the (halved) mean squared error w.r.t. weights and bias.
      dw = np.dot(X.T, residuals) / n_samples
      db = np.sum(residuals) / n_samples

      # Both gradients exactly zero: further updates would change nothing.
      # np.any is required because dw is an array; a plain `dw == 0` has no
      # single truth value when there is more than one feature.
      if db == 0 and not np.any(dw):
        break

      self.weights -= self.lr * dw
      self.bias -= self.lr * db

  def predict(self, X):
    """Return predictions ``X @ weights + bias`` for the rows of ``X``.

    ``fit`` must be called first; before that ``weights`` and ``bias`` are
    ``None``.
    """
    return np.dot(X, self.weights) + self.bias


In [None]:
# Build a randomly generated regression problem:
#   n_samples    -> number of records (m)
#   n_features   -> number of input features (one theta per feature)
#   noise        -> amount of noise added to the targets
#   random_state -> seed, so the same data is produced on every run
X, y = datasets.make_regression(
    n_samples=100,
    n_features=1,
    noise=20,
    random_state=7,
)

# Split into train and test subsets. Give either test_size or train_size;
# test_size=0.2 holds out 20% of the records for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=7,
)

print(
    X.shape,
    y.shape,
    X_train.shape,
    X_test.shape,
    y_train.shape,
    y_test.shape,
)


In [None]:
# Scatter plot of the whole dataset on an 8x6 inch figure.
fig = plt.figure(figsize=(8, 6))

# X[:, 0] selects every row (":") of column 0, i.e. the single feature as a
# 1-D array for the x axis; y supplies the matching y-axis values.
# color, marker and s set the marker colour, shape and size.
plt.scatter(
    X[:, 0],
    y,
    color='b',
    marker='o',
    s=30,
)

plt.show()

In [None]:
# Train on the training split, then predict targets for the held-out test split.
regressor = LinearRegression(
    learning_rate=0.1,
)
regressor.fit(X_train, y_train)
predicted_value = regressor.predict(X_test)

In [None]:
def mse(y_true, y_test):
  """Return the mean squared error between two sets of values.

  Args:
    y_true: array-like of reference values.
    y_test: array-like of values to compare against ``y_true``. Despite the
      name, the call site in this file passes the model's predictions here.

  Returns:
    The mean of the element-wise squared differences, as a float.
  """
  # Convert to float arrays first: plain lists do not support subtraction,
  # and unsigned-integer arrays would wrap around instead of going negative.
  diff = np.asarray(y_true, dtype=float) - np.asarray(y_test, dtype=float)
  return np.mean(diff ** 2)

In [None]:
# Report the mean squared error of the predictions on the test split.
print(
    mse(y_test, predicted_value)
)

In [None]:
def drawRegressionLine(X_train, X_test, y_train, y_test, y_predicted):
  """Plot the train and test points together with the fitted line.

  Args:
    X_train: x-axis values of the training points.
    X_test: x-axis values of the test points; also used as the x values of
      the prediction line.
    y_train: y-axis values of the training points.
    y_test: y-axis values of the test points.
    y_predicted: model predictions for ``X_test``, drawn as a black line.

  Opens a new 8x6 inch figure and displays it with ``plt.show()``.
  """
  cmap = plt.get_cmap('viridis')
  plt.figure(figsize=(8, 6))

  # Two shades of the same colormap tell the train and test points apart.
  plt.scatter(X_train, y_train, color=cmap(0.9), s=10, label='Train')
  plt.scatter(X_test, y_test, color=cmap(0.5), s=10, label='Test')
  plt.plot(X_test, y_predicted, color='black', linewidth=2, label='Prediction')

  # Labels are only rendered once a legend is requested.
  plt.legend()
  plt.show()


In [None]:
# Visualise both data splits and the fitted regression line.
drawRegressionLine(
    X_train,
    X_test,
    y_train,
    y_test,
    predicted_value,
)