In [1]:
import numpy as np

class LinearRegression:
    """Linear regression trained with full-batch gradient descent.

    The model is ``y_hat = X @ weights + bias``. ``fit`` starts from all-zero
    parameters and takes ``n_iters`` steps against the gradient of half the
    mean squared error.

    Parameters
    ----------
    learning_rate : float, default=1e-3
        Step size applied to every gradient update.
    n_iters : int, default=1000
        Number of gradient steps performed by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray of shape (n_features,), or None before ``fit``
    bias : float, or None before ``fit``
    n_samples, n_features : int
        Shape of the training matrix; set by ``fit``.
    """

    def __init__(self, learning_rate=1e-3, n_iters=1000):
        # hyper-parameters
        self.lr = learning_rate
        self.n_iters = n_iters
        # learned parameters, populated by fit()
        self.weights = None
        self.bias = None

    def _get_prediction(self, X):
        """Return the linear response ``X @ weights + bias``."""
        return np.dot(X, self.weights) + self.bias

    def _init_params(self):
        """Reset weights and bias to zero for ``self.n_features`` features."""
        self.weights = np.zeros(self.n_features)
        self.bias = 0.0

    def _update_params(self, dw, db):
        """Take one gradient-descent step scaled by the learning rate."""
        self.weights -= self.lr * dw
        self.bias -= self.lr * db

    def _get_gradients(self, X, y, y_pred):
        """Return ``(dw, db)``, the gradients w.r.t. weights and bias.

        ``y`` and ``y_pred`` must both be 1-D with one entry per row of ``X``;
        ``fit`` guarantees this before calling.
        """
        # residual between prediction and target
        error = y_pred - y
        dw = (1 / self.n_samples) * np.dot(X.T, error)
        db = (1 / self.n_samples) * np.sum(error)
        return dw, db

    def fit(self, X, y):
        """Fit the model to training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) or (n_samples, 1)

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``y`` does not hold exactly one value per
            row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the target: a (n_samples, 1) column would otherwise
        # broadcast against the 1-D prediction into an (n, n) residual matrix.
        y = np.asarray(y, dtype=float).reshape(-1)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features), got {X.ndim}-D input"
            )

        # get number of samples & features
        self.n_samples, self.n_features = X.shape
        if y.shape[0] != self.n_samples:
            # Without this check a length-1 y would broadcast silently.
            raise ValueError(
                f"X has {self.n_samples} samples but y has {y.shape[0]} values"
            )

        # init weights & bias
        self._init_params()

        # perform gradient descent for n iterations
        for _ in range(self.n_iters):
            y_pred = self._get_prediction(X)
            dw, db = self._get_gradients(X, y, y_pred)
            self._update_params(dw, db)

    def predict(self, X):
        """Return predictions for ``X`` (array-like, shape (n_samples, n_features))."""
        return self._get_prediction(np.asarray(X, dtype=float))
    
# Testing
# Smoke test for the gradient-descent LinearRegression class: train on a
# synthetic one-feature regression problem and print the hold-out RMSE.
# (This demo used to be pasted twice, so the model was trained twice and the
# first run's predictions were thrown away; one copy is enough.)
if __name__ == "__main__":

    import matplotlib.pyplot as plt
    from sklearn import datasets
    from sklearn.model_selection import train_test_split

    # create metric helper function
    def rmse(y_true, y_pred):
        """Return the root-mean-squared error between two arrays."""
        return np.sqrt(np.mean((y_pred - y_true)**2))

    # get dataset (seeded, so the numbers are reproducible)
    X, y = datasets.make_regression(
        n_samples=1000, n_features=1, noise=20, random_state=1
    )

    # split dataset: 80% train / 20% test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

    # instantiate regressor and train it
    linreg = LinearRegression(learning_rate=0.01, n_iters=1000)
    linreg.fit(X_train, y_train)

    # evaluate on the held-out split
    predictions = linreg.predict(X_test)
    print(f"RMSE: {rmse(y_test, predictions)}")

    


RMSE: 20.487988968355808


In [2]:
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

    # create metric helper function
def rmse(y_true, y_pred):
    """Return the root-mean-squared error between targets and predictions.

    Parameters
    ----------
    y_true, y_pred : array-like
        Values to compare. They are combined element-wise, so their shapes
        must match or be broadcastable.

    Returns
    -------
    numpy.float64
        ``sqrt(mean((y_pred - y_true) ** 2))`` over all elements.
    """
    # Convert to float first: plain lists do not support "-", and narrow or
    # unsigned integer arrays would wrap around during subtract/square.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.sqrt(np.mean((y_pred - y_true) ** 2))

    # get dataset
# Seeded synthetic regression problem: 1000 samples, one feature, noise=20.
X, y = datasets.make_regression(n_samples=1000, n_features=1, noise=20, random_state=1)


In [3]:
X

array([[-1.06787658e+00],
       [-2.91594596e-01],
       [ 5.08077548e-02],
       [ 6.21803504e-01],
       [-5.45774168e-01],
       [ 1.55501599e+00],
       [-2.09752935e-01],
       [ 1.78975468e+00],
       [-7.67803746e-01],
       [ 1.47073986e+00],
       [-2.86384915e-01],
       [-3.64538050e-01],
       [ 4.03491642e-01],
       [ 3.37220938e-01],
       [ 8.64644065e-02],
       [-1.67419581e+00],
       [-9.24323185e-02],
       [ 5.20576337e-01],
       [ 1.46089238e+00],
       [-9.15424368e-01],
       [-1.61198320e-01],
       [ 1.57546791e+00],
       [ 7.44884536e-01],
       [ 1.04499441e+00],
       [-2.79099641e+00],
       [-7.47158294e-01],
       [-1.11711069e+00],
       [-1.09033833e+00],
       [ 8.59870972e-01],
       [ 1.90915485e-01],
       [-3.31283170e-01],
       [-4.23478297e-01],
       [ 3.19656942e-01],
       [-1.94504696e+00],
       [-8.80577600e-01],
       [ 8.38634747e-01],
       [-1.56450785e+00],
       [ 8.16043684e-01],
       [-7.3

In [4]:
y

array([-3.35039904e+00,  3.09689666e+00,  2.74963013e+01,  5.24339879e+01,
       -1.01187574e+01,  7.30580671e+01,  1.64687657e+00,  7.99640580e+01,
       -2.84462079e+01,  6.75553361e+01, -3.48570278e+01, -1.88853710e+01,
       -1.00521817e+01,  2.48379089e+01, -2.04566818e+01, -3.53840015e+01,
        8.16016773e+00,  3.87719815e+00,  5.38699282e+01, -2.05066429e+01,
       -2.00263681e+01,  4.58661196e+01,  3.11933930e+01,  2.97838630e+01,
       -1.15184070e+02, -2.71869184e+01, -7.03013499e+01,  2.29488260e+00,
        1.31662911e+01,  5.17017420e+00, -5.20664765e+00,  2.75451418e+01,
        3.14616351e+01, -9.15353395e+01, -2.37639523e+01,  5.66567332e+01,
       -1.10986525e+02,  2.72387294e+01,  1.56457882e-01,  4.18535977e+01,
        1.44510489e+01, -1.08717990e+02, -4.08966324e+01, -1.19060720e+01,
       -1.44299323e+01,  3.56194715e+01, -5.13432955e+01,  4.78195347e+01,
        9.07726179e+01, -8.83786226e+00,  4.53748973e+01,  5.90985303e+01,
       -2.43150340e+01,  

In [5]:
# Import scikit-learn's estimator under an alias so that it does not rebind
# the name `LinearRegression`, which this notebook already uses for its own
# gradient-descent class.
from sklearn.linear_model import LinearRegression as SklearnLinearRegression

reg = SklearnLinearRegression()
reg

In [6]:
 # split dataset
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,   # hold out 20% of the rows for evaluation
    random_state=1,  # fixed seed so the split is reproducible
)


In [7]:
X_train

array([[-1.75458969e-01],
       [-1.00016919e+00],
       [-5.30119800e-01],
       [ 1.19891788e+00],
       [-5.62305431e-01],
       [ 9.66539250e-01],
       [-9.15424368e-01],
       [ 7.00982122e-01],
       [ 6.18380262e-01],
       [ 1.20660790e+00],
       [ 2.05117344e+00],
       [-1.47115693e+00],
       [ 1.70548352e+00],
       [-1.18110317e+00],
       [ 5.62761097e-01],
       [ 8.53282186e-01],
       [ 1.40925339e+00],
       [ 9.25501215e-01],
       [ 1.22372221e+00],
       [-1.98937450e-01],
       [ 1.20158952e-01],
       [ 1.12341216e+00],
       [ 3.69190470e-01],
       [ 1.76041518e+00],
       [ 1.21821271e-01],
       [ 5.28879746e-01],
       [ 3.18014296e-01],
       [ 8.24005618e-01],
       [ 2.45422849e-01],
       [ 1.74094083e-02],
       [ 4.41364444e-01],
       [-5.93843067e-01],
       [ 4.66643267e-01],
       [-5.63236604e-01],
       [-2.69836174e+00],
       [ 1.01120706e+00],
       [ 1.23289919e+00],
       [-3.74804687e-01],
       [-1.0

In [8]:
X_test

array([[-0.60598132],
       [-0.79954749],
       [ 2.05635552],
       [ 1.30253554],
       [ 0.13770121],
       [ 0.60878366],
       [-0.67124613],
       [-1.10657307],
       [-0.32905155],
       [-0.17242821],
       [-1.51045638],
       [ 1.45810824],
       [ 0.80186103],
       [-0.68067814],
       [ 0.82797464],
       [-1.1191154 ],
       [-0.87785842],
       [-0.20975294],
       [-0.2210289 ],
       [-0.4148469 ],
       [ 0.6085147 ],
       [ 0.62336218],
       [ 1.35010682],
       [ 0.84616065],
       [ 0.37167029],
       [ 2.18697965],
       [-0.29664115],
       [ 0.49233656],
       [ 1.21987438],
       [-0.37443832],
       [ 0.69257435],
       [ 0.2373327 ],
       [-2.07680202],
       [ 0.24879916],
       [ 0.55607351],
       [ 1.71066184],
       [ 1.9560789 ],
       [ 0.79452824],
       [-1.39649634],
       [-0.86131636],
       [ 0.14225137],
       [-0.25898285],
       [ 0.30017032],
       [-0.97989025],
       [-0.1834002 ],
       [ 1

In [9]:
y

array([-3.35039904e+00,  3.09689666e+00,  2.74963013e+01,  5.24339879e+01,
       -1.01187574e+01,  7.30580671e+01,  1.64687657e+00,  7.99640580e+01,
       -2.84462079e+01,  6.75553361e+01, -3.48570278e+01, -1.88853710e+01,
       -1.00521817e+01,  2.48379089e+01, -2.04566818e+01, -3.53840015e+01,
        8.16016773e+00,  3.87719815e+00,  5.38699282e+01, -2.05066429e+01,
       -2.00263681e+01,  4.58661196e+01,  3.11933930e+01,  2.97838630e+01,
       -1.15184070e+02, -2.71869184e+01, -7.03013499e+01,  2.29488260e+00,
        1.31662911e+01,  5.17017420e+00, -5.20664765e+00,  2.75451418e+01,
        3.14616351e+01, -9.15353395e+01, -2.37639523e+01,  5.66567332e+01,
       -1.10986525e+02,  2.72387294e+01,  1.56457882e-01,  4.18535977e+01,
        1.44510489e+01, -1.08717990e+02, -4.08966324e+01, -1.19060720e+01,
       -1.44299323e+01,  3.56194715e+01, -5.13432955e+01,  4.78195347e+01,
        9.07726179e+01, -8.83786226e+00,  4.53748973e+01,  5.90985303e+01,
       -2.43150340e+01,  

In [10]:
y_test

array([-2.41490219e+01, -2.30105884e+01,  1.14587290e+02,  3.84428542e+01,
        2.29750380e+01,  4.00573787e+01, -3.08824041e+01, -4.44766802e+01,
        1.44339645e+01, -8.83786226e+00, -5.77395182e+01,  6.35890788e+01,
        4.29273921e+01, -3.53045813e+01,  5.56013721e+01, -4.42713383e+01,
       -1.33970552e+01,  1.64687657e+00, -3.21313386e+01, -4.29987567e+01,
        6.76455583e+01,  6.32276579e+01,  2.87502164e+01,  8.86336794e+00,
        1.52339026e+01,  8.86869829e+01, -4.26969286e+01, -1.17989257e+01,
        3.09112305e+01, -4.66085237e+01,  4.28171949e+01,  5.14457551e+01,
       -1.07434195e+02,  2.18994548e+01,  7.18088094e+00,  9.12396509e+01,
        7.53692099e+01,  3.38067928e+01, -5.39041035e+01, -1.50530894e+01,
        2.93002249e+01,  2.93353609e+01, -1.22693040e+01, -5.60115699e+01,
       -1.98687897e+01,  6.24657153e+01,  2.95620720e+01, -1.08717990e+02,
       -5.55272544e+01, -3.17567746e+01, -6.99350697e+00, -2.37639523e+01,
       -4.75633676e+01,  

In [11]:
# Train the reference estimator on the training split ...
reg.fit(X_train, y_train)
# ... then predict the held-out rows and display the predictions.
pred = reg.predict(X_test)
pred

array([ -22.58254698,  -29.97810685,   79.13704329,   50.33593182,
          5.83124389,   23.82983554,  -25.0761119 ,  -41.7085969 ,
        -12.00192382,   -6.01783425,  -57.13971888,   56.27987973,
         31.20671999,  -25.4364796 ,   32.20443964,  -42.18780027,
        -32.97012322,   -7.44389573,   -7.87471525,  -15.27989673,
         23.8195594 ,   24.38683524,   52.1534822 ,   32.89927019,
         14.77047302,   84.1277838 ,  -10.7636236 ,   19.38075479,
         47.17770672,  -13.73601078,   27.03121651,    9.63785247,
        -78.77802031,   10.07595023,   21.81594509,   65.92916539,
         75.30578629,   30.92655686,  -52.78566106,  -32.33810279,
          6.00509141,   -9.32481741,   12.03868204,  -36.86844148,
         -6.43703977,   51.44093508,    5.47499452,  -87.36450798,
        -50.95937634,  -51.89242178,   -0.80496266,  -33.07401469,
        -15.722398  ,   32.611729  ,   71.882162  ,   62.57411389,
         19.79523288,   14.07881096,   -6.55813933,  -46.03252

In [12]:
    def rmse(y_true, y_pred):
        """Return the root-mean-squared error between targets and predictions.

        Both arguments are array-likes that are combined element-wise, so
        their shapes must match or be broadcastable. The result is
        ``sqrt(mean((y_pred - y_true) ** 2))`` over all elements.
        """
        # Work in float: lists do not support "-", and narrow or unsigned
        # integer arrays would wrap around during subtract/square.
        y_true = np.asarray(y_true, dtype=float)
        y_pred = np.asarray(y_pred, dtype=float)
        return np.sqrt(np.mean((y_pred - y_true) ** 2))

In [13]:
# Hold-out RMSE of the predictions stored in `pred`.
print(rmse(y_test, pred))

20.48796052878318


In [33]:
# NOTE: make_regression is unpacked into two values (X, y) elsewhere in this
# notebook, so `model` is bound to that data tuple here, not to an estimator.
model = datasets.make_regression(n_samples=1000, n_features=1, noise=20, random_state=1)


In [4]:
import pandas as pd
# Load the salary data; the path is relative, so "Salary_Data.csv" must be in
# the kernel's working directory.
dataset = pd.read_csv("Salary_Data.csv")
# Bare last expression: renders the whole frame as the cell output.
dataset

Unnamed: 0,YearsExperience,Salary
0,1.1,39343.0
1,1.3,46205.0
2,1.5,37731.0
3,2.0,43525.0
4,2.2,39891.0
5,2.9,56642.0
6,3.0,60150.0
7,3.2,54445.0
8,3.2,64445.0
9,3.7,57189.0


In [9]:
# Feature matrix: every column of `dataset` except the last, as a 2-D array.
X = dataset.iloc[:, :-1].values
X


array([[ 1.1],
       [ 1.3],
       [ 1.5],
       [ 2. ],
       [ 2.2],
       [ 2.9],
       [ 3. ],
       [ 3.2],
       [ 3.2],
       [ 3.7],
       [ 3.9],
       [ 4. ],
       [ 4. ],
       [ 4.1],
       [ 4.5],
       [ 4.9],
       [ 5.1],
       [ 5.3],
       [ 5.9],
       [ 6. ],
       [ 6.8],
       [ 7.1],
       [ 7.9],
       [ 8.2],
       [ 8.7],
       [ 9. ],
       [ 9.5],
       [ 9.6],
       [10.3],
       [10.5]])

In [10]:
# Target vector: the last column of `dataset`, as a 1-D array.
y = dataset.iloc[:, -1].values
y

array([ 39343.,  46205.,  37731.,  43525.,  39891.,  56642.,  60150.,
        54445.,  64445.,  57189.,  63218.,  55794.,  56957.,  57081.,
        61111.,  67938.,  66029.,  83088.,  81363.,  93940.,  91738.,
        98273., 101302., 113812., 109431., 105582., 116969., 112635.,
       122391., 121872.])

In [13]:
# NOTE(review): this import rebinds the name `LinearRegression`, hiding the
# gradient-descent class of the same name defined at the top of the notebook.
from sklearn.linear_model import LinearRegression
# Unfitted estimator created with default arguments.
regressor = LinearRegression()
regressor

In [19]:
import numpy as np
# Toy data: x is the column vector 0..9 (shape (10, 1)), y holds ten targets.
x = np.arange(10).reshape(-1, 1)
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

In [20]:
x

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

In [21]:
y

array([ 1,  3,  2,  5,  7,  8,  8,  9, 10, 12])

In [22]:
# Fit the estimator on the toy column vector `x` and targets `y`.
regressor.fit(x, y)

In [24]:
import pandas as pd
# Twenty (hours, score) observations, one column per variable.
df = pd.DataFrame(
    dict(
        hours=[
            1, 1, 1, 2, 2, 2, 2, 2, 3, 3,
            3, 4, 4, 4, 5, 5, 6, 7, 7, 8,
        ],
        score=[
            68, 76, 74, 80, 76, 78, 81, 84, 86, 83,
            88, 85, 89, 94, 93, 94, 96, 89, 92, 97,
        ],
    )
)

In [25]:
df

Unnamed: 0,hours,score
0,1,68
1,1,76
2,1,74
3,2,80
4,2,76
5,2,78
6,2,81
7,2,84
8,3,86
9,3,83
