In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [19]:
import numpy as np

class LinearRegression:

    def __init__(self, learning_rate = 0.01, num_iterations = 1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.weights = None
        self.bias = None


    def fit(self, X, y):                      # getting no of examples and features
        n_samples, n_features = X.shape
        
        self.weights = np.zeros(n_features)   # initializing weights and bias
        self.bias = 0
        
        for i in range(self.num_iterations):  # Gradient Descent
            y_pred = self.predict(X)
            
            dweights = (1 / n_samples) * np.dot(X.T, (y_pred - y))    # calculating gradients
            dbias = (1 / n_samples) * np.sum(y_pred - y)
            
            self.weights -= self.learning_rate * dweights             # updating parameters
            self.bias -= self.learning_rate * dbias
            
    def predict(self, X):
        y_pred = np.dot(X, self.weights) + self.bias                  # linear regression
        return y_pred

In [21]:
model = LinearRegression()
lb = LabelEncoder()
ss = StandardScaler()

In [22]:
df = pd.read_csv('50_Startups.csv')
df.head()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94


In [23]:
A = ss.fit_transform(df[["R&D Spend", "Administration", "Marketing Spend"]])
df[["R&D Spend", "Administration", "Marketing Spend"]] = A
X = lb.fit_transform(df["State"])
df["State"] = X
df.head()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,2.016411,0.560753,2.153943,2,192261.83
1,1.95586,1.082807,1.9236,0,191792.06
2,1.754364,-0.728257,1.626528,1,191050.39
3,1.554784,-0.096365,1.42221,2,182901.99
4,1.504937,-1.079919,1.281528,1,166187.94


In [24]:
X = df[["R&D Spend", "Administration", "Marketing Spend", "State"]]
y = df["Profit"]

In [25]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)
reg = model.fit(X_train,y_train)

In [26]:
p = pd.DataFrame()
p['Actual'] = y_test
p['Predicted'] = model.predict(X_test)
p

Unnamed: 0,Actual,Predicted
17,125370.37,130234.360693
28,103282.38,101384.327802
10,146121.95,135051.038435
7,155752.6,159629.033667
5,156991.12,163409.013376
3,182901.99,173508.280764
25,107404.34,101698.975474
39,81005.76,83385.813581
29,101004.64,101667.186401
20,118474.03,116345.866841
