In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file beneath the Kaggle input
# directory and print them one per line.
input_files = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [None]:
class linear_regression:
  """Least-squares regression of y on a single feature: y = m*x + b.

  The slope and intercept are computed in closed form:

      m = sum((x - x_mean) * (y - y_mean)) / sum((x - x_mean) ** 2)
      b = y_mean - m * x_mean

  ``fit`` stores both values on the instance so that ``coef_()``,
  ``intercept()`` and ``predict()`` can be called afterwards.
  """

  def fit(self, X, y):
    """Estimate slope and intercept from the training data.

    Args:
      X: feature values; must support ``.mean(axis=0)`` and elementwise
        arithmetic (e.g. a NumPy array or a pandas Series).
      y: target values, aligned with ``X``.

    Returns:
      Tuple ``(m, b, regression_line)`` where ``regression_line`` is
      ``m*X + b`` evaluated on the training inputs.
    """
    x_mean, y_mean = X.mean(axis=0), y.mean(axis=0)
    numerator = (X - x_mean) * (y - y_mean)
    denominator = (X - x_mean) ** 2
    m = sum(numerator) / sum(denominator)
    b = y_mean - m * x_mean
    # Remember the fitted parameters; the accessors and predict() read them.
    self._m, self._b = m, b
    regression_line = m * X + b
    return m, b, regression_line

  def _fitted_params(self):
    """Return the stored ``(m, b)``; raise if ``fit`` has not been called."""
    try:
      return self._m, self._b
    except AttributeError:
      raise RuntimeError(
          "linear_regression is not fitted yet; call fit(X, y) first"
      ) from None

  def coef_(self):
    """Return the fitted slope ``m``.

    Raises:
      RuntimeError: if ``fit`` has not been called yet.
    """
    m, _ = self._fitted_params()
    return m

  def intercept(self):
    """Return the fitted intercept ``b``.

    Raises:
      RuntimeError: if ``fit`` has not been called yet.
    """
    _, b = self._fitted_params()
    return b

  def predict(self, X):
    """Return ``m*X + b`` using the parameters stored by ``fit``.

    Raises:
      RuntimeError: if ``fit`` has not been called yet.
    """
    m, b = self._fitted_params()
    return m * X + b

  def sse(self, y_true, y_pred):
    """Return the sum of squared errors between ``y_true`` and ``y_pred``."""
    error = y_true - y_pred
    return sum(error ** 2)

  def mse(self, y_true, y_pred):
    """Return the mean squared error: SSE divided by ``len(y_true)``."""
    error = y_true - y_pred
    return (1 / len(y_true)) * sum(error ** 2)

  def r2_scores(self, y_true, y_pred):
    """Return the squared Pearson correlation of ``y_true`` and ``y_pred``.

    NOTE: this equals the coefficient of determination (1 - SSE/SST) only
    when ``y_pred`` comes from a least-squares fit with intercept evaluated
    on the data it was fitted to. On other data the two can differ.
    """
    corr_matrix = np.corrcoef(y_true, y_pred)
    corr = corr_matrix[0, 1]
    return corr ** 2

    

In [None]:
# Read the training and test splits from the Kaggle input directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/linear-regression/train.csv',
        '../input/linear-regression/test.csv',
    )
)

In [None]:
# check null values

In [None]:
# Number of missing values in each column of the training frame.
train.isnull().sum(axis=0)

In [None]:
# Number of missing values in each column of the test frame.
test.isnull().sum(axis=0)

In [None]:
# Discard every training row that contains at least one missing value.
train = train.dropna(axis=0, how='any')

In [None]:
# Report (rows, columns) of both frames, one per line.
shape_report = (
    f'Train shape - {train.shape}',
    f'Test shape - {test.shape}',
)
print(*shape_report, sep='\n')

In [None]:
# Separate the feature column ('x') from the target column ('y') per split.
X_train, y_train = train['x'], train['y']
X_test, y_test = test['x'], test['y']

In [None]:
# Fit the hand-written regression on the training split; fit() hands back
# the slope, the intercept and the fitted line over the training inputs.
model = linear_regression()
m_res, b_res, pred_results = model.fit(X=X_train, y=y_train)

print(
    f'Coefficient Value --> {m_res}',
    f'Intercept Value --> {b_res}',
    sep='\n',
)

In [None]:
# predict X_test results
def predict(m, b, X):
    """Evaluate the line ``m*X + b`` for slope ``m`` and intercept ``b``."""
    return m * X + b
# Predictions for the test inputs from the fitted slope and intercept.
results_preds = predict(m=m_res, b=b_res, X=X_test)

In [None]:
# Score the hand-written model's test predictions against the test targets.
score_r2 = model.r2_scores(y_true=y_test, y_pred=results_preds)
print(f'Score For test {score_r2}')

In [None]:
# Training points with the hand-written model's test predictions drawn
# over them as a red line.
fig, ax = plt.subplots()
ax.scatter(X_train, y_train)
ax.plot(X_test, results_preds, 'r', label='Best Regression line')
ax.legend()
plt.show()

In [None]:
# Error metrics of the hand-written model on the test split.
mse_error = model.mse(y_true=y_test, y_pred=results_preds)
sse_error = model.sse(y_true=y_test, y_pred=results_preds)

print(
    f'MSE Error is --> {mse_error}',
    f'SSE Error is --> {sse_error}',
    sep='\n',
)

In [None]:
# Reference model: scikit-learn's LinearRegression, used to cross-check the
# hand-written implementation (intercept fitting is its default, spelled out).
lr_model = LinearRegression(fit_intercept=True)

In [None]:
# Feature matrices keep every column except the last (2-D); targets are the
# column at position 1 (1-D), extracted identically for both splits.
X = train.iloc[:, :-1].values
y = train.iloc[:, 1].values
x_test = test.iloc[:, :-1].values
# Previously this sliced `[:, :-1]`, i.e. the feature column(s), so the
# sklearn scores below were computed against x rather than the real target.
# NOTE: this rebinds `y_test` from the earlier split cell (Series -> ndarray).
y_test = test.iloc[:, 1].values

In [None]:
# Train the scikit-learn reference model on the training arrays.
lr_model.fit(X=X, y=y)

In [None]:
# Slope (first and only coefficient) and intercept learned by scikit-learn.
sk_coef, sk_intercept = lr_model.coef_[0], lr_model.intercept_
print(f'Coefficient Value --> {sk_coef}')
print(f'Intercept Value --> {sk_intercept}')

In [None]:
# Predict on the test features, then draw the training points with the
# scikit-learn predictions over them as a blue line.
y_pred_lr = lr_model.predict(x_test)

fig, ax = plt.subplots()
ax.scatter(X, y)
ax.plot(x_test, y_pred_lr, 'b', label='Best Fit line with Sklearn')
ax.legend()
plt.show()

In [None]:
# Score reported by the estimator itself on the test arrays.
sklearn_score = lr_model.score(x_test, y_test)
print('Score: %.2f' % sklearn_score)

In [None]:
# Same comparison through sklearn.metrics.r2_score, on the predictions
# computed earlier.
score = r2_score(y_true=y_test, y_pred=y_pred_lr)
print(f'Score For test with Sklearn Regression {score}')

In [None]:
# Conclusion:
''' Using the NumPy-based implementation and scikit-learn's LinearRegression,
the coefficient, intercept and r2_score results are the same. '''