In [1]:
# Importing the necessary libraries
import pandas as pd
import numpy as np
from scipy import stats

In [50]:
def Mregression(x:pd.DataFrame) -> None:
  """
  This function is used for implementing a multiple Linear Regression model on the dataset.

  Args:
      Expects a pandas dataframe with Dependent Variable as the last column.
  """
  # Segrefatig the X and Y
  X = x.iloc[:,:-1]
  Y = x.iloc[:,-1]
  
  # Finding the number of th obs and number of regressors
  no_of_reg = len(X.columns)
  num_obs = Y.shape[0]

  # Forming the  matrix
  b0 = pd.DataFrame({'b0_1': [1]*num_obs})
  x_matrix = pd.concat([b0,X],axis = 1,join = 'inner')

  # Converting the x matrix and y as arrays
  x_matrix = np.array(x_matrix)
  y_matrix = np.array(Y) 

  # Finding the beta coefficients
  first_part = np.linalg.inv(x_matrix.T@x_matrix)
  second_part = x_matrix.T@y_matrix
  beta_coeff = (first_part@second_part)
  
  # Predicted values
  yhat=  x@beta_coeff

  #error
  error = y_matrix - yhat

  # Sum of squares
  sum = 0
  count = 0
  for l in y_matrix:
    sum +=1
    count += 1
  ybar = sum/count

  sum_of_sq_reg = ((beta_coeff.T@x_matrix.T@y_matrix) - (num_obs*(ybar**2)))
  sum_of_sq_tot = (y_matrix.T@y_matrix - (num_obs*(ybar**2)))
  sum_of_sq_er = sum_of_sq_tot - sum_of_sq_reg 
  Msum_of_sq_reg = sum_of_sq_reg/no_of_reg
  Msum_of_sq_er = (sum_of_sq_er/(num_obs - no_of_reg - 1))

  # R square and Adjusted R Square
  Rsquare = (1 - (sum_of_sq_er/sum_of_sq_tot))
  adj_Rsquare = (1 - (sum_of_sq_er/(num_obs - no_of_reg -1))/(sum_of_sq_tot/num_obs))

  # F Statistics
  Fvalue = Msum_of_sq_reg/ Msum_of_sq_er

  pvalue = 1 - stats.f.cdf(Fvalue, no_of_reg, (num_obs - no_of_reg - 1))

  # Finding the standard error and test statistics
  C = np.diagonal(first_part)
  std_error = []
  tvalue = []
  for m in range(no_of_reg + 1):
    std_error.append(((Msum_of_sq_er*C[m])**0.5))
    tvalue.append(beta_coeff[m]/std_error[m])

  # printing the output
  print('\n The Sum of Squares due to the regressor is:     ', round(sum_of_sq_reg,4))
  print('Its degree of freedom is:                          ', no_of_reg)
  print('\n The Sum of Squares due to residuals is:         ', round(sum_of_sq_er,4))
  print('Its degree of freedom is:                          ', num_obs - no_of_reg - 1)
  print('\n The Sum of Squares due to total is:             ', round(sum_of_sq_tot,4))
  print('Its degrees of freedom is:                         ', num_obs - 1)
  print('\n The beta Coefficients are:')
  table1 = pd.DataFrame({'Beta': beta_coeff, 'Standard_errors': std_error, 'Test Statistic': tvalue})
  print(table1)
  print("\n F-Test statistics value is:                     ", round(Fvalue,4))
  print('Its p-value is:                                    ', round(pvalue,4))
  print('\n Interpretation for the significance of the overall model: ')
  if pvalue>0.05:
    print('\n The model is not significant.')
  else:
    print('\n The model is significant.')

  print('\n The R Square value                              ', round(Rsquare,4))
  print('The Adjusted R-Square value is:                    ', round(adj_Rsquare,4))
  table = pd.DataFrame({'Y': Y,'Y_hat': yhat, 'Errors': error})
  print('\n The predicted Y and errors are:')
  print(table)




In [51]:
data = pd.read_excel('Real estate valuation data set.xlsx')
data.head()

Unnamed: 0,No,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude,Y house price of unit area
0,1,2012.916667,32.0,84.87882,10,24.98298,121.54024,37.9
1,2,2012.916667,19.5,306.5947,9,24.98034,121.53951,42.2
2,3,2013.583333,13.3,561.9845,5,24.98746,121.54391,47.3
3,4,2013.5,13.3,561.9845,5,24.98746,121.54391,54.8
4,5,2012.833333,5.0,390.5684,5,24.97937,121.54245,43.1


In [52]:
# Dropping the 'No' Column
data.drop(['No','X5 latitude','X6 longitude'], axis = 1, inplace = True)
data.head()

Unnamed: 0,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,Y house price of unit area
0,2012.916667,32.0,84.87882,10,37.9
1,2012.916667,19.5,306.5947,9,42.2
2,2013.583333,13.3,561.9845,5,47.3
3,2013.5,13.3,561.9845,5,54.8
4,2012.833333,5.0,390.5684,5,43.1


In [53]:
data.shape

(414, 5)

In [54]:
Mregression(data)


 The Sum of Squares due to the regressor is:      639238.8941
Its degree of freedom is:                           4

 The Sum of Squares due to residuals is:          34001.4459
Its degree of freedom is:                           409

 The Sum of Squares due to total is:              673240.34
Its degrees of freedom is:                          413

 The beta Coefficients are:
           Beta  Standard_errors  Test Statistic
0 -11593.599174      3214.401996       -3.606767
1      5.780453         1.596749        3.620138
2     -0.254479         0.039529       -6.437849
3     -0.005513         0.000448      -12.305250
4      1.257923         0.191795        6.558701

 F-Test statistics value is:                      1922.3352
Its p-value is:                                     0.0

 Interpretation for the significance of the overall model: 

 The model is significant.

 The R Square value                               0.9495
The Adjusted R-Square value is:                     0.9489

 