<a href="https://colab.research.google.com/github/simsekahmet/linear_regression/blob/main/linear_reg_sklearn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Simple Linear Regression**

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import f_regression

def simple_linear_regression(x, y, scale = False):
    """Fit a one-feature linear regression with scikit-learn and summarise it.

    --------------------
    Args:
        x (Series or single-column DataFrame): Independent numerical value.
            Must expose ``.values``; it is reshaped to a single column.
        y (Series or 1-D array-like): Dependent numerical value.
        scale (bool, optional): Standardise x with StandardScaler before
            fitting. Defaults to False

    --------------------
    Returns:
        DataFrame = Parameters of model. One row with the columns
            'Features', 'Coefficients', 'Constant (Intercept)',
            'F statistics', 'p-values', 'R-square', 'Adjusted R-square'.

    --------------------
    Notes:
        y = b0 + b1 * x

        With ``scale=True`` the coefficient refers to the standardised x.
        'Adjusted R-square' is NaN when len(y) - 2 <= 0, because no
        residual degrees of freedom are left.

    --------------------
    Example:
        x = df["size"]
        y = df["price"]
    """
    # scikit-learn expects a 2-D (n_samples, n_features) matrix.
    x_matrix = x.values.reshape(-1, 1)
    if scale:
        x_matrix = StandardScaler().fit_transform(x_matrix)

    reg = LinearRegression().fit(x_matrix, y)

    # Run the F-test and the scoring once each and reuse the results.
    f_stats, p_values = f_regression(x_matrix, y)
    # score() may return a plain Python float (which has no .round method),
    # so normalise it and use the built-in round() below.
    r2 = float(reg.score(x_matrix, y))

    n_obs = len(y)
    residual_dof = n_obs - x_matrix.shape[1] - 1
    if residual_dof > 0:
        adj_r2 = 1 - (1 - r2) * (n_obs - 1) / residual_dof
    else:
        # Adjusted R-square is undefined without residual degrees of freedom.
        adj_r2 = float("nan")

    reg_summary = pd.DataFrame(data = pd.DataFrame(x).columns.values, columns=['Features'])
    reg_summary['Coefficients'] = reg.coef_
    reg_summary['Constant (Intercept)'] = reg.intercept_
    reg_summary['F statistics'] = f_stats.round(3)
    reg_summary['p-values'] = p_values.round(3)
    reg_summary['R-square'] = round(r2, 3)
    reg_summary['Adjusted R-square'] = round(adj_r2, 3)

    return reg_summary

In [None]:
# Load the real-estate sample data and summarise price ~ size on a
# standardised feature; the summary table is the cell's displayed output.
df = pd.read_csv("real_estate_price_size_year.csv")
simple_linear_regression(x=df["size"], y=df["price"], scale=True)

**Multiple Linear Regression**

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import f_regression

def multiple_linear_regression(x, y, scale = False):
    """Fit a multi-feature linear regression with scikit-learn and summarise it.

    --------------------
    Args:
        x (DataFrame): Independent numerical values, one column per feature.
        y (Series or 1-D array-like): Dependent numerical value.
        scale (bool, optional): Standardise x with StandardScaler before
            fitting. Defaults to False

    --------------------
    Returns:
        DataFrame = Parameters of model. One row per feature with the
            columns 'Features', 'Coefficients', 'Constant (Intercept)',
            'F statistics', 'p-values', 'R-square', 'Adjusted R-square'.
            Intercept, R-square and adjusted R-square are model-level
            values repeated on every row.

    --------------------
    Notes:
        y = b0 + b1 * x1 + b2 * x2 + ... + bn * xn

        'F statistics' and 'p-values' come from f_regression, i.e. they are
        univariate tests of each feature against y, not a test of the
        fitted multivariate model.
        With ``scale=True`` the coefficients refer to the standardised
        features.
        'Adjusted R-square' is NaN when len(y) - n_features - 1 <= 0,
        because no residual degrees of freedom are left.

    --------------------
    Example:
        x = df[["size", "year"]]
        y = df["price"]
    """
    x_ = StandardScaler().fit_transform(x) if scale else x

    reg = LinearRegression().fit(x_, y)

    # Run the F-test and the scoring once each and reuse the results.
    f_stats, p_values = f_regression(x_, y)
    # score() may return a plain Python float (which has no .round method),
    # so normalise it and use the built-in round() below.
    r2 = float(reg.score(x_, y))

    n_obs = len(y)
    residual_dof = n_obs - x_.shape[1] - 1
    if residual_dof > 0:
        adj_r2 = 1 - (1 - r2) * (n_obs - 1) / residual_dof
    else:
        # Adjusted R-square is undefined without residual degrees of freedom.
        adj_r2 = float("nan")

    reg_summary = pd.DataFrame(data = pd.DataFrame(x).columns.values, columns=['Features'])
    reg_summary['Coefficients'] = reg.coef_
    reg_summary['Constant (Intercept)'] = reg.intercept_
    reg_summary['F statistics'] = f_stats.round(3)
    reg_summary['p-values'] = p_values.round(3)
    reg_summary['R-square'] = round(r2, 3)
    reg_summary['Adjusted R-square'] = round(adj_r2, 3)

    return reg_summary

In [None]:
# Reload the same sample data and summarise price ~ size + year without
# scaling; the summary table is the cell's displayed output.
df2 = pd.read_csv("real_estate_price_size_year.csv")
multiple_linear_regression(x=df2[["size", "year"]], y=df2["price"])