# Linear Regression Example

## Installation

### Package

In [3]:
## Install Library (uncomment the line below to install it into the kernel's environment)
# %pip install backwards-regression==0.1.0  # https://pypi.org/project/backwards_regression/

Collecting backwards-regression
  Downloading backwards_regression-0.1.0.tar.gz (4.0 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: backwards-regression
  Building wheel for backwards-regression (pyproject.toml): started
  Building wheel for backwards-regression (pyproject.toml): finished with status 'done'
  Created wheel for backwards-regression: filename=backwards_regression-0.1.0-py3-none-any.whl size=5671 sha256=7fba94560fb19268f146ac5b7432702c8141b7bd5863fa54933a51f7612f632d
  Stored in directory: c:\users\kwadw.desktop-t9bstpe\appdata\local\pip\cache\wheels\ee\58\d3\df8451e910cb90148cd90e63a60a64e79d138e25d8e3c80638
Successfully built backwards-regression
In


[notice] A new release of pip is available: 23.2.1 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
## Load up Library
from backwards_regression import fit_linear

## Linear Regression

### Data Exploration

In [7]:
## Sample Data

import pandas as pd
import numpy as np

# Set a random seed for reproducibility.
# generate_financial_data() draws from NumPy's global random state (np.random.normal),
# so seeding it here makes every run of this cell produce the same dataset.
np.random.seed(42)

# Generate synthetic financial data for linear regression
def generate_financial_data(n=1000):
    """Build a synthetic financial dataset for a linear-regression demo.

    Eight feature columns are sampled independently from normal distributions
    using NumPy's global random state (seed it beforehand for reproducible
    output). The 'Target' column is a linear combination of Age, Income,
    Savings, Debt and CreditScore plus Gaussian noise (mean 0, std 5);
    Assets, Liabilities and MonthlyExpenses do not enter the target.

    Parameters
    ----------
    n : int, default 1000
        Number of rows to generate.

    Returns
    -------
    pandas.DataFrame
        n rows with columns Age, Income, Savings, Debt, CreditScore, Assets,
        Liabilities, MonthlyExpenses and Target, in that order.
    """
    # (column name, mean, standard deviation).
    # The order fixes both the column order and the sequence of random draws.
    feature_specs = [
        ('Age', 40, 10),
        ('Income', 50000, 10000),
        ('Savings', 20000, 5000),
        ('Debt', 10000, 3000),
        ('CreditScore', 700, 50),
        ('Assets', 30000, 8000),
        ('Liabilities', 12000, 4000),
        ('MonthlyExpenses', 4000, 1000),
    ]
    columns = {
        name: np.random.normal(mean, std, n)
        for name, mean, std in feature_specs
    }

    # The noise is drawn after all features, then added as the last term.
    noise = np.random.normal(0, 5, n)
    columns['Target'] = (
        0.5 * columns['Age']
        + 0.2 * columns['Income']
        - 0.3 * columns['Savings']
        + 0.1 * columns['Debt']
        + 0.15 * columns['CreditScore']
        + noise
    )

    return pd.DataFrame(columns)

# Generate financial dataset for linear regression.
# Called with the default n=1000, so the frame has 1000 rows.
financial_df = generate_financial_data()


In [9]:
## Split the dataset into the response (y) and the predictors (X)
y = financial_df['Target']  # Target variable
X = financial_df.drop(columns='Target')  # Features: every column except 'Target'

# Preview the first rows of the features, then of the target
print("Features (X):", X.head(), sep="\n")
print("\nTarget variable (y):", y.head(), sep="\n")

Features (X):
         Age        Income  ...   Liabilities  MonthlyExpenses
0  44.967142  63993.554366  ...   7543.674567      4785.185082
1  38.617357  59246.336829  ...   9476.276665      2222.319038
2  46.476885  50596.303699  ...   8231.759266      4714.745650
3  55.230299  43530.632223  ...   9808.016725      3766.275942
4  37.658466  56982.233136  ...  11143.398757      4707.457711

[5 rows x 8 columns]

Target variable (y):
0    8359.978261
1    6929.486270
2    6311.320656
3    4874.108572
4    9523.801644
Name: Target, dtype: float64


### With Interactions Included

In [10]:
## Feature selection with interaction terms enabled (include_interactions=True)
# fit_linear returns the retained features and the list of dropped variables.
# NOTE(review): the captured log shows 'Income * Savings' dropped at p ~ 0.0159, which is
# below threshold_out=0.05 -- presumably interaction terms are judged against
# threshold_in=0.01; confirm against the backwards_regression source.
result, dropped_vars = fit_linear(
    X,
    y,
    threshold_in=0.01,
    threshold_out=0.05,
    include_interactions=True,
    verbose=True,
)

INFO:root:Drop feature Liabilities with p-value 0.9602872163010282
INFO:root:Iteration 2: Current features: ['Age', 'Income', 'Savings', 'Debt', 'CreditScore', 'Assets', 'MonthlyExpenses']
INFO:root:Drop feature Assets with p-value 0.6417684824230647
INFO:root:Iteration 3: Current features: ['Age', 'Income', 'Savings', 'Debt', 'CreditScore', 'MonthlyExpenses']
INFO:root:Drop feature MonthlyExpenses with p-value 0.38515644186354714
INFO:root:Iteration 4: Current features: ['Age', 'Income', 'Savings', 'Debt', 'CreditScore']
INFO:root:Drop interaction term Age * Income with p-value 0.9917758827207849
INFO:root:Drop interaction term Age * Savings with p-value 0.610145390391784


INFO:root:Drop interaction term Age * Debt with p-value 0.5463331331649792
INFO:root:Drop interaction term Age * CreditScore with p-value 0.3218387082529018
INFO:root:Drop interaction term Income * Savings with p-value 0.01585052347050218
INFO:root:Drop interaction term Income * Debt with p-value 0.49846989988489143
INFO:root:Drop interaction term Income * CreditScore with p-value 0.346568201709456
INFO:root:Drop interaction term Savings * Debt with p-value 0.559873991042455
INFO:root:Drop interaction term Savings * CreditScore with p-value 0.9876986538896217
INFO:root:Drop interaction term Debt * CreditScore with p-value 0.8417246679634355


In [11]:
## Print Selected features
# `result` is the first of the two values returned by the fit_linear call above.
print("Final included features:", result)

Final included features: ['Age', 'Income', 'Savings', 'Debt', 'CreditScore']


In [12]:
## Print Eliminated features
# `dropped_vars` is the second value returned by the fit_linear call above.
print("Dropped variables:", dropped_vars)

Dropped variables: ['Liabilities', 'Assets', 'MonthlyExpenses', 'Age * Income', 'Age * Savings', 'Age * Debt', 'Age * CreditScore', 'Income * Savings', 'Income * Debt', 'Income * CreditScore', 'Savings * Debt', 'Savings * CreditScore', 'Debt * CreditScore']


### Without Interactions Included

In [13]:
## Feature selection with interaction terms disabled (include_interactions=False)
# Same thresholds as the previous run; `result` and `dropped_vars` are rebound here,
# replacing the values from the interactions-enabled run.
result, dropped_vars = fit_linear(
    X,
    y,
    threshold_in=0.01,
    threshold_out=0.05,
    include_interactions=False,
    verbose=True,
)

INFO:root:Drop feature Liabilities with p-value 0.9602872163010282
INFO:root:Iteration 2: Current features: ['Age', 'Income', 'Savings', 'Debt', 'CreditScore', 'Assets', 'MonthlyExpenses']
INFO:root:Drop feature Assets with p-value 0.6417684824230647
INFO:root:Iteration 3: Current features: ['Age', 'Income', 'Savings', 'Debt', 'CreditScore', 'MonthlyExpenses']
INFO:root:Drop feature MonthlyExpenses with p-value 0.38515644186354714
INFO:root:Iteration 4: Current features: ['Age', 'Income', 'Savings', 'Debt', 'CreditScore']


In [14]:
## Print Selected features
# `result` now holds the features retained by the include_interactions=False run.
print("Final included features:", result)

Final included features: ['Age', 'Income', 'Savings', 'Debt', 'CreditScore']


In [15]:
## Print Eliminated features
# `dropped_vars` now holds the variables dropped by the include_interactions=False run.
print("Dropped variables:", dropped_vars)

Dropped variables: ['Liabilities', 'Assets', 'MonthlyExpenses']
