In [32]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from statsmodels.stats.stattools import durbin_watson

# Build a reproducible synthetic regression problem (fixed random_state).
# The 506 x 13 shape presumably mirrors the Boston housing data used later -- TODO confirm.
linear_X, linear_y = make_regression(
    n_samples=506,
    n_features=13,
    noise=75,
    random_state=46,
)

# Fit ordinary least squares on the synthetic data; fit() returns the estimator itself.
linear_model = LinearRegression().fit(linear_X, linear_y)

# Function to calculate residuals
def calculate_residuals(model, features, label):
    """
    Return the residuals (observed minus predicted) as a one-column DataFrame.

    Parameters
    ----------
    model : object exposing ``predict(features)``.
    features : inputs handed to ``model.predict`` unchanged.
    label : observed target values; the predictions are subtracted from it.

    Returns
    -------
    pandas.DataFrame
        A frame with a single column named 'Residuals'.
    """
    fitted_values = model.predict(features)
    return pd.DataFrame({'Residuals': label - fitted_values})

# Function to check for autocorrelation
def autocorrelation_assumption(model, features, label, lower=1.5, upper=2.5):
    """
    Print a Durbin-Watson report on the autocorrelation of the model's residuals.

    Autocorrelation: Assumes that there is no autocorrelation in the residuals. If there is
                     autocorrelation, then there is a pattern that is not explained due to
                     the current value being dependent on the previous value.
                     This may be resolved by adding a lag variable of either the dependent
                     variable or some of the predictors.

    The residuals are obtained from ``calculate_residuals`` and passed to
    ``durbin_watson``. The statistic compares each residual with the one before
    it, so the result depends on the order of the rows.

    Parameters
    ----------
    model : fitted estimator, forwarded to ``calculate_residuals``.
    features : model inputs, forwarded to ``calculate_residuals``.
    label : observed target values, forwarded to ``calculate_residuals``.
    lower : float, default 1.5
        A statistic below this value is reported as positive autocorrelation.
    upper : float, default 2.5
        A statistic above this value is reported as negative autocorrelation.

    Returns
    -------
    None
        The report is written to stdout only.

    Raises
    ------
    ValueError
        If ``lower`` is not strictly smaller than ``upper``.
    """
    # Validate before printing anything so a bad call leaves no partial report.
    if not lower < upper:
        raise ValueError(
            'lower must be strictly smaller than upper, got lower={!r}, upper={!r}'.format(lower, upper)
        )

    print('Assumption 4: No Autocorrelation', '\n')

    # Residuals are the input of the Durbin-Watson test
    df_results = calculate_residuals(model, features, label)

    print('\nPerforming Durbin-Watson Test')
    # With the default thresholds this renders exactly as "1.5 < d < 2.5".
    print(f'Values of {lower} < d < {upper} generally show that there is no autocorrelation in the data')
    print('0 to 2< is positive autocorrelation')
    print('>2 to 4 is negative autocorrelation')
    print('-------------------------------------')

    dw_stat = durbin_watson(df_results['Residuals'])
    print('Durbin-Watson:', dw_stat)

    # Map the statistic onto a verdict; values on a threshold count as "no autocorrelation".
    if dw_stat < lower:
        finding, satisfied = 'Signs of positive autocorrelation', False
    elif dw_stat > upper:
        finding, satisfied = 'Signs of negative autocorrelation', False
    else:
        finding, satisfied = 'Little to no autocorrelation', True

    print(finding, '\n')
    print('Assumption satisfied' if satisfied else 'Assumption not satisfied')

# Run the autocorrelation check on the model fitted to the synthetic data
autocorrelation_assumption(model=linear_model, features=linear_X, label=linear_y)


Assumption 4: No Autocorrelation 


Performing Durbin-Watson Test
Values of 1.5 < d < 2.5 generally show that there is no autocorrelation in the data
0 to 2< is positive autocorrelation
>2 to 4 is negative autocorrelation
-------------------------------------
Durbin-Watson: 1.8720452975329263
Little to no autocorrelation 

Assumption satisfied


In [33]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from statsmodels.stats.stattools import durbin_watson

# Load the Boston housing data straight from the UCI repository.
# The file is whitespace-delimited and has no header row, so names are supplied here.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data'
column_names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
    'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV'
]
# Use a regex separator instead of delim_whitespace to avoid the pandas FutureWarning.
# It must be a raw string: in a normal literal '\s' is an invalid escape sequence,
# which itself triggers a SyntaxWarning on Python 3.12+ (DeprecationWarning before).
boston = pd.read_csv(url, sep=r'\s+', header=None, names=column_names)

# Separate features and target (MEDV is the column being predicted)
boston_data = boston.drop('MEDV', axis=1)
boston_target = boston['MEDV']
boston_feature_names = boston_data.columns

# Fit a linear regression model
boston_model = LinearRegression()
boston_model.fit(boston_data, boston_target)

# Residual calculation function
def calculate_residuals(model, features, label):
    """
    Compute observed-minus-predicted residuals and wrap them in a DataFrame.

    Parameters
    ----------
    model : object exposing ``predict(features)``.
    features : inputs handed to ``model.predict`` unchanged.
    label : observed target values; the predictions are subtracted from it.

    Returns
    -------
    pandas.DataFrame
        A frame with a single column named 'Residuals'.
    """
    predicted = model.predict(features)
    residual_column = {'Residuals': label - predicted}
    return pd.DataFrame(residual_column)

# Function to check for autocorrelation
def autocorrelation_assumption(model, features, label, lower=1.5, upper=2.5):
    """
    Print a Durbin-Watson report on the autocorrelation of the model's residuals.

    Autocorrelation: Assumes that there is no autocorrelation in the residuals. If there is
                     autocorrelation, then there is a pattern that is not explained due to
                     the current value being dependent on the previous value.
                     This may be resolved by adding a lag variable of either the dependent
                     variable or some of the predictors.

    The residuals are obtained from ``calculate_residuals`` and passed to
    ``durbin_watson``. The statistic compares each residual with the one before
    it, so the result depends on the order of the rows.

    Parameters
    ----------
    model : fitted estimator, forwarded to ``calculate_residuals``.
    features : model inputs, forwarded to ``calculate_residuals``.
    label : observed target values, forwarded to ``calculate_residuals``.
    lower : float, default 1.5
        A statistic below this value is reported as positive autocorrelation.
    upper : float, default 2.5
        A statistic above this value is reported as negative autocorrelation.

    Returns
    -------
    None
        The report is written to stdout only.

    Raises
    ------
    ValueError
        If ``lower`` is not strictly smaller than ``upper``.
    """
    # Validate before printing anything so a bad call leaves no partial report.
    if not lower < upper:
        raise ValueError(
            'lower must be strictly smaller than upper, got lower={!r}, upper={!r}'.format(lower, upper)
        )

    print('Assumption 4: No Autocorrelation\n')

    # Calculate residuals
    df_results = calculate_residuals(model, features, label)

    print('\nPerforming Durbin-Watson Test')
    # With the default thresholds this renders exactly as "1.5 < d < 2.5".
    print(f'Values of {lower} < d < {upper} generally show that there is no autocorrelation in the data')
    print('0 to 2< is positive autocorrelation')
    print('>2 to 4 is negative autocorrelation')
    print('-------------------------------------')

    dw_stat = durbin_watson(df_results['Residuals'])
    print('Durbin-Watson:', dw_stat)

    # Map the statistic onto a verdict; values on a threshold count as "no autocorrelation".
    if dw_stat < lower:
        finding, satisfied = 'Signs of positive autocorrelation', False
    elif dw_stat > upper:
        finding, satisfied = 'Signs of negative autocorrelation', False
    else:
        finding, satisfied = 'Little to no autocorrelation', True

    print(finding, '\n')
    print('Assumption satisfied' if satisfied else 'Assumption not satisfied')
# Run the autocorrelation check on the model fitted to the Boston housing data
autocorrelation_assumption(model=boston_model, features=boston_data, label=boston_target)


Assumption 4: No Autocorrelation


Performing Durbin-Watson Test
Values of 1.5 < d < 2.5 generally show that there is no autocorrelation in the data
0 to 2< is positive autocorrelation
>2 to 4 is negative autocorrelation
-------------------------------------
Durbin-Watson: 1.0783751186797252
Signs of positive autocorrelation 

Assumption not satisfied
