In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.inspection import  PartialDependenceDisplay,permutation_importance
from sklearn.linear_model import LinearRegression, LogisticRegression
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor, plot_tree
from sklearn import tree
import seaborn as sns
import numpy as np
%matplotlib inline

# Load the dataset
data = pd.read_csv('day.csv')

# Select relevant features and target
features = ['hum','temp', 'windspeed']
target = 'cnt'
X = data.loc[:,'season':'windspeed']
y = data['cnt']>=data['cnt'].median()  # Target is count of bike rentals

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize numerical features
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X.columns)
X_test_scaled= pd.DataFrame(scaler.transform(X_test), columns=X.columns)

In [3]:
log_reg = LogisticRegression()
log_reg.fit(X_train_scaled, y_train)

print(f'Intercept: {log_reg.intercept_[0]:.3f}')
for coef, feature in zip(log_reg.coef_[0], features):
    print(f'{feature}_s: {coef:.3f}')

Intercept: -0.007
hum_s: 0.831
temp_s: 1.883
windspeed_s: 0.089


In [4]:
def coefficients_to_odds_ratios(coefficients):
    
    """
    Convert logistic regression coefficients to odds ratios.
    
    Parameters:
        coefficients (array-like): Array of logistic regression coefficients
        
    Returns:
        numpy.ndarray: Array of odds ratios
    
    Example:
        >>> coefficients_to_odds_ratios([0.693, -0.693])
        array([2.0, 0.5])  # e^0.693 ≈ 2.0, e^-0.693 ≈ 0.5
    """
    coefficients = np.array(coefficients)
    return np.exp(coefficients)

# Test with basic model
basic_odds_ratios = coefficients_to_odds_ratios(log_reg.coef_[0])
for feature, odds_ratio in zip(features, basic_odds_ratios):
    print(f"{feature} odds ratio: {odds_ratio:.3f}")

hum odds ratio: 2.296
temp odds ratio: 6.576
windspeed odds ratio: 1.093


In [5]:
def probability_change_at_mean(coefficient):
    """
    Calculate the change in probability at the mean (P=0.5) for a given coefficient.
    
    Parameters:
        coefficient (float): Logistic regression coefficient
        
    Returns:
        float: Change in probability (between -0.5 and 0.5)
    
    Example:
        >>> probability_change_at_mean(0.693)
        0.167  # Approximately 16.7 percentage point increase
    """
    return 1 / (1 + np.exp(-coefficient)) - 0.5

# Test with our basic model
for feature, coef in zip(features, log_reg.coef_[0]):
    prob_change = probability_change_at_mean(coef)
    print(f"{feature} probability change: {prob_change:.3f}")

hum probability change: 0.197
temp probability change: 0.368
windspeed probability change: 0.022


In [6]:
expanded_features =features + ['season', 'holiday', 'weekday']
expanded_model = LogisticRegression(random_state=42)
expanded_model.fit(X_train_scaled[expanded_features], y_train)

def interpret_logistic_model(model, feature_names):
    # Get coefficients from model
    coefficients = model.coef_[0]
    
    # Calculate interpretability metrics
    odds_ratios = coefficients_to_odds_ratios(coefficients)
    prob_changes = np.array([probability_change_at_mean(coef) for coef in coefficients])
    
    # Create interpretation dictionary
    interpretation = {}
    for idx, feature in enumerate(feature_names):
        interpretation[feature] = {
            'coefficient': coefficients[idx],
            'odds_ratio': odds_ratios[idx],
            'probability_change': prob_changes[idx]
        }
    
    return interpretation

# Compare interpretations
print("\nBasic Model Interpretation:")
basic_interpretation = interpret_logistic_model(log_reg,features)
for feature, metrics in basic_interpretation.items():
    print(f"\n{feature}:")
    for metric, value in metrics.items():
        print(f"  {metric}: {value:.3f}")

print("\nExpanded Model Interpretation (first 3 features):")
expanded_interpretation = interpret_logistic_model(
    expanded_model, 
    expanded_features
)
for feature in features:
    print(f"\n{feature}:")
    for metric, value in expanded_interpretation[feature].items():
        print(f"  {metric}: {value:.3f}")


Basic Model Interpretation:

hum:
  coefficient: 0.831
  odds_ratio: 2.296
  probability_change: 0.197

temp:
  coefficient: 1.883
  odds_ratio: 6.576
  probability_change: 0.368

windspeed:
  coefficient: 0.089
  odds_ratio: 1.093
  probability_change: 0.022

Expanded Model Interpretation (first 3 features):

hum:
  coefficient: -0.820
  odds_ratio: 0.441
  probability_change: -0.194

temp:
  coefficient: 1.434
  odds_ratio: 4.196
  probability_change: 0.308

windspeed:
  coefficient: -0.425
  odds_ratio: 0.654
  probability_change: -0.105
