In [None]:
import pandas as pd
from datetime import datetime
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load dataset
data = pd.read_csv("AWCustomers.csv")

# Create Age column.
# NOTE: this is a calendar-year difference evaluated at run time, so it can be
# off by one year from the true age and its value depends on when the cell runs.
data['BirthDate'] = pd.to_datetime(data['BirthDate'])
data['Age'] = datetime.now().year - data['BirthDate'].dt.year

# Select features (X) and target (y); anything non-numeric becomes NaN
feature_columns = ['Age', 'NumberCarsOwned', 'TotalChildren', 'NumberChildrenAtHome']
X = data[feature_columns].apply(pd.to_numeric, errors='coerce')
y = pd.to_numeric(data['YearlyIncome'], errors='coerce')

# Drop rows whose target is unknown instead of filling them with the median:
# an imputed target is a fabricated label that would distort both the fit and
# the reported R^2.
has_target = y.notna()
X = X.loc[has_target]
y = y.loc[has_target]

# Split dataset BEFORE any statistic is estimated from it, so nothing computed
# on the test rows can influence training.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Handle missing feature values using medians learned from the training
# partition only, then apply those same medians to the test partition.
train_medians = X_train.median()
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

# Feature Scaling (fitted on train only, reused unchanged for test)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate: coefficient of determination on the held-out 20%
score = model.score(X_test, y_test)

print("Linear Regression R^2 Score:", score)


Linear Regression R^2 Score: 0.23133741884445713
