# Polynomial Regression with Regularization

In [None]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

# StandardScaler is used only by this cell, so it is imported here to keep the
# cell runnable on its own.
from sklearn.preprocessing import StandardScaler

# Generate synthetic training data: a noisy sine wave sampled on [0, 10].
np.random.seed(0)
X_train = np.linspace(0, 10, 100).reshape(-1, 1)
y_train = 2 * np.sin(X_train) + np.random.normal(0, 0.5, size=X_train.shape)

# Draw a separate held-out sample from the same process. It is generated after
# the training noise so that y_train is unaffected by its presence.
X_test = np.linspace(0, 10, 50).reshape(-1, 1)
y_test = 2 * np.sin(X_test) + np.random.normal(0, 0.5, size=X_test.shape)

# Generate polynomial features. The constant column is omitted because Ridge
# fits its own intercept.
poly = PolynomialFeatures(degree=10, include_bias=False)

# Standardize the expanded features before fitting: on [0, 10] the raw columns
# range from x (<= 10) up to x**10 (<= 1e10), so without scaling the L2 penalty
# acts very unevenly across coefficients and the solve is badly conditioned.
scaler = StandardScaler()
X_poly_train = scaler.fit_transform(poly.fit_transform(X_train))

# Fit polynomial regression with L2 regularization
ridge = Ridge(alpha=1.0)
ridge.fit(X_poly_train, y_train)

# Evaluate on the data the model was fitted on
train_error = mean_squared_error(y_train, ridge.predict(X_poly_train))
print(f"Training Error: {train_error}")

# Evaluate on the held-out sample, reusing the transformers fitted on the
# training data (transform only, never re-fit on test data).
X_poly_test = scaler.transform(poly.transform(X_test))
test_error = mean_squared_error(y_test, ridge.predict(X_poly_test))
print(f"Test Error: {test_error}")

# Cost-Complexity Pruning

In [None]:
from sklearn.datasets import load_diabetes
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor, plot_tree
import matplotlib.pyplot as plt

# Load an example regression dataset. The Boston housing loader was removed in
# scikit-learn 1.2, so the bundled diabetes dataset is used instead; it exposes
# the same `data`, `target` and `feature_names` attributes.
data = load_diabetes()
X, y = data.data, data.target

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train an unpruned Decision Tree Regressor
reg = DecisionTreeRegressor(random_state=42)
reg.fit(X_train, y_train)

# Visualize the original (unpruned) tree
plt.figure(figsize=(12, 6))
plot_tree(reg, filled=True, feature_names=data.feature_names)
plt.title("Original Decision Tree")
plt.show()

# Cost-Complexity Pruning: effective alphas along the pruning path
path = reg.cost_complexity_pruning_path(X_train, y_train)
ccp_alphas, impurities = path.ccp_alphas, path.impurities

# Train one tree per alpha. A separate loop name is used so that `reg` keeps
# referring to the unpruned tree fitted above.
regs = [
    DecisionTreeRegressor(random_state=42, ccp_alpha=ccp_alpha).fit(X_train, y_train)
    for ccp_alpha in ccp_alphas
]

# Drop the last entry of regs and ccp_alphas: the final (largest) alpha prunes
# the whole tree, leaving a trivial tree with a single node.
regs = regs[:-1]
ccp_alphas = ccp_alphas[:-1]

# Mean Squared Error vs alpha for training and testing sets
train_errors = [mean_squared_error(y_train, tree.predict(X_train)) for tree in regs]
test_errors = [mean_squared_error(y_test, tree.predict(X_test)) for tree in regs]

# Plot Mean Squared Error vs alpha
plt.figure(figsize=(10, 6))
plt.plot(ccp_alphas, train_errors, marker='o', label='train', drawstyle="steps-post")
plt.plot(ccp_alphas, test_errors, marker='o', label='test', drawstyle="steps-post")
plt.xlabel("alpha")
plt.ylabel("Mean Squared Error")
plt.title("Mean Squared Error vs alpha for training and testing sets")
plt.legend()
plt.show()

# Predictive Maintenance

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Synthetic sample: sin(x) on [0, 10] with small Gaussian noise added.
np.random.seed(0)
X = np.linspace(0, 10, 100)
y_true = np.sin(X) + np.random.normal(0, 0.1, size=X.shape)

# One panel per polynomial degree, laid out side by side, so the fits can be
# compared against the same scatter of points.
degrees = [1, 4, 15]
fig, axes = plt.subplots(1, len(degrees), figsize=(12, 4))
for ax, degree in zip(axes, degrees):
    ax.scatter(X, y_true, s=10, label='Noisy data')

    # Least-squares polynomial fit, evaluated back on the sample points.
    coeffs = np.polyfit(X, y_true, degree)
    y_pred = np.polyval(coeffs, X)

    ax.plot(X, y_pred, color='r', label=f'Degree {degree}')
    ax.set_xlabel('X')
    ax.set_ylabel('y')
    ax.set_title(f'Polynomial Regression: Degree {degree}')
    ax.legend()

fig.tight_layout()
plt.show()

# Linear Regression for Stock Price Prediction

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load historical stock price data (example)
# Replace with your actual dataset or API call to fetch data
# Here's a simulated example: a random walk built from cumulative Gaussian steps
np.random.seed(0)
dates = pd.date_range('2023-01-01', periods=100)
prices = np.cumsum(np.random.randn(100))  # Simulated stock prices
data = pd.DataFrame({'Date': dates, 'Price': prices})
data.set_index('Date', inplace=True)

# Feature engineering: use the previous two prices as predictors.
# Shifting leaves NaN in the first two rows, which are dropped.
data['Price_Lag1'] = data['Price'].shift(1)
data['Price_Lag2'] = data['Price'].shift(2)
data.dropna(inplace=True)

# Prepare data for regression
X = data[['Price_Lag1', 'Price_Lag2']].values
y = data['Price'].values

# Split chronologically: the first 80% of rows train the model and the last 20%
# test it. Shuffling must be disabled for time-ordered data; otherwise the model
# is trained on rows that come after the ones it is tested on, and the test rows
# no longer correspond to the trailing dates used for the plot below.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Initialize and fit the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on test data
y_pred = model.predict(X_test)

# Model evaluation
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse:.2f}')
print(f'R-squared: {r2:.2f}')

# Plotting predictions against the dates of the test rows, which are the last
# len(y_test) rows of `data` because the split above is unshuffled.
test_dates = data.index[-len(y_test):]
plt.figure(figsize=(10, 6))
plt.plot(test_dates, y_test, label='Actual Prices')
plt.plot(test_dates, y_pred, label='Predicted Prices')
plt.title('Stock Price Prediction using Linear Regression')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# K-Fold Cross-Validation

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

def _mse(estimator, features, targets):
    """Return the mean squared error of ``estimator``'s predictions on ``features``."""
    return mean_squared_error(targets, estimator.predict(features))


# Synthetic demonstration data: 100 samples with 5 features and a target, all
# drawn from a standard normal distribution. Swap in a real dataset here.
np.random.seed(0)
X = np.random.randn(100, 5)  # Example features
y = np.random.randn(100)     # Example target

# 5-fold splitter; rows are shuffled once with a fixed seed
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=0)

# Estimator under evaluation; it is re-fitted from scratch on every fold
model = LinearRegression()

# Per-fold mean squared errors
train_scores, test_scores = [], []

for fit_rows, held_out_rows in kf.split(X):
    # Fit on this fold's training rows only
    model.fit(X[fit_rows], y[fit_rows])

    # Score on the rows used for fitting, then on the held-out rows
    train_scores.append(_mse(model, X[fit_rows], y[fit_rows]))
    test_scores.append(_mse(model, X[held_out_rows], y[held_out_rows]))

# Average the per-fold errors
avg_train_score, avg_test_score = np.mean(train_scores), np.mean(test_scores)

print(f'Average Train MSE: {avg_train_score:.2f}')
print(f'Average Test MSE: {avg_test_score:.2f}')