# Linear Regression


In [204]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import plotly.express as px
import plotly.graph_objects as go

# Load the California Housing dataset
california = fetch_california_housing()
X = pd.DataFrame(california.data, columns=california.feature_names)
y = california.target
print(X.head())

   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  
0    -122.23  
1    -122.22  
2    -122.24  
3    -122.25  
4    -122.25  


In [203]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Linear Regression model
linear_regression = LinearRegression()

# Train the model
linear_regression.fit(X_train, y_train)

# Predict on the test set
y_pred = linear_regression.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Linear Regression MSE:", mse)


Linear Regression MSE: 0.5558915986952437


In [199]:
# Single input prediction
single_input = X_test.iloc[0, :].values.reshape(1, -1)
single_input_df = pd.DataFrame(single_input, columns=X.columns)
print(single_input_df)
single_pred = linear_regression.predict(single_input_df)
print("Single Input:", single_input)
print("Linear Regression Prediction for single input:", single_pred)

   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  1.6812      25.0  4.192201   1.022284      1392.0  3.877437     36.06   

   Longitude  
0    -119.01  
Single Input: [[ 1.68120000e+00  2.50000000e+01  4.19220056e+00  1.02228412e+00
   1.39200000e+03  3.87743733e+00  3.60600000e+01 -1.19010000e+02]]
Linear Regression Prediction for single input: [0.71912284]


In [200]:
# Plot predicted vs actual values
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=y_test, y=y_pred,
    mode='markers',
    name='Predictions'
))
fig.add_trace(go.Scatter(
    x=y_test, y=y_test,
    mode='markers',
    name='Actual Values'
))
fig.update_layout(title='Predicted vs Actual Values',
                  xaxis_title='Actual Values',
                  yaxis_title='Predicted Values')
fig.show()

# Visualize single input prediction
fig = go.Figure()
fig.add_trace(go.Bar(
    x=['Actual Value', 'Predicted Value'],
    y=[y_test[0], single_pred[0]],
    name='Single Input Prediction'
))
fig.update_layout(title='Single Input Prediction Comparison',
                  xaxis_title='Model',
                  yaxis_title='House Price')
fig.show()


# Polynomial Regression

In [225]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

# Generate synthetic data
np.random.seed(0)
X = np.random.rand(5000, 1) * 10
y = 3 + 0.5 * X + 12 * X**2 - 1 * X**3 + np.random.randn(5000, 1) * 3
fig_plot = px.scatter(x=X.flatten(), y=y.flatten(), labels={'x': 'X', 'y': 'y'}, title="Polynomial Regression")
fig_plot.show()
print(X)




[[5.48813504]
 [7.15189366]
 [6.02763376]
 ...
 [1.97388262]
 [3.9868722 ]
 [9.58593103]]


In [223]:
# Create polynomial features
poly = PolynomialFeatures(degree=3)
X_poly = poly.fit_transform(X)
print(X_poly)
# Fit polynomial regression model


[[  1.           5.48813504  30.11962621 165.30057597]
 [  1.           7.15189366  51.14958298 365.8163784 ]
 [  1.           6.02763376  36.33236875 218.9982125 ]
 ...
 [  1.           1.97388262   3.89621261   7.69066636]
 [  1.           3.9868722   15.89514991  63.37193126]
 [  1.           9.58593103  91.89007371 880.85190891]]


In [230]:
model = LinearRegression()
model.fit(X_poly, y)
y_poly_pred = model.predict(X_poly)

# Plot original data and polynomial regression line
fig = px.scatter(x=X.flatten(), y=y.flatten(), labels={'x': 'X', 'y': 'y'}, title="Polynomial Regression")
fig.add_trace(go.Scatter(x=X.flatten(), y=y_poly_pred.flatten(), mode='lines', name='Polynomial Fit'))
fig.show()

In [270]:
Single_input = 17.9
y_poly_pred = model.predict(poly.fit_transform([[Single_input]]))
y_actual = 3 + 0.5 * Single_input + 12 * Single_input**2 - 1 * Single_input**3 + np.random.randn(5000, 1) * 3

print(y_poly_pred , y_actual[0])
print(mean_squared_error(y_poly_pred, y_actual[0]))

[[-1884.0666126]] [-1883.15318065]
0.8343579376875763


# Ridge and Lasso

In [273]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error
import plotly.express as px
import plotly.graph_objects as go

# Load the Boston Housing dataset
california = fetch_california_housing()
X = pd.DataFrame(california.data, columns=california.feature_names)
y = california.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [None]:
# Initialize Ridge and Lasso regressors
ridge = Ridge(alpha=1.0)
lasso = Lasso(alpha=0.1)

# Fit the models
ridge.fit(X_train, y_train)
lasso.fit(X_train, y_train)

# Predict on the test set
ridge_pred = ridge.predict(X_test)
lasso_pred = lasso.predict(X_test)

# Evaluate the models
ridge_mse = mean_squared_error(y_test, ridge_pred)
lasso_mse = mean_squared_error(y_test, lasso_pred)

print("Ridge Regression MSE:", ridge_mse)
print("Lasso Regression MSE:", lasso_mse)

# Plot the coefficients
ridge_coefs = pd.DataFrame(ridge.coef_, index=X.columns, columns=['Ridge Coefficients'])
lasso_coefs = pd.DataFrame(lasso.coef_, index=X.columns, columns=['Lasso Coefficients'])




In [82]:
fig = go.Figure()
fig.add_trace(go.Bar(
    x=ridge_coefs.index,
    y=ridge_coefs['Ridge Coefficients'],
    name='Ridge Coefficients'
))
fig.add_trace(go.Bar(
    x=lasso_coefs.index,
    y=lasso_coefs['Lasso Coefficients'],
    name='Lasso Coefficients'
))
fig.update_layout(title='Ridge vs Lasso Coefficients',
                  xaxis_title='Features',
                  yaxis_title='Coefficient Value')
fig.show()

# Plot predicted vs actual values
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=y_test, y=ridge_pred,
    mode='markers',
    name='Ridge Predictions'
))
fig.add_trace(go.Scatter(
    x=y_test, y=lasso_pred,
    mode='markers',
    name='Lasso Predictions'
))
fig.add_trace(go.Scatter(
    x=y_test, y=y_test,
    mode='lines',
    name='Actual Values'
))
fig.update_layout(title='Predicted vs Actual Values',
                  xaxis_title='Actual Values',
                  yaxis_title='Predicted Values')
fig.show()


In [279]:
# Single input prediction
single_input = X_test.iloc[0:1]
print(single_input)


       MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
20046  1.6812      25.0  4.192201   1.022284      1392.0  3.877437     36.06   

       Longitude  
20046    -119.01  


In [280]:
ridge_pred = ridge.predict(single_input)
lasso_pred = lasso.predict(single_input)

In [285]:
print(ridge_pred) 
print(lasso_pred)
print(y_test[0:1])

[0.71923978]
[1.04628114]
[0.477]


# Question

In [317]:
np.random.seed(42)
X = np.linspace(0, 2 * np.pi, 100).reshape(-1, 1)
y = np.sin(X) + np.random.normal(scale=0.1, size=X.shape)


In [324]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error
import plotly.graph_objs as go

# Generate synthetic data
np.random.seed(42)
X = np.linspace(0, 2 * np.pi, 100).reshape(-1, 1)
y = np.sin(X) + np.random.normal(scale=0.1, size=X.shape)

# Polynomial transformation
poly = PolynomialFeatures(degree=5)
X_poly = poly.fit_transform(X)

# Fit polynomial regression model
linear_model = LinearRegression()
linear_model.fit(X_poly, y)
y_pred_linear = linear_model.predict(X_poly)

# Fit Ridge regression model
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X_poly, y)
y_pred_ridge = ridge_model.predict(X_poly)

# Fit Lasso regression model
lasso_model = Lasso(alpha=0.01)
lasso_model.fit(X_poly, y)
y_pred_lasso = lasso_model.predict(X_poly)

# Evaluate the models
mse_linear = mean_squared_error(y, y_pred_linear)
mse_ridge = mean_squared_error(y, y_pred_ridge)
mse_lasso = mean_squared_error(y, y_pred_lasso)

print(f"Linear Regression MSE: {mse_linear}")
print(f"Ridge Regression MSE: {mse_ridge}")
print(f"Lasso Regression MSE: {mse_lasso}")



Linear Regression MSE: 0.007662258928756692
Ridge Regression MSE: 0.00856854225343489
Lasso Regression MSE: 0.021462190530445



Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.749e+00, tolerance: 4.813e-03



In [325]:
# Visualize the results
fig = go.Figure()

# Add original data points
fig.add_trace(go.Scatter(x=X.flatten(), y=y.flatten(), mode='markers', name='Data', marker=dict(color='lightblue')))

# Add Polynomial Regression line
fig.add_trace(go.Scatter(x=X.flatten(), y=y_pred_linear.flatten(), mode='lines', name='Polynomial Regression', line=dict(color='red')))

# Add Ridge Regression line
fig.add_trace(go.Scatter(x=X.flatten(), y=y_pred_ridge.flatten(), mode='lines', name='Ridge Regression', line=dict(color='green')))

# Add Lasso Regression line
fig.add_trace(go.Scatter(x=X.flatten(), y=y_pred_lasso.flatten(), mode='lines', name='Lasso Regression', line=dict(color='purple')))

# Update layout
fig.update_layout(title='Polynomial, Ridge, and Lasso Regression',
                  xaxis_title='X',
                  yaxis_title='y',
                  legend=dict(x=0.1, y=1.1))

fig.show()
