Import libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

Create dataset

In [None]:
# Build a reproducible toy regression problem.
# Seeding NumPy's global generator makes every run draw the same sample.
np.random.seed(42)

# Feature column: 100 uniform draws scaled to the range [0, 2), shaped (100, 1).
X = np.random.rand(100, 1) * 2

# Target column: the line y = 4 + 3x with standard-normal noise added per sample.
noise = np.random.randn(100, 1)
y = (3 * X + 4) + noise

Visualize the data

In [None]:
# Plot the raw samples on an explicit Figure/Axes pair to eyeball the trend.
fig, ax = plt.subplots()
ax.scatter(X, y)
ax.set_title("Scatter plot of X vs y")
ax.set_xlabel("X")
ax.set_ylabel("y")
plt.show()

Split the data into training and test data

In [None]:
# Hold out 30% of the samples for testing; the fixed random_state keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

Create and train the linear regression model

In [None]:
# Instantiate an ordinary least-squares regressor with default settings.
model = LinearRegression()

# Fit on the training portion only. The call stays the cell's final expression,
# so the notebook echoes the fitted estimator as the cell output.
model.fit(X=X_train, y=y_train)

Make predictions on the test set

In [None]:
# Apply the fitted model to the held-out features to get predicted targets.
y_pred = model.predict(X=X_test)

Evaluate the model

In [None]:
# Score the held-out predictions and report the parameters of the fitted line.

# Mean Squared Error (MSE): average squared residual on the test set.
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Coefficient of Determination (R²) on the test set.
r2 = r2_score(y_test, y_pred)
print(f"R²: {r2}")

# y was built as a (100, 1) column, so scikit-learn stores intercept_ and coef_
# as arrays rather than scalars. Flatten each and take the single element so the
# report prints plain numbers instead of array reprs such as "[[2.79...]]".
# np.ravel also accepts a scalar, so this still works if y is ever made 1-D.
intercept = float(np.ravel(model.intercept_)[0])
slope = float(np.ravel(model.coef_)[0])
print(f"Intercept: {intercept}")
print(f"Coefficient: {slope}")

Visualize the regression line

In [None]:
# Overlay the fitted line (blue) on the held-out observations (black).
fig, ax = plt.subplots()
ax.scatter(X_test, y_test, color="black")
ax.plot(X_test, y_pred, color="blue", linewidth=3)
ax.set(title="Linear Regression Fit", xlabel="X", ylabel="y")
plt.show()


Explanation of the Output:
Scatter Plot: Visualizes the relationship between the independent variable (X) and the dependent variable (y).
Mean Squared Error (MSE): The average squared difference between the observed values and the model's predictions. Lower values indicate a better fit.
Coefficient of Determination (R²): Indicates how well the independent variable(s) explain the variability of the dependent variable. Values closer to 1.0 suggest a better fit.
Model Coefficients: The intercept and the slope of the regression line.

Interpret the Results
In this example, we generated data from y = 4 + 3X + noise, so the fitted model should recover an intercept close to 4 and a slope close to 3. The final plot shows the fitted regression line over the test points, while R² and MSE quantify how well the model performs on the held-out data.

This example illustrates the core steps for implementing linear regression in Python using scikit-learn.