# Experiment 3: Linear Regression

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load dataset
# The path is relative, so the CSV is resolved against the current working
# directory of the notebook kernel.
DATA_PATH = 'advertising.csv'
df = pd.read_csv(DATA_PATH)

# Kept as the final expression of the cell so the notebook renders a preview
# of the first rows.
df.head()

In [None]:
# Check data info
# Dimensions of the frame as a (rows, columns) tuple.
print(df.shape)

# DataFrame.info() writes its summary (columns, non-null counts, dtypes) to
# stdout itself and returns None, so it is called directly; wrapping it in
# print() would append a stray "None" line to the output.
df.info()

# Number of missing values in each column.
print(df.isnull().sum())

In [None]:
# Select features and target
# Single-predictor regression: the 'TV' column is the only feature and
# 'Sales' is the target. The double brackets keep X a one-column DataFrame
# (2-D) rather than a 1-D Series.
y = df['Sales']
X = df[['TV']]

# Train-test split
# 30% of the rows are held out for testing; the fixed random_state makes the
# split repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=41,
    test_size=0.3,
)

# Build model
# fit() returns the fitted estimator, so construction and training are chained.
model = LinearRegression().fit(x_train, y_train)

# Predictions for the held-out rows, reused by the evaluation and plots.
y_pred = model.predict(x_test)

print('Coefficients:', model.coef_)
print('Intercept:', model.intercept_)

In [None]:
# Evaluate model
# Both metrics compare the held-out targets with the model's predictions.
r2 = r2_score(y_test, y_pred)

# RMSE is the square root of the mean squared error, which puts the error
# back in the units of the target.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print('Root Mean Squared Error:', rmse)
print('R-squared:', r2)

In [None]:
# Plot Actual vs Predicted
# Each point is one held-out row: actual sales on x, predicted sales on y.
plt.scatter(y_test, y_pred)
plt.xlabel('Actual Sales')
plt.ylabel('Predicted Sales')
plt.title('Linear Regression Predictions')

# Endpoints of the dashed y = x reference line, spanning the combined range of
# actual and predicted values. The containers' own min()/max() reductions are
# used instead of the builtins, which would iterate element by element in
# Python.
lims = [
    min(y_test.min(), y_pred.min()),
    max(y_test.max(), y_pred.max()),
]

# Points on this line are perfect predictions.
plt.plot(lims, lims, 'k--')
plt.show()

In [None]:
# Residual Analysis
# Residual = actual - predicted for every held-out row; positive values mean
# the model under-predicted.
residuals = y_test - y_pred

# NOTE(review): residuals are plotted against the actual values here. Because
# a residual contains the actual value, this view tends to slope upward even
# for a well-fitting model; plotting against y_pred is the more common
# diagnostic -- confirm which one is intended.
plt.scatter(y_test, residuals)

# Dashed zero line: points above it were under-predicted, below over-predicted.
plt.axhline(0, color='k', linestyle='--')

plt.title('Residual Analysis')
plt.xlabel('Actual Sales')
plt.ylabel('Residuals')
plt.show()