In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [2]:
# Read the advertising dataset from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='advertising.csv')

In [None]:
# Peek at the first five rows to sanity-check the load.
data.head(n=5)

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9
...,...,...,...,...
195,38.2,3.7,13.8,7.6
196,94.2,4.9,8.1,14.0
197,177.0,9.3,6.4,14.8
198,283.6,42.0,66.2,25.5


In [4]:
# Print a structural summary of the frame: index range, column names,
# non-null counts, dtypes and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TV         200 non-null    float64
 1   Radio      200 non-null    float64
 2   Newspaper  200 non-null    float64
 3   Sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB


In [None]:
# Show summary statistics for the columns of `data`.
# NOTE(review): the output saved under this cell ends with
# "Name: count, Length: 200", which looks like `value_counts()` output rather
# than `describe()` output; re-run the cell to refresh it.
data.describe()

TV     Radio  Newspaper  Sales
0.7    39.6   8.7        1.6      1
206.9  8.4    26.4       17.9     1
197.6  3.5    5.9        16.7     1
       23.3   14.2       16.6     1
198.9  49.4   60.0       23.7     1
                                 ..
97.5   7.6    7.2        13.7     1
100.4  9.6    3.6        10.7     1
102.7  29.6   8.4        14.0     1
104.6  5.7    34.4       10.4     1
296.4  36.3   100.9      23.8     1
Name: count, Length: 200, dtype: int64

In [6]:
# Count the missing values in each column (a column of zeros means no gaps).
data.isna().sum()

TV           0
Radio        0
Newspaper    0
Sales        0
dtype: int64

## Prepare Data for Training

In [None]:
# Build the modelling inputs: the three advertising-spend columns act as
# predictors (X) and 'Sales' is the response (y).
feature_columns = ['TV', 'Radio', 'Newspaper']
target_column = 'Sales'
X, y = data[feature_columns], data[target_column]

# Report both shapes in one call; the printed text is unchanged.
print(
    f"Features shape: {X.shape}",
    f"Target shape: {y.shape}",
    sep="\n",
)

In [None]:
# Hold out 20% of the rows for evaluation and train on the remaining 80%.
# random_state is fixed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(
    f"Training set size: {X_train.shape}",
    f"Testing set size: {X_test.shape}",
    sep="\n",
)

## Train Linear Regression Model

In [None]:
# Fit a linear regression model on the training split.
model = LinearRegression()
model.fit(X_train, y_train)

print("Model training completed!")
print("\nModel Coefficients:")  # plain string: there is nothing to interpolate
# Label each coefficient with the columns of the data the model was actually
# fitted on (X_train) rather than the global X, so the labels stay correct
# even if X is reassigned or re-ordered in another cell.
for feature, coef in zip(X_train.columns, model.coef_):
    print(f"{feature}: {coef:.4f}")
print(f"\nIntercept: {model.intercept_:.4f}")

## Make Predictions and Evaluate

In [None]:
# In-sample check: predict on the same rows the model was trained on.
y_train_pred = model.predict(X_train)

# The metrics are independent of one another, except RMSE, which is derived
# from the MSE.
train_r2 = r2_score(y_train, y_train_pred)
train_mae = mean_absolute_error(y_train, y_train_pred)
train_mse = mean_squared_error(y_train, y_train_pred)
train_rmse = np.sqrt(train_mse)

print(
    "Training Set Performance:",
    f"R² Score: {train_r2:.4f}",
    f"Mean Absolute Error (MAE): {train_mae:.4f}",
    f"Root Mean Squared Error (RMSE): {train_rmse:.4f}",
    sep="\n",
)

In [None]:
# Out-of-sample check: predict on the held-out test rows.
y_test_pred = model.predict(X_test)

# The metrics are independent of one another, except RMSE, which is derived
# from the MSE.
test_r2 = r2_score(y_test, y_test_pred)
test_mae = mean_absolute_error(y_test, y_test_pred)
test_mse = mean_squared_error(y_test, y_test_pred)
test_rmse = np.sqrt(test_mse)

print(
    "Test Set Performance:",
    f"R² Score: {test_r2:.4f}",
    f"Mean Absolute Error (MAE): {test_mae:.4f}",
    f"Root Mean Squared Error (RMSE): {test_rmse:.4f}",
    sep="\n",
)

In [None]:
# Line up the held-out actual values against the model's predictions.
# `.values` is used so the columns are matched by position with the
# y_test_pred array rather than by y_test's index labels.
comparison = pd.DataFrame({
    'Actual Sales': y_test.values,
    'Predicted Sales': y_test_pred,
    # Positive difference => the model under-predicted that row.
    'Difference': y_test.values - y_test_pred
})

print("\nSample Predictions:")
# End on a bare expression instead of print(): Jupyter then renders the
# first ten rows as a rich table rather than plain text.
comparison.head(10)