# Importing necessary libraries

In [2]:
import pandas as pd
import numpy
import matplotlib
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split

## 1. Load `Advertising.csv` dataset pandas

In [4]:
# Load the dataset
df = pd.read_csv('advertising.csv', index_col=0)

# Display the first few rows of the dataframe
df.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


## 2. Standardize each column of the dataset
### Standardize Each Column Using sklearn's `preprocessing.scale`

In [6]:
# Standardize the dataset using sklearn's scale function
df_standardized = pd.DataFrame(scale(df), columns=df.columns)

# Display the first few rows of the standardized dataframe
df_standardized.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
0,0.969852,0.981522,1.778945,1.552053
1,-1.197376,1.082808,0.669579,-0.696046
2,-1.516155,1.528463,1.783549,-0.907406
3,0.05205,1.217855,1.286405,0.86033
4,0.394182,-0.841614,1.281802,-0.215683


### 3. Add column of ones

In [8]:
# Add an intercept column (all ones)
df_standardized['Intercept'] = 1

# Reorder columns to place the intercept column at the beginning
df_standardized = df_standardized[['Intercept'] + [col for col in df_standardized.columns if col != 'Intercept']]

# Display the first few rows of the updated dataframe
df_standardized.head()

Unnamed: 0,Intercept,TV,Radio,Newspaper,Sales
0,1,0.969852,0.981522,1.778945,1.552053
1,1,-1.197376,1.082808,0.669579,-0.696046
2,1,-1.516155,1.528463,1.783549,-0.907406
3,1,0.05205,1.217855,1.286405,0.86033
4,1,0.394182,-0.841614,1.281802,-0.215683


### Divide the dataset into training and testing

In [10]:
# Define features and target variable
X = df[['TV', 'Radio', 'Newspaper']]  # Features
y = df['Sales']  # Target variable

# Split the dataset into training (85%) and testing (15%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

# Display the shapes of the resulting datasets
print(f"Training set shape: {X_train.shape}, {y_train.shape}")
print(f"Testing set shape: {X_test.shape}, {y_test.shape}")

Training set shape: (170, 3), (170,)
Testing set shape: (30, 3), (30,)
