In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression 
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

In [2]:
# Location of the advertising dataset.
# NOTE(review): this is an absolute, machine-specific path; anyone running the
# notebook elsewhere must point DATA_PATH at their own copy of the CSV.
DATA_PATH = 'C:/Users/Akash/OneDrive - Erin.N.Nagarvala Day School/Desktop/jupyter notebook/advertising1.csv'

try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    # Report the path that was actually tried (the previous message named a
    # different file), then re-raise so the cell fails with a normal traceback
    # instead of calling exit(), which would shut down the notebook kernel.
    print(f"Error: '{DATA_PATH}' not found. Please ensure the file is uploaded.")
    raise
else:
    print("Dataset loaded successfully!")

Dataset loaded successfully!


In [3]:

# Quick look at the raw data: first rows, schema summary, per-column null counts.
first_rows = df.head()
print("\n--- Original Dataset Head ---", first_rows, sep="\n")

# DataFrame.info() prints its own report, so it is called bare (not inside print()).
print("\n--- Dataset Info ---")
df.info()

nulls_per_column = df.isnull().sum()
print("\n--- Missing Values Before Preprocessing ---", nulls_per_column, sep="\n")



--- Original Dataset Head ---
   Unnamed: 0     TV  Radio  Newspaper  Sales
0           1  230.1   37.8       69.2   22.1
1           2   44.5   39.3       45.1   10.4
2           3   17.2   45.9       69.3    9.3
3           4  151.5   41.3       58.5   18.5
4           5  180.8   10.8       58.4   12.9

--- Dataset Info ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  200 non-null    int64  
 1   TV          200 non-null    float64
 2   Radio       200 non-null    float64
 3   Newspaper   200 non-null    float64
 4   Sales       200 non-null    float64
dtypes: float64(4), int64(1)
memory usage: 7.9 KB

--- Missing Values Before Preprocessing ---
Unnamed: 0    0
TV            0
Radio         0
Newspaper     0
Sales         0
dtype: int64


In [4]:
# Remove every row that has at least one missing value.
# The result is reassigned instead of using inplace=True: the in-place form
# brings no performance benefit and silently mutates the frame that earlier
# cells already displayed.
df = df.dropna()

# Re-count nulls per column to confirm nothing is missing after the drop.
print("\n--- Missing Values After Preprocessing (if any) ---")
print(df.isnull().sum())



--- Missing Values After Preprocessing (if any) ---
Unnamed: 0    0
TV            0
Radio         0
Newspaper     0
Sales         0
dtype: int64


In [5]:
# Show the first rows of the frame after the missing-value step.
preprocessed_preview = df.head()
print("\n--- Preprocessed Dataset Head ---", preprocessed_preview, sep="\n")



--- Preprocessed Dataset Head ---
   Unnamed: 0     TV  Radio  Newspaper  Sales
0           1  230.1   37.8       69.2   22.1
1           2   44.5   39.3       45.1   10.4
2           3   17.2   45.9       69.3    9.3
3           4  151.5   41.3       58.5   18.5
4           5  180.8   10.8       58.4   12.9


In [6]:
# Predictors are the three advertising-channel columns; the target is Sales.
feature_columns = ['TV', 'Radio', 'Newspaper']
X = df[feature_columns]
y = df['Sales']

# Hold out 20% of the rows for evaluation; random_state is fixed at 42 so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the shape of each of the four resulting pieces.
print(
    f"\nTraining features shape: {X_train.shape}",
    f"Testing features shape: {X_test.shape}",
    f"Training target shape: {y_train.shape}",
    f"Testing target shape: {y_test.shape}",
    sep="\n",
)



Training features shape: (160, 3)
Testing features shape: (40, 3)
Training target shape: (160,)
Testing target shape: (40,)


In [7]:
# Fit a linear regression on the training split. fit() returns the estimator
# itself, so construction and fitting are chained into one statement.
model = LinearRegression().fit(X_train, y_train)

# Show the learned parameters: one coefficient per feature plus the intercept.
print(
    "\n--- Model Training Complete (Linear Regression) ---",
    f"Model Coefficients: {model.coef_}",
    f"Model Intercept: {model.intercept_}",
    sep="\n",
)



--- Model Training Complete (Linear Regression) ---
Model Coefficients: [0.04472952 0.18919505 0.00276111]
Model Intercept: 2.9790673381226256


In [8]:
# Predict on the held-out features, then score the predictions against y_test.
y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is derived from the MSE computed above

print(
    f"\nMean Absolute Error (MAE): {mae:.4f}",
    f"Mean Squared Error (MSE): {mse:.4f}",
    f"Root Mean Squared Error (RMSE): {rmse:.4f}",
    f"R-squared (R2) Score: {r2:.4f}",
    sep="\n",
)



Mean Absolute Error (MAE): 1.4608
Mean Squared Error (MSE): 3.1741
Root Mean Squared Error (RMSE): 1.7816
R-squared (R2) Score: 0.8994


In [9]:
print("\n--- Example Sales Prediction ---")

# One-row frame whose column names match the features the model was trained on.
new_ad_spend = pd.DataFrame({'TV': [200], 'Radio': [40], 'Newspaper': [20]})

# predict() returns one value per input row; only the first (and only) is shown.
predicted_sales = model.predict(new_ad_spend)

print(
    f"Advertising Spend (TV, Radio, Newspaper): {new_ad_spend.values[0]}",
    f"Predicted Sales: {predicted_sales[0]:.2f}",
    sep="\n",
)



--- Example Sales Prediction ---
Advertising Spend (TV, Radio, Newspaper): [200  40  20]
Predicted Sales: 19.55


In [10]:
# Second what-if scenario: a one-row frame with a smaller spend on every channel.
new_ad_spend_2 = pd.DataFrame({'TV': [50], 'Radio': [10], 'Newspaper': [5]})

# predict() returns one value per input row; only the first (and only) is shown.
predicted_sales_2 = model.predict(new_ad_spend_2)

print(
    f"\nAdvertising Spend (TV, Radio, Newspaper): {new_ad_spend_2.values[0]}",
    f"Predicted Sales: {predicted_sales_2[0]:.2f}",
    sep="\n",
)


Advertising Spend (TV, Radio, Newspaper): [50 10  5]
Predicted Sales: 7.12
