In [72]:
import os

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [73]:
# Location of the advertising CSV. Set the ADVERTISING_CSV environment
# variable to point at your own copy; when it is unset (or empty) the cell
# falls back to the path this notebook was originally written with.
DATA_PATH = os.environ.get('ADVERTISING_CSV') or (
    'C:/Users/Akash/OneDrive - Erin.N.Nagarvala Day School/Desktop/jupyter notebook/advertising.csv'
)

try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError as err:
    # Raise instead of calling exit(): ending the session would hide the
    # cause, and every later cell needs `df`, so stopping here with a
    # message that names the attempted path is the useful failure mode.
    raise FileNotFoundError(
        f"Error: dataset not found at '{DATA_PATH}'. "
        "Set the ADVERTISING_CSV environment variable to the location of advertising.csv."
    ) from err
else:
    print("Dataset loaded successfully!")

Dataset loaded successfully!


In [74]:

# First look at the loaded frame: sample rows, column summary, null counts.
preview_rows = df.head()
print("\n--- Original Dataset Head ---")
print(preview_rows)

print("\n--- Dataset Info ---")
df.info()  # info() prints its own report, so it is not wrapped in print()

# isna() is the same check as isnull(); summing gives nulls per column.
null_counts = df.isna().sum()
print("\n--- Missing Values Before Preprocessing ---")
print(null_counts)



--- Original Dataset Head ---
      TV  Radio  Newspaper  Sales
0  230.1   37.8       69.2   22.1
1   44.5   39.3       45.1   10.4
2   17.2   45.9       69.3   12.0
3  151.5   41.3       58.5   16.5
4  180.8   10.8       58.4   17.9

--- Dataset Info ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TV         200 non-null    float64
 1   Radio      200 non-null    float64
 2   Newspaper  200 non-null    float64
 3   Sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB

--- Missing Values Before Preprocessing ---
TV           0
Radio        0
Newspaper    0
Sales        0
dtype: int64


In [75]:
# Remove every row that has at least one missing value. The cleaned frame is
# bound back to `df` instead of using inplace=True, so the object that the
# earlier cells displayed is not mutated underneath them.
df = df.dropna()

# Per-column null counts after cleaning.
print("\n--- Missing Values After Preprocessing (if any) ---")
print(df.isnull().sum())



--- Missing Values After Preprocessing (if any) ---
TV           0
Radio        0
Newspaper    0
Sales        0
dtype: int64


In [76]:
# Show the first five rows again now that the cleaning cell has run.
print("\n--- Preprocessed Dataset Head ---")
print(df.head(5))



--- Preprocessed Dataset Head ---
      TV  Radio  Newspaper  Sales
0  230.1   37.8       69.2   22.1
1   44.5   39.3       45.1   10.4
2   17.2   45.9       69.3   12.0
3  151.5   41.3       58.5   16.5
4  180.8   10.8       58.4   17.9


In [77]:
# Predictors are the three advertising channels; the target is Sales.
feature_columns = ['TV', 'Radio', 'Newspaper']
X, y = df[feature_columns], df['Sales']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# partition the same on every run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Report the size of each piece of the split.
print(f"\nTraining features shape: {X_train.shape}")
print(f"Testing features shape: {X_test.shape}")
print(f"Training target shape: {y_train.shape}")
print(f"Testing target shape: {y_test.shape}")



Training features shape: (160, 3)
Testing features shape: (40, 3)
Training target shape: (160,)
Testing target shape: (40,)


In [78]:
# Fit a linear regression on the training rows only; fit() returns the
# estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)

# Show the learned parameters: one coefficient per feature column, plus the
# intercept.
print("\n--- Model Training Complete (Linear Regression) ---")
print(f"Model Coefficients: {model.coef_}")
print(f"Model Intercept: {model.intercept_}")



--- Model Training Complete (Linear Regression) ---
Model Coefficients: [0.05450927 0.10094536 0.00433665]
Model Intercept: 4.714126402214127


In [79]:
# Predict on the held-out rows only.
y_pred = model.predict(X_test)

# Each metric takes (true values, predictions), so compute them in one pass.
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)
# RMSE is derived from MSE rather than computed separately.
rmse = np.sqrt(mse)

print(f"\nMean Absolute Error (MAE): {mae:.4f}")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-squared (R2) Score: {r2:.4f}")



Mean Absolute Error (MAE): 1.2748
Mean Squared Error (MSE): 2.9078
Root Mean Squared Error (RMSE): 1.7052
R-squared (R2) Score: 0.9059


In [80]:
print("\n--- Example Sales Prediction ---")

# One-row frame whose columns use the same names and order as the training
# features, so it can be passed straight to the fitted model.
new_ad_spend = pd.DataFrame({'TV': [200], 'Radio': [40], 'Newspaper': [20]})

# predict() returns one value per input row; there is exactly one row here.
predicted_sales = model.predict(new_ad_spend)

spend_row = new_ad_spend.values[0]
print(f"Advertising Spend (TV, Radio, Newspaper): {spend_row}")
print(f"Predicted Sales: {predicted_sales[0]:.2f}")



--- Example Sales Prediction ---
Advertising Spend (TV, Radio, Newspaper): [200  40  20]
Predicted Sales: 19.74


In [81]:
# Second example: a smaller budget, again as a one-row frame whose columns
# use the same names and order as the training features.
new_ad_spend_2 = pd.DataFrame({'TV': [50], 'Radio': [10], 'Newspaper': [5]})

# predict() returns one value per input row; there is exactly one row here.
predicted_sales_2 = model.predict(new_ad_spend_2)

spend_row_2 = new_ad_spend_2.values[0]
print(f"\nAdvertising Spend (TV, Radio, Newspaper): {spend_row_2}")
print(f"Predicted Sales: {predicted_sales_2[0]:.2f}")


Advertising Spend (TV, Radio, Newspaper): [50 10  5]
Predicted Sales: 8.47
