In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error

# Load and prepare data
data = pd.read_csv('advertising.csv')
X = data[['TV', 'Radio', 'Newspaper']]
y = data['Sales']

# Split data 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate model
train_pred = model.predict(X_train)
test_pred = model.predict(X_test)

print("Model Performance:")
print(f"Training R-squared: {r2_score(y_train, train_pred):.4f}")
print(f"Test R-squared: {r2_score(y_test, test_pred):.4f}")
print(f"Training RMSE: {mean_squared_error(y_train, train_pred):.4f}")
print(f"Test RMSE: {mean_squared_error(y_test, test_pred):.4f}")

# Show feature importance
print("\nFeature Coefficients (Impact on Sales):")
for feature, coef in zip(X.columns, model.coef_):
    print(f"{feature}: {coef:.4f}")

# Function to predict new values
def predict_sales(tv, radio, newspaper):
    """Predict sales for new advertising"""
    new_data = np.array([[tv, radio, newspaper]])
    prediction = model.predict(new_data)
    return prediction[0]

# Example prediction
print("\nExample Predictions:")
print(f"230.1k TV, 37.8k Radio, 69.2k Newspaper: {predict_sales(230.1, 37.8, 69.2):.2f} thousand units")
print(f"100k TV, 25k Radio, 10k Newspaper: {predict_sales(100, 25, 10):.2f} thousand units")
print(f"300k TV, 50k Radio, 0k Newspaper: {predict_sales(300, 50, 0):.2f} thousand units")

Model Performance:
Training R-squared: 0.9001
Test R-squared: 0.9059
Training RMSE: 2.6761
Test RMSE: 2.9078

Feature Coefficients (Impact on Sales):
TV: 0.0545
Radio: 0.1009
Newspaper: 0.0043

Example Predictions:
230.1k TV, 37.8k Radio, 69.2k Newspaper: 21.37 thousand units
100k TV, 25k Radio, 10k Newspaper: 12.73 thousand units
300k TV, 50k Radio, 0k Newspaper: 26.11 thousand units


