<a href="https://colab.research.google.com/github/karthik19-cloud/GenAI-Training/blob/main/01_product_sales_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ============================================
# 📊 Multivariate Linear Regression Model
# Predicting Sales based on Advertising Budgets
# ============================================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
# =====================
# 1️⃣ Load the Dataset
# =====================
# Read the advertising table from 'advertising.csv'; the relative path is
# resolved against the kernel's current working directory.
df = pd.read_csv('advertising.csv')

# Confirm the load and preview the first rows of the frame.
print("✅ Dataset Loaded Successfully:", df.head(), sep="\n")

Dataset:
   TV Budget ($)  Radio Budget ($)  Newspaper Budget ($)  Sales (units)
0          230.1              37.8                  69.2           22.1
1           44.5              39.3                  45.1           10.4
2           17.2              45.9                  69.3            9.3
3          151.5              41.3                  58.5           18.5
4          180.8              10.8                  58.4           12.9
5            8.7              48.9                  75.0            7.2
6           57.5              32.8                  23.5           11.8
7          120.2              19.6                  11.6           13.2
8          144.1              16.0                  40.3           15.6
9          111.6              12.6                  37.9           12.2


In [None]:
# ===========================
# 2️⃣ Explore the Data
# ===========================
# Summary statistics as reported by DataFrame.describe().
print("\nDataset Summary:")
print(df.describe())

# Correlation Matrix
# numeric_only=True restricts the correlation to numeric columns, so a
# non-numeric column in the CSV (an ID or label, say) cannot make
# DataFrame.corr raise under pandas 2.x. For an all-numeric frame the
# result is the same as plain df.corr().
correlations = df.corr(numeric_only=True)

# Draw on an explicit Axes rather than relying on pyplot's implicit
# "current axes", so the title is guaranteed to land on the heatmap.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(correlations, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
ax.set_title("🔗 Correlation Heatmap of Features")
plt.show()

# Pairplot (relationships between variables)
sns.pairplot(df)
# y=1.02 places the figure-level title slightly above the top of the figure.
plt.suptitle("📈 Pairplot of Advertising Budgets vs Sales", y=1.02)
plt.show()

In [None]:
# ===========================
# 3️⃣ Prepare Data for Model
# ===========================
# Predictors are the three advertising budget columns; the target is the
# 'Sales (units)' column.
feature_columns = ['TV Budget ($)', 'Radio Budget ($)', 'Newspaper Budget ($)']
target_column = 'Sales (units)'

X = df[feature_columns]
y = df[target_column]

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# ===========================
# 4️⃣ Train the Model
# ===========================
# Fit a linear regression on the training split only. The fit call is kept
# as the cell's final expression so the notebook still displays the fitted
# estimator as the cell output.
model = LinearRegression()
model.fit(X=X_train, y=y_train)


In [None]:
# ===========================
# 5️⃣ Evaluate the Model
# ===========================
# Score the fitted model on the held-out test split.
y_pred = model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# Report both metrics rounded to two decimals.
print(
    "\n📊 Model Evaluation:",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"R-squared (R²): {r2:.2f}",
    sep="\n",
)

# Coefficients
# Pair each feature column name with its fitted coefficient.
coefficients = pd.DataFrame({'Feature': X.columns, 'Coefficient': model.coef_})
print("\nModel Coefficients:", coefficients, sep="\n")
print(f"\nIntercept: {model.intercept_:.2f}")

In [None]:
# ===========================
# 6️⃣ Visualization
# ===========================

# Actual vs Predicted plot
fig, ax = plt.subplots(figsize=(7, 5))
ax.scatter(y_test, y_pred, color='blue', edgecolors='k')
ax.set_xlabel("Actual Sales (units)")
ax.set_ylabel("Predicted Sales (units)")
ax.set_title("🎯 Actual vs Predicted Sales")

# Dashed y = x reference line: points on it are perfect predictions.
# Its extent comes from the min/max of the full target `y`, not only the
# test split being plotted.
lo, hi = y.min(), y.max()
ax.plot([lo, hi], [lo, hi], color='red', linestyle='--')
plt.show()

In [None]:
# ===========================
# 7️⃣ Predict on New Data
# ===========================
# One hypothetical budget allocation, expressed as a single-row frame whose
# column names and order match the features the model was trained on.
new_data = pd.DataFrame([
    {'TV Budget ($)': 150, 'Radio Budget ($)': 20, 'Newspaper Budget ($)': 30},
])

# predict returns one value per input row; there is only one row here.
predicted_sales = model.predict(new_data)
print(f"\n🔮 Predicted Sales for new data: {predicted_sales[0]:.2f} units")