<a href="https://colab.research.google.com/github/karthik19-cloud/GenAI-Training/blob/main/01-product-sales-prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Build the toy advertising dataset.
# Each record is one observation: spend on three channels followed by the
# resulting sales, in the same order as `_columns`.
_columns = ('TV Budget ($)', 'Radio Budget ($)', 'Newspaper Budget ($)', 'Sales (units)')
_records = [
    (230.1, 37.8, 69.2, 22.1),
    (44.5, 39.3, 45.1, 10.4),
    (17.2, 45.9, 69.3, 9.3),
    (151.5, 41.3, 58.5, 18.5),
    (180.8, 10.8, 58.4, 12.9),
    (8.7, 48.9, 75.0, 7.2),
    (57.5, 32.8, 23.5, 11.8),
    (120.2, 19.6, 11.6, 13.2),
    (144.1, 16.0, 40.3, 15.6),
    (111.6, 12.6, 37.9, 12.2),
]

# Transpose the records into a column-name -> list-of-values mapping.
data = {
    column: [record[position] for record in _records]
    for position, column in enumerate(_columns)
}

In [3]:
# Load the column mapping into a DataFrame (one column per dict key).
df = pd.DataFrame(data)

# Show the full table under a heading; a newline separator puts the heading
# on its own line above the frame's text representation.
print("Dataset:", df, sep="\n")

Dataset:
   TV Budget ($)  Radio Budget ($)  Newspaper Budget ($)  Sales (units)
0          230.1              37.8                  69.2           22.1
1           44.5              39.3                  45.1           10.4
2           17.2              45.9                  69.3            9.3
3          151.5              41.3                  58.5           18.5
4          180.8              10.8                  58.4           12.9
5            8.7              48.9                  75.0            7.2
6           57.5              32.8                  23.5           11.8
7          120.2              19.6                  11.6           13.2
8          144.1              16.0                  40.3           15.6
9          111.6              12.6                  37.9           12.2


In [4]:
# Column roles: the three advertising budgets are the predictors,
# and sales is the quantity being predicted.
FEATURE_COLUMNS = ['TV Budget ($)', 'Radio Budget ($)', 'Newspaper Budget ($)']
TARGET_COLUMN = 'Sales (units)'

X = df[FEATURE_COLUMNS]  # feature matrix (DataFrame)
y = df[TARGET_COLUMN]    # target vector (Series)


In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
# NOTE: with the 10-row dataset defined above this leaves only 2 test rows,
# so any metric computed on the test set will be very noisy.
TEST_FRACTION = 0.2
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

# Create an ordinary least-squares linear regression and fit it on the
# training portion only.
model = LinearRegression()
model.fit(X_train, y_train)


In [6]:
# --- Compute -----------------------------------------------------------
# Score the fitted model on the held-out test rows.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Pair every feature name with its fitted coefficient.
coefficients = pd.DataFrame(dict(Feature=X.columns, Coefficient=model.coef_))

# --- Report ------------------------------------------------------------
# Error metrics, each on its own line under a heading.
print(
    "\nModel Evaluation:",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"R-squared (R²): {r2:.2f}",
    sep="\n",
)

# Per-feature coefficients as a table.
print("\nModel Coefficients:", coefficients, sep="\n")

# Fitted intercept term.
print(f"\nIntercept: {model.intercept_:.2f}")



Model Evaluation:
Mean Squared Error (MSE): 2.40
R-squared (R²): 0.64

Model Coefficients:
                Feature  Coefficient
0         TV Budget ($)     0.070397
1      Radio Budget ($)     0.197522
2  Newspaper Budget ($)    -0.060750

Intercept: 2.58


In [7]:
# Score one unseen budget allocation. The frame uses the same column names,
# in the same order, as the features the model was fitted on.
new_budget = {
    'TV Budget ($)': 150,
    'Radio Budget ($)': 20,
    'Newspaper Budget ($)': 30,
}
new_data = pd.DataFrame([new_budget])  # single-row frame built from one record

predicted_sales = model.predict(new_data)
print(f"\nPredicted Sales for new data: {predicted_sales[0]:.2f} units")


Predicted Sales for new data: 15.26 units
