In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import RandomizedSearchCV
import pandas as pd

# Load the advertising dataset; the script reads the TV, Radio, Newspaper and
# Sales columns from it.
data = pd.read_csv('advertising.csv')

print(data.head())
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() emits a stray "None" line.
data.info()

# Fill gaps from the next valid row, then forward-fill so that trailing gaps
# (which a backward fill cannot reach) are covered too.
# fillna(method=...) is deprecated in recent pandas, hence bfill()/ffill().
data = data.bfill().ffill()

features = data[['TV', 'Radio', 'Newspaper']]
target = data['Sales']

# Split BEFORE scaling so the scaler learns min/max from the training rows
# only; fitting it on the full dataset leaks test-set statistics into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept so that any later code referring to the fully scaled matrix still works.
features_scaled = scaler.transform(features)

regressor = GradientBoostingRegressor(n_estimators=100, random_state=42)
regressor.fit(X_train, y_train)

predictions = regressor.predict(X_test)

mae = mean_absolute_error(y_test, predictions)
# The `squared=False` flag was deprecated in scikit-learn 1.4 and removed in
# 1.6; taking the square root of the MSE works on every version.
rmse = np.sqrt(mean_squared_error(y_test, predictions))

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

def estimate_sales(tv_budget, radio_budget, newspaper_budget):
    """Forecast sales for a single advertising budget allocation.

    The three budgets are placed in a one-row frame under the column names
    ``TV``, ``Radio`` and ``Newspaper``, transformed with the module-level
    ``scaler`` and passed to the module-level ``regressor``.

    Args:
        tv_budget: Value for the ``TV`` column.
        radio_budget: Value for the ``Radio`` column.
        newspaper_budget: Value for the ``Newspaper`` column.

    Returns:
        The first (and only) element of the regressor's prediction.
    """
    budget_row = pd.DataFrame(
        [[tv_budget, radio_budget, newspaper_budget]],
        columns=['TV', 'Radio', 'Newspaper'],
    )
    scaled_row = scaler.transform(budget_row)
    return regressor.predict(scaled_row)[0]

# Example forecast for one budget mix.
predicted_sales = estimate_sales(tv_budget=150, radio_budget=30, newspaper_budget=20)
print(f"Predicted Sales: {predicted_sales}")

# One importance value per input column of the fitted regressor.
importance = regressor.feature_importances_
print("Feature Importance:", importance)

search_space = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 5, 10]
}
# The search space is a finite grid; asking for more iterations than it has
# combinations makes scikit-learn warn and truncate, so cap n_iter explicitly.
n_candidates = int(np.prod([len(values) for values in search_space.values()]))
random_search = RandomizedSearchCV(
    estimator=regressor,
    param_distributions=search_space,
    n_iter=min(10, n_candidates),
    cv=3,
    # Seeded like the rest of the script so the sampled candidates are reproducible.
    random_state=42,
)
random_search.fit(X_train, y_train)
print(f"Optimal Parameters from Random Search: {random_search.best_params_}")


      TV  Radio  Newspaper  Sales
0  230.1   37.8       69.2   22.1
1   44.5   39.3       45.1   10.4
2   17.2   45.9       69.3   12.0
3  151.5   41.3       58.5   16.5
4  180.8   10.8       58.4   17.9
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TV         200 non-null    float64
 1   Radio      200 non-null    float64
 2   Newspaper  200 non-null    float64
 3   Sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB
None
Mean Absolute Error (MAE): 0.826613697531743
Root Mean Squared Error (RMSE): 1.117085045056966
Predicted Sales: 15.330947272236232
Feature Importance: [0.85278334 0.13544096 0.0117757 ]




Optimal Parameters from Random Search: {'n_estimators': 200, 'max_depth': 5}
