In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Advertising -> Sales linear regression walkthrough.
#
# Loads the advertising dataset, prints a quick overview, fits an ordinary
# least-squares model of Sales on the three media-spend columns, reports the
# hold-out mean squared error, and predicts Sales for one new spend mix.

# Step 1: Load the data (path is relative to the working directory).
data = pd.read_csv('advertising.csv')

# Step 2: Exploratory Data Analysis (EDA)
print(data.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so emits a stray "None" line.
data.info()
print(data.describe())

# Step 3: Data Preprocessing
# Drop any row that has a missing value in any column.
data.dropna(inplace=True)

# No categorical encoding is applied: the only columns used below are
# 'TV', 'Radio', 'Newspaper' and 'Sales'. If categorical columns are added
# later, one-hot encode them here (e.g. pd.get_dummies(..., drop_first=True)).

# Step 4: Feature Selection
X = data[['TV', 'Radio', 'Newspaper']]
y = data['Sales']

# Step 5: Model Selection
model = LinearRegression()

# Step 6: Model Training
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
model.fit(X_train, y_train)

# Step 7: Model Evaluation (on the held-out rows only)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Step 8: Sales Prediction
# Replace with actual feature values for prediction. The column names and
# order match the training features in X.
new_data = pd.DataFrame({
    'TV': [200],
    'Radio': [50],
    'Newspaper': [30],
})

predicted_sales = model.predict(new_data)
print(f"Predicted Sales: {predicted_sales[0]}")





      TV  Radio  Newspaper  Sales
0  230.1   37.8       69.2   22.1
1   44.5   39.3       45.1   10.4
2   17.2   45.9       69.3   12.0
3  151.5   41.3       58.5   16.5
4  180.8   10.8       58.4   17.9
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TV         200 non-null    float64
 1   Radio      200 non-null    float64
 2   Newspaper  200 non-null    float64
 3   Sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB
None
               TV       Radio   Newspaper       Sales
count  200.000000  200.000000  200.000000  200.000000
mean   147.042500   23.264000   30.554000   15.130500
std     85.854236   14.846809   21.778621    5.283892
min      0.700000    0.000000    0.300000    1.600000
25%     74.375000    9.975000   12.750000   11.000000
50%    149.750000   22.900000   25.750000   16.000000
75%    218.825000   36.525000