In [1]:
# Import necessary libraries
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Load the advertising dataset from 'Advertising.csv'.
# The file is expected to provide the columns TV, Radio, Newspaper
# (advertising budgets) and Sales (the target).
data = pd.read_csv('Advertising.csv')

# Fail early, with a readable message, if an expected column is absent;
# otherwise the problem only surfaces later as a bare KeyError on selection.
missing_columns = sorted({'TV', 'Radio', 'Newspaper', 'Sales'} - set(data.columns))
if missing_columns:
    raise KeyError(f"'Advertising.csv' is missing expected columns: {missing_columns}")

# Display the first few rows of the dataset.
# The heading and the frame are printed by separate print() calls: passing both
# to one print() inserts the default separator (a single space) right after the
# "\n", which pushes the first line of the table one column to the right.
print("Dataset Preview:")
print(data.head())

# Check for missing values (number of nulls per column)
print("\nMissing Values:")
print(data.isnull().sum())

# Column roles: the three advertising budgets are the predictors and
# Sales is the response.
FEATURE_COLUMNS = ['TV', 'Radio', 'Newspaper']
TARGET_COLUMN = 'Sales'

X = data[FEATURE_COLUMNS]
y = data[TARGET_COLUMN]

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the linear regression on the training rows (fit() returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Predict sales for the held-out rows
y_pred = model.predict(X_test)

# Evaluate the model on the held-out test set
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\nModel Evaluation:")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R-squared (R2 Score): {r2:.2f}")

# Pair each fitted coefficient with the name of the feature it belongs to
coefficients = pd.DataFrame({'Feature': X.columns, 'Coefficient': model.coef_})

# Print the heading and the table separately: a single print() call would put
# its default separator (a space) after the "\n" and misalign the header row
# of the table by one column.
print("\nModel Coefficients:")
print(coefficients)

# Predict sales for new data (a single example row of advertising budgets)
new_data = pd.DataFrame({
    'TV': [200],
    'Radio': [150],
    'Newspaper': [50]
})

# A linear model extrapolates without bound, so flag every feature whose new
# value falls outside the range observed in the training rows before the
# prediction is trusted.
# NOTE(review): Radio=150 is well above the Radio values shown in the dataset
# preview — confirm this example is meant to be an extrapolation.
train_min = X_train.min()
train_max = X_train.max()
extrapolated = [
    column
    for column in new_data.columns
    if ((new_data[column] < train_min[column]) | (new_data[column] > train_max[column])).any()
]
if extrapolated:
    print(f"\nWarning: new data is outside the training range for: {', '.join(extrapolated)}")

predicted_sales = model.predict(new_data)
print(f"\nPredicted Sales for New Data: {predicted_sales[0]:.2f}")


Dataset Preview:
    Unnamed: 0     TV  Radio  Newspaper  Sales
0           1  230.1   37.8       69.2   22.1
1           2   44.5   39.3       45.1   10.4
2           3   17.2   45.9       69.3    9.3
3           4  151.5   41.3       58.5   18.5
4           5  180.8   10.8       58.4   12.9

Missing Values:
 Unnamed: 0    0
TV            0
Radio         0
Newspaper     0
Sales         0
dtype: int64

Model Evaluation:
Mean Squared Error (MSE): 3.17
R-squared (R2 Score): 0.90

Model Coefficients:
      Feature  Coefficient
0         TV     0.044730
1      Radio     0.189195
2  Newspaper     0.002761

Predicted Sales for New Data: 40.44


In [3]:
# Create a small sample dataset (five rows) with the columns the modelling
# code selects: TV, Radio, Newspaper and Sales.
# The dict gets its own name so it does not rebind `data`, which holds the
# DataFrame loaded from 'Advertising.csv' earlier in the notebook.
sample_data = {
    'TV': [230.1, 44.5, 17.2, 151.5, 180.8],
    'Radio': [37.8, 39.3, 45.9, 41.3, 10.8],
    'Newspaper': [69.2, 45.1, 69.3, 58.5, 58.4],
    'Sales': [22.1, 10.4, 9.3, 18.5, 12.9]
}
df = pd.DataFrame(sample_data)


def write_csv_if_missing(frame, path):
    """Write ``frame`` to ``path`` as CSV (without the index) unless a file is already there.

    Args:
        frame: DataFrame to serialise.
        path: Destination file path (str or path-like).

    Returns:
        True if the file was written, False if an existing file was left untouched.
    """
    target = Path(path)
    if target.exists():
        return False
    frame.to_csv(target, index=False)
    return True


# Only seed 'Advertising.csv' when it is absent: writing unconditionally would
# replace the dataset the model is trained on with this five-row sample.
if not write_csv_if_missing(df, 'Advertising.csv'):
    print("'Advertising.csv' already exists; leaving it untouched.")
