In [5]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# Step 1: Load the dataset (taking the CSV file path as user input)
# Leading/trailing whitespace is stripped so a pasted path still resolves.
# Pressing Enter without typing anything falls back to the path this script
# previously always loaded.
file_path = (
    input("Please enter the path to the Sales dataset CSV file: ").strip()
    or '/content/advertising (1).csv'
)

# Read the dataset into a pandas DataFrame from the path chosen above
# (the prompt's answer used to be ignored in favour of a hard-coded path).
data = pd.read_csv(file_path)

# Step 2: Peek at the first rows to see which columns the dataset has
print(data.head())

# Step 3: Report how many missing entries each column contains
print("\nMissing values in the dataset:")
print(data.isnull().sum())

# Step 4: Separate the target column (Sales) from the advertising-spend features
y = data['Sales']
X = data[['TV', 'Radio', 'Newspaper']]

# Step 5: Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 6: Standardize the features.
# The scaler is fitted on the training split only, then the same fitted
# transform is applied to both splits so no test data leaks into the fit.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Step 7: Fit a linear regression model on the scaled training features
model = LinearRegression().fit(X_train, y_train)

# Step 8: Predict sales for the held-out rows
y_pred = model.predict(X_test)

# Step 9: Score the predictions against the true held-out sales
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f'\nMean Squared Error: {mse:.4f}')
print(f'R-squared: {r2:.4f}')

# Step 10: Predict sales for new data (example)
new_data = [[150, 30, 20]]  # Example input: TV, Radio, Newspaper expenditures
# The scaler was fitted on a DataFrame, so it remembers the column names.
# Wrapping the sample in a DataFrame with those same names (in the same order)
# avoids scikit-learn's "X does not have valid feature names" warning; the
# scaled values are the same as for the bare list.
new_data_scaled = scaler.transform(
    pd.DataFrame(new_data, columns=['TV', 'Radio', 'Newspaper'])
)
predicted_sales = model.predict(new_data_scaled)
print(f"\nPredicted sales for new data (TV=150, Radio=30, Newspaper=20): {predicted_sales[0]:.2f}")


Please enter the path to the Sales dataset CSV file: /content/advertising (1).csv
      TV  Radio  Newspaper  Sales
0  230.1   37.8       69.2   22.1
1   44.5   39.3       45.1   10.4
2   17.2   45.9       69.3   12.0
3  151.5   41.3       58.5   16.5
4  180.8   10.8       58.4   17.9

Missing values in the dataset:
TV           0
Radio        0
Newspaper    0
Sales        0
dtype: int64

Mean Squared Error: 2.9078
R-squared: 0.9059

Predicted sales for new data (TV=150, Radio=30, Newspaper=20): 16.01


