In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.cluster import KMeans


In [None]:
# Step 1: Load the dataset (expects 'Advertising Budget and Sales.csv' in the working directory)
# Keep the file name in one variable so the read call stays short and the
# path is easy to spot when it needs to change.
csv_path = 'Advertising Budget and Sales.csv'
df = pd.read_csv(csv_path)


FileNotFoundError: [Errno 2] No such file or directory: 'Advertising Budget and Sales.csv'

In [None]:
# Step 2: Explore and Preprocess Data
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
df.info()                   # Data types and non-null counts per column
print(df.isnull().sum())    # Number of missing values per column

In [None]:
# Use the raw column names here: the short aliases ('TV', 'Radio', 'Newspaper',
# 'Sales') only exist once the rename cell further down has run, so selecting
# them at this point would raise a KeyError on a fresh top-to-bottom run.
features = ['TV Ad Budget ($)', 'Radio Ad Budget ($)', 'Newspaper Ad Budget ($)']
target = 'Sales ($)'

In [None]:
# Pairplot of the columns currently listed in `features`
feature_frame = df[features]
sns.pairplot(feature_frame)
# y slightly above 1 lifts the title over the top edge of the figure
plt.suptitle("Pairplot of Features", y=1.02)
plt.show()

In [None]:
# Show the column labels the loaded DataFrame actually carries
column_labels = df.columns
print(column_labels)



In [None]:
# Response column, followed by the three ad-budget predictor columns
target = 'Sales ($)'
features = [
    'TV Ad Budget ($)',
    'Radio Ad Budget ($)',
    'Newspaper Ad Budget ($)',
]


In [None]:
# Pairplot restricted to the columns listed in `features`
feature_frame = df[features]
sns.pairplot(feature_frame)


In [None]:
# Shorten the raw column names. rename() returns a new frame that is bound back
# to `df`, rather than mutating the existing object with inplace=True.
column_aliases = {
    'TV Ad Budget ($)': 'TV',
    'Radio Ad Budget ($)': 'Radio',
    'Newspaper Ad Budget ($)': 'Newspaper',
    'Sales ($)': 'Sales',
}
df = df.rename(columns=column_aliases)

features = ['TV', 'Radio', 'Newspaper']
target = 'Sales'

# rename() silently ignores keys that match no column, so fail here with a
# clear message instead of a bare KeyError in a later cell.
missing_columns = [col for col in features + [target] if col not in df.columns]
assert not missing_columns, f"columns missing after rename: {missing_columns}"

sns.pairplot(df[features])


In [None]:
# Step 3: Split data into train and test sets
X, y = df[features], df[target]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:

# Step 4: Train Linear Regression
# fit() returns the estimator itself, so construction and training are chained
lr_model = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)


In [None]:

# Step 5: Train Polynomial Regression (degree=2)
# The pipeline expands the inputs into degree-2 polynomial terms and then fits
# an ordinary linear regression on the expanded features.
poly_steps = [
    ('poly_features', PolynomialFeatures(degree=2)),
    ('lin_reg', LinearRegression()),
]
poly_pipeline = Pipeline(steps=poly_steps)
poly_pipeline.fit(X_train, y_train)
y_pred_poly = poly_pipeline.predict(X_test)

In [None]:

# Step 6: Train Support Vector Regression (SVR) with RBF kernel
# SVR needs feature scaling
from sklearn.preprocessing import StandardScaler

# Separate scalers for inputs and target so predictions can be mapped back
# to the original target scale afterwards.
scaler_X, scaler_y = StandardScaler(), StandardScaler()

# Scaling statistics come from the training split only; the test split is
# transformed with those same statistics.
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

# StandardScaler works on 2-D input, hence the reshape to a single column
# and the ravel() back to 1-D for SVR.
y_train_column = y_train.values.reshape(-1, 1)
y_train_scaled = scaler_y.fit_transform(y_train_column).ravel()

svr_model = SVR(kernel='rbf').fit(X_train_scaled, y_train_scaled)

# Predict in scaled space, then undo the target scaling
y_pred_svr_scaled = svr_model.predict(X_test_scaled)
y_pred_svr = scaler_y.inverse_transform(y_pred_svr_scaled.reshape(-1, 1)).ravel()


In [None]:
# Step 7: Evaluate Models
def print_metrics(y_true, y_pred, model_name):
    """Print MAE, MSE and RMSE for one set of predictions and return them.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values to compare against ``y_true``.
        model_name: Label printed in front of the metrics.

    Returns:
        A ``(mae, mse, rmse)`` tuple, where ``rmse`` is the square root of ``mse``.
    """
    mse = mean_squared_error(y_true, y_pred)
    mae, rmse = mean_absolute_error(y_true, y_pred), np.sqrt(mse)
    print(f"{model_name} -> MAE: {mae:.3f}, MSE: {mse:.3f}, RMSE: {rmse:.3f}")
    return mae, mse, rmse

# Score each model on the test split (print_metrics prints one line per model)
lr_scores = list(print_metrics(y_test, y_pred_lr, "Linear Regression"))
poly_scores = list(print_metrics(y_test, y_pred_poly, "Polynomial Regression"))
svr_scores = list(print_metrics(y_test, y_pred_svr, "SVR"))

# Keep the individual metrics available under their own names as well
mae_lr, mse_lr, rmse_lr = lr_scores
mae_poly, mse_poly, rmse_poly = poly_scores
mae_svr, mse_svr, rmse_svr = svr_scores

# Bar graph comparison: one group per metric, one bar per model
metrics = ['MAE', 'MSE', 'RMSE']
x = np.arange(len(metrics))
width = 0.25

fig, ax = plt.subplots()
bar_groups = (
    (-width, lr_scores, 'Linear Regression'),
    (0.0, poly_scores, 'Polynomial Regression'),
    (width, svr_scores, 'SVR'),
)
# Each model's bars are shifted by one bar width so the three sit side by side
for offset, scores, label in bar_groups:
    ax.bar(x + offset, scores, width, label=label)

ax.set_xticks(x)
ax.set_xticklabels(metrics)
ax.set_ylabel('Error')
ax.set_title('Model Comparison')
ax.legend()
plt.show()


In [None]:
# Step 8: Apply K-Means Clustering (3 clusters) on features
# NOTE(review): X is passed in as-is, without the scaling used for SVR — confirm
# that clustering on the raw feature values is intended.
kmeans = KMeans(n_clusters=3, random_state=42)
cluster_labels = kmeans.fit_predict(X)
df['Cluster'] = cluster_labels


In [None]:
# Scatter plot of TV budget against sales, coloured by K-Means cluster.
# The columns were renamed to 'TV' / 'Sales' in an earlier cell, so the data is
# selected by those names; the original '($)' names are kept only as axis labels.
# hue='Cluster' is what makes the cluster assignment visible and gives the
# legend its entries.
cluster_ax = sns.scatterplot(
    data=df,
    x='TV',
    y='Sales',
    hue='Cluster',
)
cluster_ax.set_title('K-Means Clustering')
cluster_ax.set_xlabel('TV Ad Budget ($)')
cluster_ax.set_ylabel('Sales ($)')
cluster_ax.legend(title='Clusters')
cluster_ax.grid(True)
plt.show()
