In [1]:
# Import required libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score



In [2]:
# Read the advertising CSV from the Kaggle input directory into `df`,
# then show the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer="/kaggle/input/sales-data/advertising.csv")
df.head(n=5)

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9


In [3]:
# Separate the target from the predictors:
#   y -> the 'Sales' column
#   X -> every remaining column of df (df itself is left unmodified)
y = df['Sales']
X = df.drop(labels='Sales', axis='columns')

In [4]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Create a LinearRegression estimator and fit it on the training split.
# The fit call is left as the last expression so the cell still displays
# the fitted estimator.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [6]:
# Run the fitted model over the held-out test features
y_pred = model.predict(X=X_test)

In [7]:
# Score the test-set predictions against the true targets
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# One print call, two lines of output (same text as two separate prints)
print(
    f'Mean Squared Error: {mse}',
    f'R-squared: {r2}',
    sep='\n',
)

Mean Squared Error: 2.9077569102710896
R-squared: 0.9059011844150826


In [8]:
# Score a single hand-written observation with the fitted model.
# Each dict key becomes a column of the one-row frame handed to predict();
# each value is a one-element list, i.e. exactly one row.
new_data = {'TV': [17.2], 'Radio': [45.9], 'Newspaper': [69.3]}

new_df = pd.DataFrame(data=new_data)
predicted_sales = model.predict(new_df)

# predict() returns one value per input row; report the only one
print(f'Predicted Sales: {predicted_sales[0]}')

Predicted Sales: 10.585607619217937
