In [1]:
#importing all modules or libraries

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
#loading dataset
df=pd.read_csv("Advertising.csv")
df

Unnamed: 0.1,Unnamed: 0,TV,Radio,Newspaper,Sales
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9
...,...,...,...,...,...
195,196,38.2,3.7,13.8,7.6
196,197,94.2,4.9,8.1,9.7
197,198,177.0,9.3,6.4,12.8
198,199,283.6,42.0,66.2,25.5


In [3]:
df.columns   #columns names or features names

Index(['Unnamed: 0', 'TV', 'Radio', 'Newspaper', 'Sales'], dtype='object')

In [4]:
#actual dataset
df=df[['TV','Radio','Newspaper','Sales']]
df

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9
...,...,...,...,...
195,38.2,3.7,13.8,7.6
196,94.2,4.9,8.1,9.7
197,177.0,9.3,6.4,12.8
198,283.6,42.0,66.2,25.5


In [5]:
#basis information of the dataset.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TV         200 non-null    float64
 1   Radio      200 non-null    float64
 2   Newspaper  200 non-null    float64
 3   Sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB


In [6]:
#description of dataset.
df.describe()

Unnamed: 0,TV,Radio,Newspaper,Sales
count,200.0,200.0,200.0,200.0
mean,147.0425,23.264,30.554,14.0225
std,85.854236,14.846809,21.778621,5.217457
min,0.7,0.0,0.3,1.6
25%,74.375,9.975,12.75,10.375
50%,149.75,22.9,25.75,12.9
75%,218.825,36.525,45.1,17.4
max,296.4,49.6,114.0,27.0


In [7]:
#checking the missing values.
df.isna().count()

TV           200
Radio        200
Newspaper    200
Sales        200
dtype: int64

In [8]:
df.shape #it contains 200 records and 4 features

(200, 4)

In [9]:
X = df[['TV', 'Radio', 'Newspaper']]    #independent variables
y = df['Sales']                #dependent variables

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [11]:
# Create and train the linear regression model
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
linear_pred = linear_model.predict(X_test)

linear_mse = mean_squared_error(y_test, linear_pred)
linear_r2 = r2_score(y_test, linear_pred)

In [12]:
# Create and train the decision tree regression model
tree_model = DecisionTreeRegressor()
tree_model.fit(X_train, y_train)
tree_pred = tree_model.predict(X_test)

tree_mse = mean_squared_error(y_test, tree_pred)
tree_r2 = r2_score(y_test, tree_pred)

In [13]:
# Create and train the random forest regression model
forest_model = RandomForestRegressor()
forest_model.fit(X_train, y_train)
forest_pred = forest_model.predict(X_test)

forest_mse = mean_squared_error(y_test, forest_pred)
forest_r2 = r2_score(y_test, forest_pred)

In [14]:
# Print the evaluation metrics for each model
print("Linear Regression:")
print("Mean Squared Error:", linear_mse)
print("R^2 Score:", linear_r2)
print()

print("Decision Tree Regression:")
print("Mean Squared Error:", tree_mse)
print("R^2 Score:", tree_r2)
print()

print("Random Forest Regression:")
print("Mean Squared Error:", forest_mse)
print("R^2 Score:", forest_r2)
print()


Linear Regression:
Mean Squared Error: 1.9059420857342562
R^2 Score: 0.9160691983045218

Decision Tree Regression:
Mean Squared Error: 2.39875
R^2 Score: 0.8943677186867579

Random Forest Regression:
Mean Squared Error: 0.6651088249999996
R^2 Score: 0.9707110109405649



In [15]:
# Determine the best model based on the evaluation metrics
best_model = min([(linear_mse, 'Linear Regression'),
                  (tree_mse, 'Decision Tree Regression'),
                  (forest_mse, 'Random Forest Regression')])

print("Best Model:", best_model[1])
print("Mean Squared Error:", best_model[0])

Best Model: Random Forest Regression
Mean Squared Error: 0.6651088249999996


In [16]:
# Predict sales for a new set of advertising inputs
new_data = [[100, 25, 10]]  # Replace with your new advertising inputs
new_pred =forest_model.predict(new_data)
print("Predicted Sales:", new_pred[0])

Predicted Sales: 12.515999999999998


