# SALES PREDICTION USING PYTHON

In [1]:
#Step 1: Load the Dataset

In [2]:
import pandas as pd

# Read the advertising dataset; the relative path means the CSV is looked up
# in the notebook's current working directory.
advertising_csv = 'advertising.csv'
data = pd.read_csv(advertising_csv)

# Bare trailing expression: the notebook renders the frame as this cell's output.
data

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9
...,...,...,...,...
195,38.2,3.7,13.8,7.6
196,94.2,4.9,8.1,14.0
197,177.0,9.3,6.4,14.8
198,283.6,42.0,66.2,25.5


In [3]:
#Step 2: Exploratory Data Analysis (EDA)

In [4]:
# Display the first few rows of the dataset
print(data.head())

# Display information about the dataset (index range, columns, non-null counts, dtypes).
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly: wrapping it in print() emitted a stray "None" line after the report.
data.info()

# Summary statistics of the dataset
print(data.describe())

      TV  Radio  Newspaper  Sales
0  230.1   37.8       69.2   22.1
1   44.5   39.3       45.1   10.4
2   17.2   45.9       69.3   12.0
3  151.5   41.3       58.5   16.5
4  180.8   10.8       58.4   17.9
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TV         200 non-null    float64
 1   Radio      200 non-null    float64
 2   Newspaper  200 non-null    float64
 3   Sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB
None
               TV       Radio   Newspaper       Sales
count  200.000000  200.000000  200.000000  200.000000
mean   147.042500   23.264000   30.554000   15.130500
std     85.854236   14.846809   21.778621    5.283892
min      0.700000    0.000000    0.300000    1.600000
25%     74.375000    9.975000   12.750000   11.000000
50%    149.750000   22.900000   25.750000   16.000000
75%    218.825000   36.525000

In [5]:
#Step 3: Data Preprocessing

In [6]:
# Count the null entries in each column and print the per-column totals.
missing_per_column = data.isna().sum()
print(missing_per_column)

TV           0
Radio        0
Newspaper    0
Sales        0
dtype: int64


In [7]:
#Step 4: Feature Selection/Engineering

In [8]:
# Separate the response from the predictors:
#   y -> the 'Sales' column on its own
#   X -> every remaining column of `data`
y = data['Sales']
X = data.drop(columns=['Sales'])

In [9]:
#Step 5: Model Selection

In [10]:
from sklearn.linear_model import LinearRegression

# Create an unfitted linear-regression estimator. No arguments are passed, so
# every hyperparameter keeps the library default; fitting happens in the
# training step further down.
model = LinearRegression()

In [11]:
#Step 6: Model Training

In [12]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the regressor on the training portion only, so the held-out rows stay
# unseen until evaluation.
model.fit(X=X_train, y=y_train)

In [13]:
#Step 7: Model Evaluation

In [14]:
from sklearn.metrics import mean_squared_error, r2_score

# Predict sales for the held-out rows
y_pred = model.predict(X_test)

# Score the predictions against the true held-out values
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Report both metrics, one per line, label first
for label, value in (("Mean Squared Error:", mse), ("R^2 Score:", r2)):
    print(label, value)

Mean Squared Error: 2.9077569102710905
R^2 Score: 0.9059011844150826


In [15]:
#Step 8: Make Predictions

In [16]:
# Example: Predict sales for new data
new_data = pd.DataFrame({'TV': [200, 300, 400], 'Radio': [40, 50, 60], 'Newspaper': [20, 30, 40]})
predicted_sales = model.predict(new_data)
print("Predicted Sales for New Data:")

print(predicted_sales)

Predicted Sales for New Data:
[19.740528   26.24427518 32.74802235]
