# SALES PREDICTION USING PYTHON

![Sales forecasting illustration](https://assets.website-files.com/60e7f71b22c6d0b9cf329ceb/621e1a2f28ded71ee95aeede_6ProvenSalesForecastingMethodstoDriveRevenue1_a117440b5ae227c3dba5264a6521da06_2000.png)

Sales prediction with Python is a powerful tool that businesses can use to forecast future sales based on a variety of factors. By leveraging machine learning techniques, companies can analyze historical data to build predictive models that can be used to make data-driven decisions about marketing strategies, resource allocation, and revenue optimization. By continuously updating the models with new data, businesses can adapt to market dynamics and stay ahead of the competition.

In [None]:
# Import necessary libraries
# Standard library
import os
import warnings

# Third-party: numerics, data handling and plotting
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Third-party: modelling
# NOTE: `sm` is statsmodels' *formula* API (used below as sm.ols(formula=...)).
import statsmodels.formula.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

# plt.tight_layout() is deliberately not called here: with no figure open yet
# it would only create (and lay out) an empty figure.

In [None]:
# Suppress FutureWarning messages so they do not clutter the cell output
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Load the advertising dataset into a DataFrame.
# NOTE: data_path is a Kaggle input location; adjust it when running elsewhere.
data_path = "/kaggle/input/advertisingcsv/Advertising.csv"
df = pd.read_csv(data_path)

### Exploratory Data Analysis

In [None]:
# Display the first rows of the dataset (head() returns 5 rows by default)
df.head()

In [None]:
# List the column labels of the dataset
df.columns

In [None]:
# Give the column labelled 'Unnamed: 0' the explicit name 'Index'
df = df.rename(columns={'Unnamed: 0': 'Index'})

In [None]:
# Size of the dataset as a (rows, columns) tuple
df.shape

In [None]:
# Print a concise summary of the dataset: column dtypes, non-null counts and memory usage
df.info()

In [None]:
# Show the DataFrame itself as the cell output
df

In [None]:
# Statistical summary of the numerical columns, transposed so each row describes one column of df
df.describe().T

In [None]:
# Check for missing values in the dataset
missing_mask = df.isnull()
# Only the last expression of a cell is displayed, so the overall flag is
# printed explicitly instead of being silently discarded.
print("Any missing values:", missing_mask.values.any())
# Per-column count of missing values (shown as the cell output)
missing_mask.sum()

### Data Visualization

In [None]:
# Regression scatter plots of Sales (dependent variable) against each advertising
# channel (TV, Radio, Newspaper) as a visual check of the linearity assumption
predictors = ["TV", "Radio", "Newspaper"]
sns.pairplot(data=df, x_vars=predictors, y_vars="Sales", kind="reg")

In [None]:
# Histograms (20 bins each) of every column in df, not only Sales, to inspect their distributions
df.hist(bins=20, figsize=(12, 9))

In [None]:
# One scatter plot with a fitted regression line per advertising channel, each against Sales
sns.lmplot(data=df, x='TV', y='Sales')
sns.lmplot(data=df, x='Radio', y='Sales')
sns.lmplot(data=df, x='Newspaper', y='Sales')

In [None]:
# Correlation heatmap to check for multicollinearity among independent/dependent variables.
# The row-identifier column ('Index', or 'Unnamed: 0' if it was never renamed) carries
# no signal, so it is left out of the matrix; errors="ignore" tolerates its absence.
corrmat = df.drop(columns=["Index", "Unnamed: 0"], errors="ignore").corr()
f, ax = plt.subplots(figsize=(12, 9))
# Correlations range over [-1, 1]; clipping the colour scale at 0 would make every
# negative correlation indistinguishable from zero. annot prints the values in the cells.
sns.heatmap(corrmat, vmin=-1, vmax=1, annot=True, fmt=".2f", square=True, cmap="cividis", ax=ax)
plt.show()

In [None]:
# Model Preparation

# Use only the three advertising channels as predictors. Dropping just 'Sales'
# would also keep the row-number column ('Index') in X and feed it to the model,
# which would not match the OLS formula or the new-data frames used for prediction.
X = df[["TV", "Radio", "Newspaper"]]
# Target kept as a one-column DataFrame.
y = df[["Sales"]]

In [None]:
# Hold out 20% of the rows as a test set; random_state fixes the shuffle so the split is reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=46)

In [None]:
# Linear Regression Model (statsmodels OLS through the formula interface)

# Sales is regressed on the three advertising channels.
# Note: this fit uses all of df, not the train split created above.
ols_spec = sm.ols(formula="Sales ~ TV + Radio + Newspaper", data=df)
lin_model = ols_spec.fit()

In [None]:
# Print the estimated parameters of the fitted linear model, followed by a blank line
print(lin_model.params, "\n")

In [None]:
# Print the full regression summary of the fitted statsmodels model
print(lin_model.summary())

### Evaluate the model

In [None]:
# Define the models to evaluate as (display name, estimator) pairs;
# currently only scikit-learn's LinearRegression
models = [('LinearRegression', LinearRegression())]

In [None]:
# Fit every candidate on the training split and report its RMSE on the held-out test split
for name, model in models:
    # scikit-learn estimators return themselves from fit(), so the calls can be chained
    y_pred = model.fit(X_train, y_train).predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    print(f"{name}: RMSE = {rmse:.2f}")

In [None]:
# Make predictions on new data
# A single hypothetical budget, given as one row of channel values
new_data_1 = pd.DataFrame([{'TV': 100, 'Radio': 50, 'Newspaper': 25}])
predicted_sales_1 = lin_model.predict(exog=new_data_1)
print("Predicted Sales (Data 1):", predicted_sales_1)

In [None]:
# A second hypothetical budget, again as one row of channel values
new_data_2 = pd.DataFrame([{'TV': 25, 'Radio': 63, 'Newspaper': 80}])
predicted_sales_2 = lin_model.predict(exog=new_data_2)
print("Predicted Sales (Data 2):", predicted_sales_2)

In [None]:
# (empty cell; the bare name `nan` left here by the export would raise NameError if executed)