# SALES PREDICTION USING PYTHON
## Sham Hiruthik
Sales prediction means predicting how much of a product people will buy based on factors
such as the amount you spend to advertise your product, the segment of people you
advertise to, or the platform on which you advertise your product.
Typically, product- and service-based businesses need their data scientists to predict
future sales with every step they take to manipulate the cost of advertising their
product. So let's start the task of sales prediction with machine learning using Python.


In [None]:
# Importing lib
# Standard library
import warnings

# Third-party: data handling and plotting
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Notebook-wide setup: use the 'ggplot' style for every figure and
# silence all warnings for the rest of the session.
plt.style.use('ggplot')
warnings.simplefilter('ignore')

In [None]:
# Data ingestion: load the dataset from 'Advertising.csv'.
# The path is relative, so the file must be in the current working directory.
df = pd.read_csv('Advertising.csv')

In [None]:
# Preview data: display the first few rows of the DataFrame
df.head()

In [None]:
# Checking for shape: (number of rows, number of columns)
df.shape

In [None]:
# Checking basic information about dataset:
# prints each column's dtype and non-null count, plus memory usage
df.info()

In [None]:
# Descriptive stats: summary statistics (count, mean, std, min, quartiles, max)
# for the numeric columns
df.describe()

In [None]:
# Check for columns: list the column labels of the DataFrame
df.columns

In [None]:
# Drop 'Unnamed: 0'
# Reassign instead of dropping in place, and tolerate a missing column, so this
# cell can be re-run safely: an in-place drop raises KeyError on the second run
# because the column is already gone.
df = df.drop(columns='Unnamed: 0', errors='ignore')
# Preview the data
df.head()

In [None]:
# Check for duplicates: number of rows that exactly repeat an earlier row
df.duplicated().sum()

In [None]:
# Check for null values: count of missing entries in each column
df.isnull().sum()

In [None]:
# Visualize data
# Scatter plot of the 'TV' column against 'Sales'; the title is set on the
# Axes object that seaborn returns.
tv_ax = sns.scatterplot(x='TV', y='Sales', data=df, color='blue')
tv_ax.set_title('TV vs Sales')
plt.show()

In [None]:
# Scatter plot of the 'Radio' column against 'Sales'; the title is set on the
# Axes object that seaborn returns.
radio_ax = sns.scatterplot(x='Radio', y='Sales', data=df, color='blue')
radio_ax.set_title('Radio vs Sales')
plt.show()

In [None]:
# Scatter plot of the 'Newspaper' column against 'Sales'; the title is set on
# the Axes object that seaborn returns.
newspaper_ax = sns.scatterplot(x='Newspaper', y='Sales', data=df, color='blue')
newspaper_ax.set_title('Newspaper vs Sales')
plt.show()

In [None]:
# Segregating into independent and dependent
# Select the features by excluding the target *by name* rather than by position:
# a positional slice (everything but the last column) would silently put 'Sales'
# into X if the column order of the CSV ever changed.
X = df.drop(columns='Sales')
y = df['Sales']

In [None]:
# Train test and split
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
)

In [None]:
# Model training
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

In [None]:
# Candidate regressors to compare, keyed by a short label.
# The tree and forest models are seeded so their scores are reproducible from
# run to run, consistent with the seeded train/test split; left unseeded, the
# comparison changes on every execution.
models = {
    'dt': DecisionTreeRegressor(random_state=0),
    'lr': LinearRegression(),
    'random': RandomForestRegressor(random_state=0),
}

In [None]:
# Fit every candidate model on the training split and record its R^2 score on
# the test split in `report`, keyed by the model's label in `models`.
report = {}
# Iterate over (label, estimator) pairs directly instead of indexing into
# freshly built key/value lists on every pass.
for name, model in models.items():
    print(f"Model Training started with {model}")
    model.fit(X_train, y_train)
    print("Training completed successfully")
    y_test_pred = model.predict(X_test)
    print("Calculating score")
    test_model_score = r2_score(y_test, y_test_pred)
    print(f"Calculted score: {round(test_model_score*100,2)}% for {model}")
    print("=="*30)
    report[name] = test_model_score

In [None]:
# We will select randomforest
# Seed the forest so the selected model gives the same predictions on every
# run; an unseeded forest is rebuilt differently each time the cell executes.
# NOTE: the variable name `random` is kept because later cells use it; be aware
# that it would shadow the standard-library `random` module if that were imported.
random = RandomForestRegressor(random_state=0)

In [None]:
# Fit the selected forest on the training split, then predict the held-out
# test rows. fit() returns the estimator itself, so the calls can be chained.
y_pred = random.fit(X_train, y_train).predict(X_test)

In [None]:
# Inspect the row at position 6 as an array of its raw values
df.iloc[6].values

In [None]:
# Predict with the selected estimator `random`, not with `model`, which is only
# whatever the comparison loop happened to leave bound on its last iteration.
# The sample is wrapped in a DataFrame carrying the training column labels so
# the input matches the feature names the estimator was fitted with.
# NOTE(review): the three values are presumably the feature values of the row
# inspected in the previous cell, in the column order of X - confirm.
sample = pd.DataFrame([[57.5, 32.8, 23.5]], columns=X.columns)
random.predict(sample)[0]