In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly.express as px
import altair as alt

In [2]:
# Location of the advertising dataset inside the Kaggle input directory.
DATA_PATH = "/kaggle/input/product-advertising-data/Advertising_Data.csv"
# Read the CSV into the DataFrame used by every later cell.
df = pd.read_csv(filepath_or_buffer=DATA_PATH)

In [3]:
# Preview the first rows of the loaded data (bare last expression -> rich table display).
df.head()

Unnamed: 0,TV,Billboards,Google_Ads,Social_Media,Influencer_Marketing,Affiliate_Marketing,Product_Sold
0,281.42,538.8,123.94,349.3,242.77,910.1,7164.0
1,702.97,296.53,558.13,180.55,781.06,132.43,5055.0
2,313.14,295.94,642.96,505.71,438.91,464.23,6154.0
3,898.52,61.27,548.73,240.93,278.96,432.27,5480.0
4,766.52,550.72,651.91,666.33,396.33,841.93,9669.0


In [4]:
# Count missing values per column and report them.
null_counts = df.isnull().sum()
print(null_counts)

# Only celebrate a complete dataset when it really is complete; the original
# printed the message unconditionally, which would be wrong for data with gaps.
total_missing = int(null_counts.sum())
if total_missing == 0:
    print(' \nDear Dataset,Gratitude for being flawlessly complete! Your absence of null values has made analysis a breeze. 🚀 Thanks for the seamless journey.')
else:
    print(f' \n{total_missing} missing values found - handle them before continuing the analysis.')

TV                      0
Billboards              0
Google_Ads              0
Social_Media            0
Influencer_Marketing    0
Affiliate_Marketing     0
Product_Sold            0
dtype: int64
 
Dear Dataset,Gratitude for being flawlessly complete! Your absence of null values has made analysis a breeze. 🚀 Thanks for the seamless journey.


In [5]:
# Select the advertising-channel columns by name rather than by position.
# A positional slice (`iloc[:, :-1]`) silently picks the wrong columns once
# another column (e.g. Total_Cost) has been appended to `df` and this cell is
# re-run; an explicit list keeps the cell idempotent.
channel_columns = ['TV', 'Billboards', 'Google_Ads', 'Social_Media',
                   'Influencer_Marketing', 'Affiliate_Marketing']
df_excluded_product_sold = df[channel_columns]

# Melt to long format (one row per original row and channel) for plotting.
df_melted = pd.melt(df_excluded_product_sold, var_name='Channel', value_name='Cost')

# Bar chart of cost per channel; the individual rows are stacked within each bar.
fig = px.bar(df_melted, x='Channel', y='Cost',
             title='Advertising Costs by Channel',
             color='Channel',
             labels={'Cost': 'Advertising Cost'},
             color_discrete_sequence=px.colors.qualitative.Set1)
fig.show()

In [6]:
# Long format: one row per original row and channel.
df_melted = pd.melt(df_excluded_product_sold, var_name='Channel', value_name='Cost')

# Horizontal bars of the mean cost per channel, highest average first.
# The sort must name the raw data field ("Cost") and let `op="mean"` do the
# aggregation; "mean(Cost)" is encoding shorthand, not a field in the data,
# so sorting on it leaves the bars unsorted.
alt.Chart(df_melted).mark_bar().encode(
    alt.X("mean(Cost):Q", title=None),
    alt.Y(
        "Channel:N",
        sort=alt.EncodingSortField(field="Cost", op="mean", order="descending"),
        title=None,
    ),
).properties(
    title="Top Advertising Channels by Average Cost",
    width=800,
    height=350,
).configure_title(
    fontSize=14,
    anchor='middle'
).configure_axis(
    labelFontSize=12,
    titleFontSize=14
)

In [7]:
# Row-wise total spend across the six advertising channels, stored on `df`.
spend_columns = [
    'TV',
    'Billboards',
    'Google_Ads',
    'Social_Media',
    'Influencer_Marketing',
    'Affiliate_Marketing',
]
df['Total_Cost'] = df[spend_columns].sum(axis=1)

# Long format keyed by Product_Sold; every other column (Total_Cost included)
# becomes a value of 'Channel'.
df_melted = df.melt(id_vars=['Product_Sold'], var_name='Channel', value_name='Cost')

fig = px.bar(
    df_melted,
    x='Channel',
    y='Cost',
    color='Channel',
    title='Total Advertising Costs by Channel',
    color_discrete_sequence=px.colors.qualitative.Set1,
)
fig.show()

In [8]:
# Long format of the channel-only frame: one row per original row and channel.
df_melted = df_excluded_product_sold.melt(var_name='Channel', value_name='Cost')

# Pie chart of cost by channel.
fig = px.pie(
    df_melted,
    names='Channel',
    values='Cost',
    title=' Advertising Costs by Channel',
    color='Channel',
    color_discrete_sequence=px.colors.qualitative.Set1,
)
fig.show()

In [9]:
# Build the long-format frame in this cell instead of reusing whichever
# `df_melted` was assigned last: other cells rebind that name to frames that
# also contain Total_Cost, which would double-count spend in the pie if the
# cells are executed out of order.
df_melted = pd.melt(df_excluded_product_sold, var_name='Channel', value_name='Cost')

# Pie chart of cost by channel.
fig = px.pie(df_melted, names='Channel', values='Cost',
             title='Total Advertising Costs by Channel',
             color='Channel',
             color_discrete_sequence=px.colors.qualitative.Set1)
fig.show()

In [10]:
# Pairwise correlation between every advertising channel and Product_Sold.
correlation_matrix = df[['TV', 'Billboards', 'Google_Ads', 'Social_Media',
                         'Influencer_Marketing', 'Affiliate_Marketing', 'Product_Sold']].corr()

# Heatmap of the correlation matrix. (The melted copy of the matrix that used
# to be built here was never read, so it has been dropped.)
fig = px.imshow(correlation_matrix,
                labels={'x': 'Advertising Channel', 'y': 'Advertising Channel'},
                title='Correlation Heatmap Between Advertising Channels and Product Sold')
fig.show()

In [11]:
# Correlation of each advertising channel with Product_Sold.
# The target's correlation with itself is always 1.0, so it is dropped before
# looking for the maximum; otherwise 'Product_Sold' would always "win".
correlations = (
    df[['TV', 'Billboards', 'Google_Ads', 'Social_Media',
        'Influencer_Marketing', 'Affiliate_Marketing', 'Product_Sold']]
    .corr()['Product_Sold']
    .drop('Product_Sold')
)
most_effective_channel = correlations.idxmax()

# Tidy frame for plotting, with an explicit highlight label per channel.
correlation_df = pd.DataFrame({
    'Channel': correlations.index,
    'Correlation': correlations.values,
    'Highlight': np.where(correlations.index == most_effective_channel,
                          'Most effective', 'Other'),
})

# Colours are assigned per colour *group*, not per bar, so an explicit map is
# used: a per-bar colour list passed as a sequence would not highlight anything.
# `category_orders` keeps the bars in the original channel order even though
# the two colour groups are drawn as separate traces.
fig = px.bar(correlation_df, x='Channel', y='Correlation',
             labels={'Channel': 'Advertising Channel',
                     'Correlation': 'Correlation with Product Sold',
                     'Highlight': 'Channel'},
             title='Correlation Between Advertising Channels and Product Sold',
             color='Highlight',
             color_discrete_map={'Most effective': 'red', 'Other': 'grey'},
             category_orders={'Channel': list(correlations.index)})
fig.show()

In [12]:
# Long format keyed by Product_Sold; every other column of `df` becomes a 'Channel'.
df_melted = df.melt(id_vars=['Product_Sold'], var_name='Channel', value_name='Cost')

# Alphabetical ordering of every column except the target, used for the x-axis.
channel_order = sorted(col for col in df.columns if col != 'Product_Sold')

# One box per channel showing the distribution of its cost values.
fig = px.box(
    df_melted,
    x='Channel',
    y='Cost',
    color='Channel',
    title='Channel Comparison: Distribution of Costs',
    labels={'Cost': 'Advertising Cost'},
    category_orders={"Channel": channel_order},
)
fig.show()

In [13]:
# Summary statistics (count, mean, std, min, quartiles, max) for the columns of `df`.
df.describe()

Unnamed: 0,TV,Billboards,Google_Ads,Social_Media,Influencer_Marketing,Affiliate_Marketing,Product_Sold,Total_Cost
count,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
mean,517.431,502.644933,512.444133,489.8001,465.732567,484.430633,7031.523333,2972.483367
std,288.114792,275.842369,285.422376,273.883915,288.31408,277.955458,1703.614951,672.237327
min,1.04,3.63,14.86,11.69,0.77,6.74,2259.0,1150.11
25%,273.09,277.9125,250.53,265.6125,214.48,267.6775,5922.5,2547.99
50%,513.97,533.02,528.965,486.385,480.355,451.315,7051.0,3032.07
75%,774.275,745.005,763.345,695.1175,703.0925,721.065,8278.0,3364.8575
max,998.1,995.32,999.23,996.16,999.83,987.58,12227.0,5225.42




Dear Kaggle Community,

I hope this message finds you well and thriving in the world of data exploration! 🌐✨

I am thrilled to share with you my latest creation, the "Product Advertising Data" dataset — a treasure trove of insights into the dynamic landscape of advertising costs and their profound impact on product sales. After extensive efforts, I've curated this dataset with the singular goal of diving deep into visualizations that uncover intricate patterns, correlations, and meaningful trends within the advertising realm.

Your support is invaluable! 🙌 By taking a moment to explore, engage, and upvote this dataset, you contribute to the collective knowledge of our amazing community. Your gesture not only acknowledges the effort invested but also propels us all forward in the pursuit of data-driven excellence.

Let's collaborate in unleashing the power of data! 💡📊 Your upvote is not just a vote; it's an endorsement of shared learning and growth.



Thank you for your time, curiosity, and support. 🚀