In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for path in (os.path.join(root, entry) for entry in files):
        print(path)

In [None]:
import plotly.io as pio
import plotly.graph_objects as go
from plotly.figure_factory import create_distplot

# Register a custom "draft" template whose layout carries a single large,
# faint, rotated annotation centred on the paper, then make it the default
# template for every figure created below.
# NOTE(review): the annotation defines neither `text` nor `name`, so it is
# presumably invisible as written — confirm whether a watermark text was intended.
# NOTE(review): the default is set to "draft" alone, not combined with another
# template — confirm that dropping the stock theme is intended.
pio.templates["draft"] = go.layout.Template(
    layout={
        "annotations": [
            {
                "x": 0.5,
                "y": 0.5,
                "xref": "paper",
                "yref": "paper",
                "showarrow": False,
                "textangle": -30,
                "opacity": 0.1,
                "font": {"color": "black", "size": 100},
            }
        ]
    }
)
pio.templates.default = "draft"

In [None]:
# Load the Social Network Ads CSV from the Kaggle input directory into a DataFrame.
data = pd.read_csv('/kaggle/input/social-network-ads/Social_Network_Ads.csv')
# Bare last expression: the notebook renders the DataFrame as this cell's output.
data

In [None]:
# Print the DataFrame summary (columns, dtypes, non-null counts) to check for missing values.
data.info()

# EDA: Exploratory Data Analysis

## Target Variable: Purchased

In [None]:
# Count plot of the target: one bar per distinct value of `Purchased`.
fig = go.Figure(
    go.Histogram(
        x=data['Purchased'],
        histfunc='count',
        marker_color='#330C73',
    )
)

fig.update_layout(
    title="Target Variable Countplot",
    xaxis_title="Purchased",
    yaxis_title="Count",
    bargap=0.5,
)

fig.show()

In [None]:
# Class shares of the target; the first slice is pulled out of the pie.
purchased_counts = data['Purchased'].value_counts()

fig = go.Figure(
    go.Pie(
        labels=purchased_counts.index,
        values=purchased_counts.values,
        pull=[0.2, 0],
        textfont={"size": 16},
    )
)

fig.update_layout(
    title="Target Variable Pie Plot",
    annotations=[
        {"text": 'Purchased', "x": 0.45, "y": 0.65, "font_size": 14, "showarrow": True}
    ],
)

fig.show()

## Histograms

In [None]:
# Hover over a bar to read the Age/Salary range it covers.

# Two histogram traces are created up front; only the Age one starts visible.
fig = go.Figure(data=[
    go.Histogram(x=data['Age'], histfunc='count', marker_color='#330C73'),
    go.Histogram(x=data['EstimatedSalary'], histfunc='count', marker_color='#330C73', visible=False),
])

# One dropdown entry per feature: the first dict restyles the traces with the
# chosen column, the second relabels the title and the x axis.
buttons = [
    dict(
        method="update",
        label=label,
        args=[
            {"type": "histogram", "x": [data[column]]},
            {"title": f"{label} Histogram", "xaxis.title": label},
        ],
    )
    for label, column in [("Age", "Age"), ("Estimated Salary", "EstimatedSalary")]
]

fig.update_layout(
    title="Title Will be Updated Once You Select a Feature from the Left Button !",
    updatemenus=[
        dict(buttons=buttons, active=0, showactive=False, direction="down", x=0.1, y=1.2)
    ],
    xaxis_title="Age",
    yaxis_title="Count",
    bargap=0.05,
)

fig.show()

## Distplots

In [None]:
# Age distribution: histogram plus a fitted normal curve.
fig = create_distplot(
    hist_data=[data['Age']],
    group_labels=["Age"],
    curve_type="normal",  # overrides the default "kde"
    show_hist=True,
    show_curve=True,
)

fig.update_layout(title="Age with Normal Distplot")

fig.show()

In [None]:
# Estimated Salary distribution: fitted normal curve only, histogram disabled.
fig = create_distplot(
    hist_data=[data['EstimatedSalary']],
    group_labels=["Estimated Salary"],
    curve_type="normal",  # overrides the default "kde"
    show_hist=False,
    show_curve=True,
)

fig.update_layout(title="Estimated Salary with Normal Distplot")

fig.show()

## Boxplots

In [None]:
# Hover over the box to inspect its summary statistics for the selected feature.

fig = go.Figure()

# The Age box (with mean and standard deviation markers) is shown first.
fig.add_trace(go.Box(y=data['Age'], marker_color='indianred', boxmean='sd', name=""))
# Second trace starts hidden; no button below changes its visibility.
fig.add_trace(go.Box(y=data['EstimatedSalary'], visible=False))
fig.update_layout(title="Title Will be Updated Once You Select a Feature from the Left Button !")

# Each dropdown entry swaps the plotted column (trace update) and relabels the
# title and both axes (layout update). The y axis of a box plot carries the
# feature's values, so it is labelled with the feature name, not "Count".
buttons = [dict(method="update",
                label="Age",
                args=[{"type": "box", "y": [data["Age"]]},
                      {"title": "Age Boxplot",
                       "xaxis.title": "Age",
                       "yaxis.title": "Age"}]),
           dict(method="update",
                label="Estimated Salary",
                args=[{"type": "box", "y": [data["EstimatedSalary"]]},
                      {"title": "Estimated Salary Boxplot",
                       "xaxis.title": "Estimated Salary",
                       "yaxis.title": "Estimated Salary"}])]

# The histogram-only `bargap` setting is intentionally not carried over here.
fig.update_layout(updatemenus=[{"buttons": buttons, "active": 0, "showactive": False, "direction": "down", 'x': 0.1, 'y': 1.2}],
                  xaxis_title="Age",
                  yaxis_title="Age")

fig.show()

## Features' Distributions Over Target Variable

In [None]:
# Split each feature by the target class. x0..x3 are reused by later cells.
x0 = data.loc[data.Purchased == 0, 'Age'].to_numpy()
x1 = data.loc[data.Purchased == 1, 'Age'].to_numpy()
x2 = data.loc[data.Purchased == 0, 'EstimatedSalary'].to_numpy()
x3 = data.loc[data.Purchased == 1, 'EstimatedSalary'].to_numpy()

# The two Age traces start visible; the two Estimated Salary traces start hidden.
fig = go.Figure(data=[
    go.Histogram(x=values, histfunc='count', name=label, visible=shown)
    for values, label, shown in [
        (x0, "Not Purchased", True),
        (x1, "Purchased", True),
        (x2, "Not Purchased", False),
        (x3, "Purchased", False),
    ]
])
fig.update_layout(title="Title Will be Updated Once You Select a Feature from the Left Button !")

# One dropdown entry per feature: swap in that feature's per-class samples and
# relabel the title and the x axis.
buttons = [
    dict(
        method="update",
        label=label,
        args=[
            {"x": groups, "type": "histogram"},
            {"title": f"{label} Histograms Over Purchased", "xaxis.title": label},
        ],
    )
    for label, groups in [("Age", [x0, x1]), ("Estimated Salary", [x2, x3])]
]

fig.update_layout(
    updatemenus=[
        dict(buttons=buttons, active=0, showactive=False, direction="down", x=0.1, y=1.2)
    ]
)

fig.show()

In [None]:
# Age by target class, one box per class with mean and standard deviation shown.
fig = go.Figure(data=[
    go.Box(y=values, marker_color=colour, boxmean='sd', name=label)
    for values, colour, label in [
        (x0, 'lightgreen', "Not Purchased"),
        (x1, 'darkgreen', "Purchased"),
    ]
])

fig.update_layout(
    title="Age Boxplots Over Purchased",
    yaxis_title="Age",
)

fig.show()

In [None]:
# Estimated Salary by target class, one box per class with mean and standard deviation shown.
fig = go.Figure(data=[
    go.Box(y=values, marker_color=colour, boxmean='sd', name=label)
    for values, colour, label in [
        (x2, 'lightgreen', "Not Purchased"),
        (x3, 'darkgreen', "Purchased"),
    ]
])

fig.update_layout(
    title="Estimated Salary Boxplots Over Purchased",
    yaxis_title="Estimated Salary",
)

fig.show()

In [None]:
# Display the DataFrame again as this cell's output.
data

In [None]:
# Bin EstimatedSalary into three classes labelled 1, 2 and 3, using the edges
# 0 / 60000 / 100000 / 150000; the lowest edge is included in the first bin.
data['SalaryClass'] = pd.cut(
    x=data['EstimatedSalary'],
    bins=[0, 60000, 100000, 150000],
    labels=[1, 2, 3],
    include_lowest=True,
)

In [None]:
# Age against Estimated Salary, each marker coloured by its SalaryClass label.
fig = go.Figure(
    go.Scatter(
        x=data['Age'],
        y=data['EstimatedSalary'],
        mode='markers',
        marker={
            "color": data['SalaryClass'].to_numpy(),
            "size": 15,
            "showscale": True,
        },
    )
)

fig.update_layout(
    title="Scatter Plot - Color = Salary Class",
    xaxis_title='Age',
    yaxis_title='Estimated Salary',
)

fig.show()