<a id="11"></a>
# <div style="font-size:80px; text-align: center; background-color: royalblue; font-family:Rubik; color: #ffffff; padding: 14px; line-height: 1;border-radius:20px; border: 4px solid #3f484b;"><b>Plotly</b> <br> Titanic Analysis</div>

In [2]:
import pandas as pd
import numpy as np

# visualization
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

from plotly.subplots import make_subplots

# Font family name kept as a notebook-level setting.
font = "Rubik"

# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [3]:
# Load the Titanic training CSV from the Kaggle input directory.
df = pd.read_csv(filepath_or_buffer='/kaggle/input/titanic/train.csv')

In [4]:
#### Handling Missing Values

## Fill missing AGE with Median
# The filled column is assigned back rather than calling
# df['Age'].fillna(..., inplace=True): an in-place call on a column selection
# is chained assignment, which warns on pandas 2.x and leaves df untouched
# when copy-on-write is active.
df['Age'] = df['Age'].fillna(df['Age'].median())

## Fill missing EMBARKED with Mode
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])

# Store the survival flag as text so it is handled as a label, not a number.
df['Survived'] = df['Survived'].astype(str)

# Family size = the passenger plus siblings/spouses plus parents/children.
df['FamilySize'] = 1 + df['SibSp'] + df['Parch']

# Drop the rows whose Fare exceeds 270 (rows with a missing Fare are kept).
df = df.drop(index=df.loc[df['Fare'] > 270].index)

In [5]:
def group_by(df, col):
    """Count the rows of ``df`` that fall in each group of ``col``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise.
    col : hashable or list
        Column label to group on. A list of labels groups on several
        columns at once.

    Returns
    -------
    pandas.DataFrame
        One row per group: the grouping column(s) followed by a ``count``
        column holding the number of rows in that group.
    """
    # A single label is wrapped in a list; a list of labels is used as given.
    keys = col if isinstance(col, list) else [col]
    return df.groupby(keys).size().reset_index(name='count')

In [6]:
# Three trace colours plus the background used for paper and plot areas.
color_1, color_2, color_3 = '#37536d', 'royalblue', '#f0f4f5'
bg_color = '#dfecf4'
colors = [color_1, color_2, color_3]

In [7]:
# Row counts per category for each univariate panel.
sex_count = group_by(df, 'Sex')
pclass_count = group_by(df, 'Pclass')
embarked_count = group_by(df, 'Embarked')
family_count = group_by(df, 'FamilySize')
survived_count = group_by(df, 'Survived')

# Grid: three bar/pie rows, a two-row age histogram, a full-width family
# size bar chart and a two-row survived bar/pie pair.
fig = make_subplots(
    rows=8, cols=2,
    specs=[[{}, {'type':'domain'}],
           [{}, {'type':'domain'}],
           [{}, {'type':'domain'}],
           [{"rowspan": 2, "colspan": 2}, None],
           [None, None],
           [{"colspan": 2}, None],
           [{"rowspan": 2}, {"rowspan": 2, 'type':'domain'}],
           [None, None]],
    subplot_titles=('<i>Gender Bar', '<i>Gender Pie', '<i>Pclass Bar', '<i>Pclass Pie',
                    '<i>Embarked Bar', '<i>Embarked Pie', '<i>Age Distribution',
                    '<i>Family Size', '<i>Survived Bar', '<i>Survived Pie'),
)

## Gender, Pclass, Embarked and Survived plots: a bar (col 1) and a pie (col 2) per row
for row, counts, column, pie_pull, pie_colors in (
    (1, sex_count, 'Sex', [0.1, 0], [color_1, color_2]),
    (2, pclass_count, 'Pclass', [0, 0, 0.1], colors),
    (3, embarked_count, 'Embarked', [0, 0, 0.1], colors),
    (7, survived_count, 'Survived', [0.05, 0], [color_1, color_2]),
):
    fig.add_trace(go.Bar(x=counts[column], y=counts['count'],
                         text=counts['count'], name="", marker_color=colors),
                  row=row, col=1)
    fig.add_trace(go.Pie(labels=counts[column], values=counts['count'], name="",
                         pull=pie_pull, marker_colors=pie_colors),
                  row=row, col=2)


## Age Histogram
fig.add_trace(go.Histogram(x=df['Age'], name="", histnorm='probability'), row=4, col=1)


## Family Plot
fig.add_trace(go.Bar(x=family_count['FamilySize'], y=family_count['count'],
                     text=family_count['count'], name="", marker_color=color_3), row=6, col=1)


# title_font replaces the deprecated `titlefont` layout attribute.
fig.update_layout(height=1600, width=800,
                  showlegend=False,
                  title_text="Titanic Univariate", title_x=0.5,
                  title_font={'size': 25, 'family': 'Rubik'},
                  paper_bgcolor=bg_color,
                  plot_bgcolor=bg_color
)

fig.update_yaxes(showgrid=False)

# Outline every bar, histogram bin and pie sector.
fig.update_traces(marker_line_color='#3f484b',
                  marker_line_width=2)

fig.show()

<a id="3.1"></a>
<h3 style="font-size:25px; text-align: left;background-color:royalblue; font-family:Rubik; color: #ffffff; padding: 16px; line-height: 1; border-radius:10px; border: 3px solid #3f484b;"> Gender Analysis</h3>

In [8]:
# Per-sex passenger subsets reused by the figure cells.
male_df = df.loc[df['Sex'].eq('male')]
female_df = df.loc[df['Sex'].eq('female')]

# Group sizes by (Sex, Pclass): all passengers, then each sex on its own.
sex_pclass_df = df.groupby(['Sex', 'Pclass']).size().reset_index(name='count')
male_pclass_df = male_df.groupby(['Sex', 'Pclass']).size().reset_index(name='count')
female_pclass_df = female_df.groupby(['Sex', 'Pclass']).size().reset_index(name='count')

# Group sizes by (Sex, Embarked).
sex_embarked_df = df.groupby(['Sex', 'Embarked']).size().reset_index(name='count')
male_embarked_df = male_df.groupby(['Sex', 'Embarked']).size().reset_index(name='count')
female_embarked_df = female_df.groupby(['Sex', 'Embarked']).size().reset_index(name='count')

# Group sizes by (Sex, Survived).
sex_survived_df = df.groupby(['Sex', 'Survived']).size().reset_index(name='count')
male_survived_df = male_df.groupby(['Sex', 'Survived']).size().reset_index(name='count')
female_survived_df = female_df.groupby(['Sex', 'Survived']).size().reset_index(name='count')

In [9]:
fig = make_subplots(
    rows=6, cols=2,
    specs=[[{}, {"type": "sunburst"}],
           [{}, {"type": "sunburst"}],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None],
           [{}, {"type": "sunburst"}]],
    subplot_titles=('<i>Pclass Bar', '<i>Pclass Sunburst', '<i>Embarked Bar', '<i>Embarked Sunburst',
                   '<i>Male Age Distribution', '<i>Female Age Distribution', '<i>Family Size Distribution',
                   '<i>Survived Bar', '<i>Survived Sunburst'),
)

# One entry per sex: trace name, colour and the frames each panel reads.
sex_groups = [
    dict(name='Male', color=color_1, passengers=male_df, pclass=male_pclass_df,
         embarked=male_embarked_df, survived=male_survived_df),
    dict(name='Female', color=color_2, passengers=female_df, pclass=female_pclass_df,
         embarked=female_embarked_df, survived=female_survived_df),
]

# Sunburst colours are looked up by the Sex label. Passing `color` makes
# Plotly Express write the colours into the trace itself; a bare
# color_discrete_sequence only lands in the px figure's layout, which is lost
# when sb.data[0] is copied into this subplot figure.
sex_color_map = {'male': color_1, 'female': color_2}

## Pclass Bars
for grp in sex_groups:
    fig.add_trace(go.Bar(y=grp['pclass']['Pclass'],
                         x=grp['pclass']['count'], orientation='h',
                         text=grp['pclass']['count'],
                         name=grp['name'],
                         marker_color=grp['color']), row=1, col=1)

## Sex and Pclass Sunburst
sb1 = px.sunburst(sex_pclass_df, values='count', path=['Sex', 'Pclass'],
                  color='Sex', color_discrete_map=sex_color_map)
fig.add_trace(sb1.data[0], row=1, col=2)


## Embarked Bars
for grp in sex_groups:
    fig.add_trace(go.Bar(y=grp['embarked']['Embarked'],
                         x=grp['embarked']['count'], orientation='h',
                         text=grp['embarked']['count'],
                         name=grp['name'],
                         marker_color=grp['color']), row=2, col=1)

## Sex and Embarked Sunburst
sb2 = px.sunburst(sex_embarked_df, values='count', path=['Sex', 'Embarked'],
                  color='Sex', color_discrete_map=sex_color_map)
fig.add_trace(sb2.data[0], row=2, col=2)


## Age Histogram (male in row 3, female in row 4, matching the subplot titles)
for row, grp in zip((3, 4), sex_groups):
    fig.add_trace(go.Histogram(x=grp['passengers']['Age'], name=grp['name'], histnorm='percent',
                               marker_color=grp['color']), row=row, col=1)


## Family Size Distribution
for grp in sex_groups:
    fig.add_trace(go.Histogram(x=grp['passengers']['FamilySize'], name=grp['name'], histnorm='percent',
                               marker_color=grp['color']), row=5, col=1)

## Survived Bars
for grp in sex_groups:
    fig.add_trace(go.Bar(x=grp['survived']['Survived'],
                         y=grp['survived']['count'],
                         text=grp['survived']['count'],
                         name=grp['name'],
                         marker_color=grp['color']), row=6, col=1)

## Sex and Survived Sunburst
sb3 = px.sunburst(sex_survived_df, values='count', path=['Sex', 'Survived'],
                  color='Sex', color_discrete_map=sex_color_map)
fig.add_trace(sb3.data[0], row=6, col=2)


# title_font replaces the deprecated `titlefont` layout attribute.
fig.update_layout(height=1800, width=800,
                  showlegend=False,
                  title_text="Gender Analysis wrt", title_x=0.5,
                  title_font={'size': 25, 'family': 'Rubik'},
                  paper_bgcolor=bg_color,
                  plot_bgcolor=bg_color
)

fig.update_traces(marker_line_color='#3f484b',
                  marker_line_width=1.5)

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

In [39]:
## Grouping Datasets: passenger counts per (Pclass, Embarked, Survived), one frame per sex
male_pclass_embarked_survive_df = (
    male_df.groupby(['Pclass', 'Embarked', 'Survived']).size().reset_index(name='count')
)
female_pclass_embarked_survive_df = (
    female_df.groupby(['Pclass', 'Embarked', 'Survived']).size().reset_index(name='count')
)

## Subplots: two sunburst cells side by side
fig = make_subplots(
    rows=1, cols=2,
    specs=[[{"type": "sunburst"}, {"type": "sunburst"}]],
    subplot_titles=("Male X Pclass X Embarked X Survived", "Female X Pclass X Embarked X Survived"),
)

## Creating Sunburst Figures and plotting their single trace
sb1 = px.sunburst(male_pclass_embarked_survive_df, path=['Pclass', 'Embarked', 'Survived'], values='count')
fig.add_trace(sb1.data[0], row=1, col=1)

sb2 = px.sunburst(female_pclass_embarked_survive_df, path=['Pclass', 'Embarked', 'Survived'], values='count')
fig.add_trace(sb2.data[0], row=1, col=2)

# Each sector shows its label and its share of the parent sector.
fig.update_traces(textinfo="label+percent parent")

# Update title and height
fig.update_layout(
    title_text="Male vs Female Sunburst", title_x=0.5,
    height=600, template='plotly_dark', showlegend=False,
    font=dict(family="Rubik", size=14),
)

fig.show()

<a id="3.1"></a>
<h3 style="font-size:25px; text-align: left;background-color:royalblue; font-family:Rubik; color: #ffffff; padding: 16px; line-height: 1; border-radius:10px; border: 3px solid #3f484b;"> Pclass Analysis</h3>

In [10]:
# Per-class passenger subsets reused by the figure cells.
pclass1_df = df.loc[df['Pclass'].eq(1)]
pclass2_df = df.loc[df['Pclass'].eq(2)]
pclass3_df = df.loc[df['Pclass'].eq(3)]

# Group sizes by (Pclass, Sex): all passengers, then each class on its own.
pclass_sex_df = df.groupby(['Pclass', 'Sex']).size().reset_index(name='count')
pclass1_sex_df = pclass1_df.groupby(['Pclass', 'Sex']).size().reset_index(name='count')
pclass2_sex_df = pclass2_df.groupby(['Pclass', 'Sex']).size().reset_index(name='count')
pclass3_sex_df = pclass3_df.groupby(['Pclass', 'Sex']).size().reset_index(name='count')

# Group sizes by (Pclass, Embarked).
pclass_embarked_df = df.groupby(['Pclass', 'Embarked']).size().reset_index(name='count')
pclass1_embarked_df = pclass1_df.groupby(['Pclass', 'Embarked']).size().reset_index(name='count')
pclass2_embarked_df = pclass2_df.groupby(['Pclass', 'Embarked']).size().reset_index(name='count')
pclass3_embarked_df = pclass3_df.groupby(['Pclass', 'Embarked']).size().reset_index(name='count')

# Group sizes by (Pclass, Survived).
pclass_survived_df = df.groupby(['Pclass', 'Survived']).size().reset_index(name='count')
pclass1_survived_df = pclass1_df.groupby(['Pclass', 'Survived']).size().reset_index(name='count')
pclass2_survived_df = pclass2_df.groupby(['Pclass', 'Survived']).size().reset_index(name='count')
pclass3_survived_df = pclass3_df.groupby(['Pclass', 'Survived']).size().reset_index(name='count')

In [11]:
fig = make_subplots(
    rows=7, cols=2,
    specs=[[{}, {"type": "sunburst"}],
           [{}, {"type": "sunburst"}],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None],
           [{}, {"type": "sunburst"}]],
    subplot_titles=('<i>Gender Bar', '<i>Gender Sunburst', '<i>Embarked Bar', '<i>Embarked Sunburst',
                   '<i>Pclass 3 Age Distribution', '<i>Pclass 2 Age Distribution',
                    '<i>Pclass 1 Age Distribution', '<i>Family Size Distribution',
                   '<i>Survived Bar', '<i>Survived Sunburst'),
)

# One entry per passenger class: trace name, colour and the frames each panel
# reads. Building every trace from this table keeps name, colour and data in
# step (the Family Size panel previously plotted Pclass 3 twice and never
# showed Pclass 1).
pclass_groups = [
    dict(name='Pclass 1', color=color_3, passengers=pclass1_df, sex=pclass1_sex_df,
         embarked=pclass1_embarked_df, survived=pclass1_survived_df),
    dict(name='Pclass 2', color=color_2, passengers=pclass2_df, sex=pclass2_sex_df,
         embarked=pclass2_embarked_df, survived=pclass2_survived_df),
    dict(name='Pclass 3', color=color_1, passengers=pclass3_df, sex=pclass3_sex_df,
         embarked=pclass3_embarked_df, survived=pclass3_survived_df),
]

# Sunburst colours are looked up by class label so they match the bars.
# Passing `color` makes Plotly Express write the colours into the trace; a
# bare color_discrete_sequence only lands in the px figure's layout, which is
# lost when sb.data[0] is copied into this subplot figure. Pclass is cast to
# str for the sunburst inputs so px treats it as a discrete label rather than
# a continuous colour value.
pclass_color_map = {'1': color_3, '2': color_2, '3': color_1}

## Gender Bars
for grp in pclass_groups:
    fig.add_trace(go.Bar(y=grp['sex']['Sex'],
                         x=grp['sex']['count'], orientation='h',
                         text=grp['sex']['count'],
                         name=grp['name'],
                         marker_color=grp['color']), row=1, col=1)

## Pclass and Sex Sunburst
sb1 = px.sunburst(pclass_sex_df.astype({'Pclass': str}), values='count', path=['Pclass', 'Sex'],
                  color='Pclass', color_discrete_map=pclass_color_map)
fig.add_trace(sb1.data[0], row=1, col=2)


## Embarked Bars
for grp in pclass_groups:
    fig.add_trace(go.Bar(y=grp['embarked']['Embarked'],
                         x=grp['embarked']['count'], orientation='h',
                         text=grp['embarked']['count'],
                         name=grp['name'],
                         marker_color=grp['color']), row=2, col=1)

## Pclass and Embarked Sunburst
sb2 = px.sunburst(pclass_embarked_df.astype({'Pclass': str}), values='count', path=['Pclass', 'Embarked'],
                  color='Pclass', color_discrete_map=pclass_color_map)
fig.add_trace(sb2.data[0], row=2, col=2)


## Pclass Histogram (rows 3-5 hold Pclass 3, 2, 1, matching the subplot titles)
for row, grp in zip((3, 4, 5), reversed(pclass_groups)):
    fig.add_trace(go.Histogram(x=grp['passengers']['Age'], name=grp['name'], histnorm='percent',
                               marker_color=grp['color']), row=row, col=1)


## Family Size Distribution
for grp in reversed(pclass_groups):
    fig.add_trace(go.Histogram(x=grp['passengers']['FamilySize'], name=grp['name'], histnorm='percent',
                               marker_color=grp['color']), row=6, col=1)

## Survived Bars
for grp in reversed(pclass_groups):
    fig.add_trace(go.Bar(x=grp['survived']['Survived'],
                         y=grp['survived']['count'],
                         text=grp['survived']['count'],
                         name=grp['name'],
                         marker_color=grp['color']), row=7, col=1)

## Pclass and Survived Sunburst
sb3 = px.sunburst(pclass_survived_df.astype({'Pclass': str}), values='count', path=['Pclass', 'Survived'],
                  color='Pclass', color_discrete_map=pclass_color_map)
fig.add_trace(sb3.data[0], row=7, col=2)


# title_font replaces the deprecated `titlefont` layout attribute.
fig.update_layout(height=2200, width=800,
                  showlegend=False,
                  title_text="Pclass Analysis wrt", title_x=0.5,
                  title_font={'size': 25, 'family': 'Rubik'},
                  paper_bgcolor=bg_color,
                  plot_bgcolor=bg_color
)

fig.update_traces(marker_line_color='#3f484b',
                  marker_line_width=1.5)

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

In [38]:
## Grouping Datasets: passenger counts per (Sex, Embarked, Survived), one frame per class
pclass1_sex_embarked_survive_df = (
    pclass1_df.groupby(['Sex', 'Embarked', 'Survived']).size().reset_index(name='count')
)
pclass2_sex_embarked_survive_df = (
    pclass2_df.groupby(['Sex', 'Embarked', 'Survived']).size().reset_index(name='count')
)
pclass3_sex_embarked_survive_df = (
    pclass3_df.groupby(['Sex', 'Embarked', 'Survived']).size().reset_index(name='count')
)

## Subplots: three stacked sunburst cells
fig = make_subplots(
    rows=3, cols=1,
    specs=[[{"type": "sunburst"}],
           [{"type": "sunburst"}],
           [{"type": "sunburst"}]],
    subplot_titles=("Pclass 1", "Pclass 2", "Pclass 3"),
)

## Creating Sunburst Figures and plotting their single trace
sb1 = px.sunburst(pclass1_sex_embarked_survive_df, path=['Sex', 'Embarked', 'Survived'], values='count')
fig.add_trace(sb1.data[0], row=1, col=1)

sb2 = px.sunburst(pclass2_sex_embarked_survive_df, path=['Sex', 'Embarked', 'Survived'], values='count')
fig.add_trace(sb2.data[0], row=2, col=1)

sb3 = px.sunburst(pclass3_sex_embarked_survive_df, path=['Sex', 'Embarked', 'Survived'], values='count')
fig.add_trace(sb3.data[0], row=3, col=1)

# Each sector shows its label and its share of the parent sector.
fig.update_traces(textinfo="label+percent parent")

# Update title and height
fig.update_layout(
    title_text="Pclass1 vs Pclass2 vs Pclass3",
    height=1500, template='plotly_dark', showlegend=False,
    font=dict(family="Rubik", size=14),
)

fig.show()

<a id="3.1"></a>
<h3 style="font-size:25px; text-align: left;background-color:royalblue; font-family:Rubik; color: #ffffff; padding: 16px; line-height: 1; border-radius:10px; border: 3px solid #3f484b;"> Embarked Analysis</h3>

In [12]:
# Per-port passenger subsets reused by the figure cells.
embarked_S_df = df.loc[df['Embarked'].eq('S')]
embarked_Q_df = df.loc[df['Embarked'].eq('Q')]
embarked_C_df = df.loc[df['Embarked'].eq('C')]

# Group sizes by (Embarked, Sex): all passengers, then each port on its own.
embarked_sex_df = df.groupby(['Embarked', 'Sex']).size().reset_index(name='count')
embarked_S_sex_df = embarked_S_df.groupby(['Embarked', 'Sex']).size().reset_index(name='count')
embarked_Q_sex_df = embarked_Q_df.groupby(['Embarked', 'Sex']).size().reset_index(name='count')
embarked_C_sex_df = embarked_C_df.groupby(['Embarked', 'Sex']).size().reset_index(name='count')

# Group sizes by (Embarked, Pclass).
embarked_pclass_df = df.groupby(['Embarked', 'Pclass']).size().reset_index(name='count')
embarked_S_pclass_df = embarked_S_df.groupby(['Embarked', 'Pclass']).size().reset_index(name='count')
embarked_Q_pclass_df = embarked_Q_df.groupby(['Embarked', 'Pclass']).size().reset_index(name='count')
embarked_C_pclass_df = embarked_C_df.groupby(['Embarked', 'Pclass']).size().reset_index(name='count')

# Group sizes by (Embarked, Survived).
embarked_survived_df = df.groupby(['Embarked', 'Survived']).size().reset_index(name='count')
embarked_S_survived_df = embarked_S_df.groupby(['Embarked', 'Survived']).size().reset_index(name='count')
embarked_Q_survived_df = embarked_Q_df.groupby(['Embarked', 'Survived']).size().reset_index(name='count')
embarked_C_survived_df = embarked_C_df.groupby(['Embarked', 'Survived']).size().reset_index(name='count')

In [13]:
fig = make_subplots(
    rows=7, cols=2,
    specs=[[{}, {"type": "sunburst"}],
           [{}, {"type": "sunburst"}],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None],
           [{}, {"type": "sunburst"}]],
    subplot_titles=('<i>Gender Bar', '<i>Gender Sunburst', '<i>Pclass Bar', '<i>Pclass Sunburst',
                   '<i>Embarked S Age Distribution', '<i>Embarked Q Age Distribution',
                    '<i>Embarked C Age Distribution', '<i>Family Size Distribution',
                   '<i>Survived Bar', '<i>Survived Sunburst'),
)

# One entry per port: trace name, colour and the frames each panel reads.
# Each port keeps a single colour in every panel (S -> color_1, Q -> color_2,
# C -> color_3); the figure has no legend, so colour is the only cue linking
# a port across panels.
embarked_groups = [
    dict(name='Embarked S', color=color_1, passengers=embarked_S_df, sex=embarked_S_sex_df,
         pclass=embarked_S_pclass_df, survived=embarked_S_survived_df),
    dict(name='Embarked Q', color=color_2, passengers=embarked_Q_df, sex=embarked_Q_sex_df,
         pclass=embarked_Q_pclass_df, survived=embarked_Q_survived_df),
    dict(name='Embarked C', color=color_3, passengers=embarked_C_df, sex=embarked_C_sex_df,
         pclass=embarked_C_pclass_df, survived=embarked_C_survived_df),
]

# Sunburst colours are looked up by port label so they match the bars.
# Passing `color` makes Plotly Express write the colours into the trace; a
# bare color_discrete_sequence only lands in the px figure's layout, which is
# lost when sb.data[0] is copied into this subplot figure.
embarked_color_map = {'S': color_1, 'Q': color_2, 'C': color_3}

## Gender Bars
for grp in embarked_groups:
    fig.add_trace(go.Bar(y=grp['sex']['Sex'],
                         x=grp['sex']['count'], orientation='h',
                         text=grp['sex']['count'],
                         name=grp['name'],
                         marker_color=grp['color']), row=1, col=1)

## Embarked and Sex Sunburst
sb1 = px.sunburst(embarked_sex_df, values='count', path=['Embarked', 'Sex'],
                  color='Embarked', color_discrete_map=embarked_color_map)
fig.add_trace(sb1.data[0], row=1, col=2)


## Pclass Bars
for grp in embarked_groups:
    fig.add_trace(go.Bar(y=grp['pclass']['Pclass'],
                         x=grp['pclass']['count'], orientation='h',
                         text=grp['pclass']['count'],
                         name=grp['name'],
                         marker_color=grp['color']), row=2, col=1)

## Embarked and Pclass Sunburst
sb2 = px.sunburst(embarked_pclass_df, values='count', path=['Embarked', 'Pclass'],
                  color='Embarked', color_discrete_map=embarked_color_map)
fig.add_trace(sb2.data[0], row=2, col=2)


## Embarked Histogram (rows 3-5 hold S, Q, C, matching the subplot titles)
for row, grp in zip((3, 4, 5), embarked_groups):
    fig.add_trace(go.Histogram(x=grp['passengers']['Age'], name=grp['name'], histnorm='percent',
                               marker_color=grp['color']), row=row, col=1)


## Family Size Distribution
for grp in embarked_groups:
    fig.add_trace(go.Histogram(x=grp['passengers']['FamilySize'], name=grp['name'], histnorm='percent',
                               marker_color=grp['color']), row=6, col=1)

## Survived Bars
for grp in embarked_groups:
    fig.add_trace(go.Bar(x=grp['survived']['Survived'],
                         y=grp['survived']['count'],
                         text=grp['survived']['count'],
                         name=grp['name'],
                         marker_color=grp['color']), row=7, col=1)

## Embarked and Survived Sunburst
sb3 = px.sunburst(embarked_survived_df, values='count', path=['Embarked', 'Survived'],
                  color='Embarked', color_discrete_map=embarked_color_map)
fig.add_trace(sb3.data[0], row=7, col=2)


# title_font replaces the deprecated `titlefont` layout attribute.
fig.update_layout(height=2200, width=800,
                  showlegend=False,
                  title_text="Embarked Analysis wrt", title_x=0.5,
                  title_font={'size': 25, 'family': 'Rubik'},
                  paper_bgcolor=bg_color,
                  plot_bgcolor=bg_color
)

fig.update_traces(marker_line_color='#3f484b',
                  marker_line_width=1.5)

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

In [44]:
## Grouping Datasets: passenger counts per (Sex, Pclass, Survived), one frame per port
embarked_S_sex_pclass_survive_df = (
    embarked_S_df.groupby(['Sex', 'Pclass', 'Survived']).size().reset_index(name='count')
)
embarked_Q_sex_pclass_survive_df = (
    embarked_Q_df.groupby(['Sex', 'Pclass', 'Survived']).size().reset_index(name='count')
)
embarked_C_sex_pclass_survive_df = (
    embarked_C_df.groupby(['Sex', 'Pclass', 'Survived']).size().reset_index(name='count')
)

## Subplots: three stacked sunburst cells
fig = make_subplots(
    rows=3, cols=1,
    specs=[[{"type": "sunburst"}],
           [{"type": "sunburst"}],
           [{"type": "sunburst"}]],
    subplot_titles=("Embarked S", "Embarked Q", "Embarked C"),
)

## Creating Sunburst Figures and plotting their single trace
sb1 = px.sunburst(embarked_S_sex_pclass_survive_df, path=['Sex', 'Pclass', 'Survived'], values='count')
fig.add_trace(sb1.data[0], row=1, col=1)

sb2 = px.sunburst(embarked_Q_sex_pclass_survive_df, path=['Sex', 'Pclass', 'Survived'], values='count')
fig.add_trace(sb2.data[0], row=2, col=1)

sb3 = px.sunburst(embarked_C_sex_pclass_survive_df, path=['Sex', 'Pclass', 'Survived'], values='count')
fig.add_trace(sb3.data[0], row=3, col=1)

# Each sector shows its label and its share of the parent sector.
fig.update_traces(textinfo="label+percent parent")

# Update title and height
fig.update_layout(
    title_text="Embarked S vs Embarked Q vs Embarked C",
    height=1500, template='plotly_dark', showlegend=False,
    font=dict(family="Rubik", size=14),
)

fig.show()

<a id="3.1"></a>
<h3 style="font-size:25px; text-align: left;background-color:royalblue; font-family:Rubik; color: #ffffff; padding: 16px; line-height: 1; border-radius:10px; border: 3px solid #3f484b;"> Survived Analysis</h3>

In [14]:
# Survived is compared as text here: '1' selects survivors, '0' the rest.
survived_df = df.loc[df['Survived'].eq('1')]
not_survived_df = df.loc[df['Survived'].eq('0')]

# Group sizes by (Survived, Sex): all passengers, then each outcome on its own.
survived_sex_df = df.groupby(['Survived', 'Sex']).size().reset_index(name='count')
survive_sex_df = survived_df.groupby(['Survived', 'Sex']).size().reset_index(name='count')
not_survive_sex_df = not_survived_df.groupby(['Survived', 'Sex']).size().reset_index(name='count')

# Group sizes by (Survived, Pclass).
survived_pclass_df = df.groupby(['Survived', 'Pclass']).size().reset_index(name='count')
survive_pclass_df = survived_df.groupby(['Survived', 'Pclass']).size().reset_index(name='count')
not_survive_pclass_df = not_survived_df.groupby(['Survived', 'Pclass']).size().reset_index(name='count')

# Group sizes by (Survived, Embarked).
survived_embarked_df = df.groupby(['Survived', 'Embarked']).size().reset_index(name='count')
survive_embarked_df = survived_df.groupby(['Survived', 'Embarked']).size().reset_index(name='count')
not_survive_embarked_df = not_survived_df.groupby(['Survived', 'Embarked']).size().reset_index(name='count')

In [15]:
fig = make_subplots(
    rows=6, cols=2,
    specs=[[{}, {"type": "sunburst"}],
           [{}, {"type": "sunburst"}],
           [{}, {"type": "sunburst"}],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None]],
    subplot_titles=('<i>Gender Bar', '<i>Gender Sunburst', '<i>Pclass Bar', '<i>Pclass Sunburst',
                    '<i>Embarked Bar', '<i>Embarked Sunburst',
                   '<i>Survived Age Distribution(%)', '<i>Not Survived Age Distribution(%)', '<i>Family Size Distribution(%)'),
)


## Gender Bars
fig.add_trace(go.Bar(x=survive_sex_df['Sex'],
                y=survive_sex_df['count'],
                text=survive_sex_df['count'],
                name='Survived',
                marker_color=color_2
                ), row=1, col=1)

fig.add_trace(go.Bar(x=not_survive_sex_df['Sex'],
                y=not_survive_sex_df['count'],
                text=not_survive_sex_df['count'],
                name='Not Survived',
                marker_color=color_1
                ), row=1, col=1)

## Survived and Sex Sunburst
sb1 = px.sunburst(survived_sex_df, values='count', path=['Survived', 'Sex'], color='Survived',
                 color_discrete_sequence=[color_1, color_2])
fig.add_trace(sb1.data[0], row=1, col=2)


## Pclass Bars
fig.add_trace(go.Bar(y=survive_pclass_df['Pclass'],
                x=survive_pclass_df['count'], orientation='h',
                text=survive_pclass_df['count'],
                name='Survived',
                marker_color=color_2
                ), row=2, col=1)

# "Not Survived" bar for the Pclass row; it goes into subplot (2, 1), the same
# cell as the "Survived" bar added immediately before it.
fig.add_trace(
    go.Bar(
        x=not_survive_pclass_df['count'],
        y=not_survive_pclass_df['Pclass'],
        text=not_survive_pclass_df['count'],
        orientation='h',
        name='Not Survived',
        marker_color=color_1,
    ),
    row=2, col=1,
)


## Survived and Pclass Sunburst
# Plotly Express builds a standalone figure; only its first trace (data[0]) is
# moved into the subplot grid.
# NOTE(review): nothing from sb2's layout travels with data[0] - confirm the
# colour sequence still renders as intended inside `fig`.
sb2 = px.sunburst(
    survived_pclass_df,
    path=['Survived', 'Pclass'],
    values='count',
    color_discrete_sequence=[color_2, color_1],
)
fig.add_trace(sb2.data[0], row=2, col=2)


## Embarked Bars
# Both outcome bars land in subplot (3, 1); "Survived" is added first.
fig.add_traces(
    [
        go.Bar(
            x=survive_embarked_df['count'],
            y=survive_embarked_df['Embarked'],
            text=survive_embarked_df['count'],
            orientation='h',
            name='Survived',
            marker_color=color_2,
        ),
        go.Bar(
            x=not_survive_embarked_df['count'],
            y=not_survive_embarked_df['Embarked'],
            text=not_survive_embarked_df['count'],
            orientation='h',
            name='Not Survived',
            marker_color=color_1,
        ),
    ],
    rows=[3, 3],
    cols=[1, 1],
)

## Survived and Embarked Sunburst
# Same hand-over as for sb2: build with Express, keep only the trace.
sb3 = px.sunburst(
    survived_embarked_df,
    path=['Survived', 'Embarked'],
    values='count',
    color_discrete_sequence=[color_1, color_2],
)
fig.add_trace(sb3.data[0], row=3, col=2)


## Age Histogram
# Percent-normalised age histograms, one grid row per outcome
# ("Survived" in row 4, "Not Survived" in row 5).
fig.add_traces(
    [
        go.Histogram(
            x=survived_df['Age'],
            histnorm='percent',
            name="Survived",
            marker_color=color_2,
        ),
        go.Histogram(
            x=not_survived_df['Age'],
            histnorm='percent',
            name="Not Survived",
            marker_color=color_1,
        ),
    ],
    rows=[4, 5],
    cols=[1, 1],
)


## Family Size Distribution
# Percent-normalised family-size histograms; both outcomes share subplot (6, 1).
fig.add_traces(
    [
        go.Histogram(
            x=survived_df['FamilySize'],
            histnorm='percent',
            name="Survived",
            marker_color=color_2,
        ),
        go.Histogram(
            x=not_survived_df['FamilySize'],
            histnorm='percent',
            name="Not Survived",
            marker_color=color_1,
        ),
    ],
    rows=[6, 6],
    cols=[1, 1],
)


# Figure-wide layout: fixed canvas size, no legend, a centred title, and one
# shared background colour for the paper and every plot area.
fig.update_layout(height=1800, width=800,
                  showlegend=False,
                  title_text="Survived Analysis wrt", title_x=0.5,
                  # `title_font` (i.e. layout.title.font) replaces the deprecated
                  # `titlefont` alias, which newer Plotly releases no longer accept.
                  title_font={'size': 25, 'family':'Rubik'},
                  paper_bgcolor=bg_color,
                  plot_bgcolor=bg_color
)

# Give the markers of every trace in the figure the same dark outline.
fig.update_traces(marker_line_color='#3f484b',
                  marker_line_width=1.5)

# Hide grid lines on all x and y axes.
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

<blockquote style="margin-right:auto; font-family:Courier New; margin-left:auto; color:white; background-color: #0a3a6f; padding: 1em; margin:24px;">
   
<ul>
    <li> <font color="white" size=+2.0> <b> Gender vs Survival, Stats: </b> </font>
    <ul>
        <li> <font color="white" size=+1.0> <b><i>Men</i></b> Survival Rate: <b><i>18.6%</i></b> </font>
        <li> <font color="white" size=+1.0> <b><i>Women</i></b> Survival Rate: <b><i>74%</i></b> </font>
    </ul>
        
</ul> 
    
<ul>
    <li> <font color="white" size=+2.0> <b> Pclass vs Survival, Stats: </b> </font>
    <ul>
        <li> <font color="white" size=+1.0> <b><i>Pclass 1</i></b> Survival Rate: <b><i>62.4%</i></b> </font>
        <li> <font color="white" size=+1.0> <b><i>Pclass 2</i></b> Survival Rate: <b><i>47.3%</i></b> </font>
        <li> <font color="white" size=+1.0> <b><i>Pclass 3</i></b> Survival Rate: <b><i>24.2%</i></b> </font>

</ul>  
</ul> 
    
<ul>
    <li> <font color="white" size=+2.0> <b> Embarked vs Survival, Stats: </b> </font>
    <ul>
        <li> <font color="white" size=+1.0> <b><i>Embarked S</i></b> Survival Rate: <b><i>33.9%</i></b> </font>
        <li> <font color="white" size=+1.0> <b><i>Embarked Q</i></b> Survival Rate: <b><i>39%</i></b> </font>
        <li> <font color="white" size=+1.0> <b><i>Embarked C</i></b> Survival Rate: <b><i>54.5%</i></b> </font>
    </ul>
        
</ul> 
</blockquote>
                                                                                                                                            

<h3 style="font-size:30px; text-align: left;background-color:royalblue; font-family:Rubik; color: #ffffff; padding: 16px; line-height: 1; border-radius:10px; border: 3px solid #3f484b;"> Gender X Pclass X Age</h3>

In [20]:
# Passenger head-count for every (Sex, Pclass, Age) combination; the tally is
# stored in a column named 'count'.
sex_pclass_age_df = (
    df.groupby(['Sex', 'Pclass', 'Age'])
    .size()
    .reset_index(name='count')
)

# Treemap nested Sex -> Pclass under a constant root node. Tile area follows
# 'count', tile colour follows 'Age', and the 'RdBu' colour scale is centred on
# the count-weighted mean age.
fig = px.treemap(
    sex_pclass_age_df,
    path=[px.Constant("Treemap"), 'Sex', 'Pclass'],
    values='count',
    color='Age',
    hover_data=['count'],
    color_continuous_scale='RdBu',
    color_continuous_midpoint=np.average(
        sex_pclass_age_df['Age'],
        weights=sex_pclass_age_df['count'],
    ),
)

# Label each tile with its name and its share of the parent tile.
fig.update_traces(textinfo="label+percent parent")

# Dark theme, centred title, and the notebook's font for all figure text.
fig.update_layout(
    template='plotly_dark',
    height=500,
    title={'text': "<i> Gender </i> X <i> Pclass </i>", 'x': 0.5},
    font={'family': "Rubik", 'size': 16},
)

fig.show()

<blockquote style="margin-right:auto; font-family:Courier New; margin-left:auto; color:white; background-color: #0a3a6f; padding: 1em; margin:24px;">
   
<ul>
    <li> <font color="white" size=+2.0> <b> Men vs Pclass Statistical Summary: </b> </font>
    <ul>
        <li> <font color="white" size=+1.0> <b><i>Men</i></b> with <b><i>Pclass:1</i></b>, Average Age: <b>39</b></font>
        <li> <font color="white" size=+1.0> <b><i>Men</i></b> with <b><i>Pclass:2</i></b>, Average Age: <b>30.5</b></font>
        <li> <font color="white" size=+1.0> <b><i>Men</i></b> with <b><i>Pclass:3</i></b>, Average Age: <b>27</b></font>
    </ul>
        
</ul> 
</blockquote>
                                                                                                                                            

<blockquote style="margin-right:auto; font-family:Courier New; margin-left:auto; color:white; background-color: #750522; padding: 1em; margin:24px;">
   
<ul>
    <li> <font color="white" size=+2.0> <b> Women vs Pclass Statistical Summary: </b> </font>
    <ul>
        <li> <font color="white" size=+1.0> <b><i>Women</i></b> with <b><i>Pclass:1</i></b>, Average Age: <b>34</b></font>
        <li> <font color="white" size=+1.0> <b><i>Women</i></b> with <b><i>Pclass:2</i></b>, Average Age: <b>29</b></font>
        <li> <font color="white" size=+1.0> <b><i>Women</i></b> with <b><i>Pclass:3</i></b>, Average Age: <b>23.6</b></font>
    </ul>
        
</ul> 
</blockquote>
                                                                                                                                            

<h3 style="font-size:30px; text-align: left;background-color:royalblue; font-family:Rubik; color: #ffffff; padding: 16px; line-height: 1; border-radius:10px; border: 3px solid #3f484b;"> Gender X Embarked X Age</h3>

In [21]:
# Passenger head-count for every (Sex, Embarked, Age) combination; the tally is
# stored in a column named 'count'.
sex_embarked_age_df = (
    df.groupby(['Sex', 'Embarked', 'Age'])
    .size()
    .reset_index(name='count')
)

# Treemap nested Sex -> Embarked under a constant root node. Tile area follows
# 'count', tile colour follows 'Age', and the 'RdBu' colour scale is centred on
# the count-weighted mean age.
fig = px.treemap(
    sex_embarked_age_df,
    path=[px.Constant("Treemap"), 'Sex', 'Embarked'],
    values='count',
    color='Age',
    hover_data=['count'],
    color_continuous_scale='RdBu',
    color_continuous_midpoint=np.average(
        sex_embarked_age_df['Age'],
        weights=sex_embarked_age_df['count'],
    ),
)

# Label each tile with its name and its share of the parent tile.
fig.update_traces(textinfo="label+percent parent")

# Dark theme, centred title, and the notebook's font for all figure text.
fig.update_layout(
    template='plotly_dark',
    height=500,
    title={'text': "<i> Gender </i> X <i> Embarked </i>", 'x': 0.5},
    font={'family': "Rubik", 'size': 16},
)

fig.show()

<blockquote style="margin-right:auto; font-family:Courier New; margin-left:auto; color:white; background-color: #0a3a6f; padding: 1em; margin:24px;">
   
<ul>
    <li> <font color="white" size=+2.0> <b> Men vs Embarked Statistical Summary: </b> </font>
    <ul>
        <li> <font color="white" size=+1.0> <b><i>Men</i></b> with <b><i>Embarked:S</i></b>, Average Age: <b>30</b></font>
        <li> <font color="white" size=+1.0> <b><i>Men</i></b> with <b><i>Embarked:Q</i></b>, Average Age: <b>29</b></font>
        <li> <font color="white" size=+1.0> <b><i>Men</i></b> with <b><i>Embarked:C</i></b>, Average Age: <b>31.5</b></font>
    </ul>
        
</ul> 
</blockquote>
                                                                                                                                            

<blockquote style="margin-right:auto; font-family:Courier New; margin-left:auto; color:white; background-color: #750522; padding: 1em; margin:24px;">
   
<ul>
    <li> <font color="white" size=+2.0> <b> Women vs Embarked Statistical Summary: </b> </font>
    <ul>
        <li> <font color="white" size=+1.0> <b><i>Women</i></b> with <b><i>Embarked:S</i></b>, Average Age: <b>28</b></font>
        <li> <font color="white" size=+1.0> <b><i>Women</i></b> with <b><i>Embarked:Q</i></b>, Average Age: <b>26.8</b></font>
        <li> <font color="white" size=+1.0> <b><i>Women</i></b> with <b><i>Embarked:C</i></b>, Average Age: <b>28.2</b></font>
    </ul>
        
</ul> 
</blockquote>
                                                                                                                                            

<h3 style="font-size:30px; text-align: left;background-color:royalblue; font-family:Rubik; color: #ffffff; padding: 16px; line-height: 1; border-radius:10px; border: 3px solid #3f484b;"> Other Notebooks</h3>

<a id=""></a>
<div class="list-group" id="list-tab" role="tablist"
     style="font-family:Rubik; font-size:150%;background-color: #ededed;
            border-radius:10px; color: royalblue; border-style: solid;border-color:#0a3a6f;">
    
   * [<i> India on the Rise </i>](https://www.kaggle.com/code/madhurpant/india-on-the-rise)
   * [<i>Heart Disease EDA</i>](https://www.kaggle.com/code/madhurpant/heart-disease-eda)
   * [<i>KNN Visualization and Optimization</i>](https://www.kaggle.com/code/madhurpant/knn-visualization-and-optimization)

<a id="11"></a>
# <div style="font-size:40px; text-align: center; background-color: royalblue; font-family:Rubik; color: #ffffff; padding: 14px; line-height: 1;border-radius:20px; border: 4px solid #3f484b;"><b>The End</b></div>

[](http://)