In [None]:
def paste_px_format(figure, **kwargs):
    """Apply the notebook's shared layout settings to a figure.

    The house defaults (gray 10pt font, 780px width, 10px margin padding)
    are merged with ``kwargs`` and passed to ``figure.update_layout``.

    Args:
        figure: Object exposing ``update_layout(**layout)``.
        **kwargs: Extra layout properties (e.g. ``title``, ``height``).
            A key that collides with a default (``font``, ``width``,
            ``margin``) replaces that default instead of raising a
            duplicate-keyword ``TypeError``.

    Returns:
        Whatever ``figure.update_layout`` returns.
    """
    layout = {
        'font': {'color': 'Gray', 'size': 10},
        'width': 780,
        'margin': {'pad': 10},
    }
    # caller-supplied values win over the defaults
    layout.update(kwargs)
    return figure.update_layout(**layout)


def save_figure(fig, name):
    """Render ``fig`` to PNG and write it to the file ``name``.

    Uses the module-level ``scope`` object to produce the image.

    Args:
        fig: Figure to render.
        name: Destination file path; opened in binary write mode.
    """
    # Render first so a failed render does not leave an empty file behind.
    image = scope.transform(fig, format="png")
    with open(name, "wb") as f:
        # the previous version computed the image but never wrote it
        f.write(image)
        
        
def add_bubble(fig, **kwargs):
    """Add a semi-transparent orange circle shape on top of the figure.

    The shape is positioned in paper coordinates starting at
    ``x0=0.5, y0=0.6``.

    Args:
        fig: Object exposing ``add_shape(**shape)``.
        **kwargs: Extra shape properties. They were previously accepted
            but ignored; they are now forwarded to ``add_shape`` and
            override the defaults below on a key collision.

    Returns:
        Whatever ``fig.add_shape`` returns.
    """
    shape = {
        'type': "circle",
        'line_color': "white",
        'fillcolor': "orange",
        'opacity': 0.6,
        'xref': 'paper',
        'yref': 'paper',
        'x0': 0.5,
        'y0': 0.6,
    }
    shape.update(kwargs)
    return fig.add_shape(**shape)


def prepare_data_salary(df, col)-> pd.DataFrame:
    """Cross-tabulate a column of ``df`` against salary quintiles.

    Rows with a missing value in either ``col`` or ``CompTotal`` are
    dropped. ``CompTotal`` is then cut into five equal-frequency bins
    with ``pd.qcut``. The bin labels are fixed display strings; the
    actual bin edges are the quintiles of the data passed in.

    Args:
        df: DataFrame holding ``col``.
        col: Name of the column to tabulate against the salary bins.

    Returns:
        A crosstab with the values of ``col`` as rows and the five
        salary categories as columns.
    """
    # Read the salary from the frame that was passed in, so filtered or
    # re-indexed frames are tabulated against their own rows. Fall back to
    # the module-level ``survey`` only when ``df`` has no salary column,
    # which keeps existing callers working.
    if 'CompTotal' in df.columns:
        salary = df['CompTotal']
    else:
        salary = survey['CompTotal']

    df_concat = pd.concat([df[col], salary], axis=1).dropna()

    salary_labels = [
        "Low(<10,000)", "Low-Med(10k-49k)",
        "Medium(49k-85k)", "High(85k-150k)",
        "Very High(150<)"]
    df_concat['salary_cat'] = pd.qcut(
        df_concat['CompTotal'], 5, labels=salary_labels)

    return pd.crosstab(df_concat[col], df_concat['salary_cat'])


def get_files():
    """Return the path of the first file found under ``/kaggle/input``.

    The directory tree is walked and the search stops at the first file
    encountered; ``None`` is returned when the walk yields no files.
    """
    candidates = (
        os.path.join(folder, entry)
        for folder, _, entries in os.walk('/kaggle/input')
        for entry in entries
    )
    return next(candidates, None)

In [None]:
def missing_bar()-> go.Figure:
    """Plot the number of missing values per column of ``survey``.

    Reads the module-level ``survey`` frame and adds one horizontal bar
    trace per column, ordered by missing count (largest first). The ten
    columns with the most missing values are drawn in orange and the
    rest in gray. The overall percentage of missing cells is annotated
    over a bubble shape.

    Returns:
        The result of ``paste_px_format`` applied to the finished figure.
    """
    title = 'Survey <b>Missing</b> Data by Features'

    # per-column missing counts, largest first
    missing_data = survey.isna().sum()
    missing_data = missing_data.to_frame().reset_index().rename(
        columns={'index': 'data_cols', 0: 'counts'})
    missing_data = missing_data.sort_values(by='counts', ascending=False)
    # share of all cells that are missing, in percent
    missing_perc = np.round(
        (survey.isna().sum().sum() / survey.size) * 100, 2)

    # One colour per bar: highlight the ten worst columns.
    # (The old `colors[:10] = ['Orange']` shrank the list to a single
    # orange entry, and `update_traces(marker_color=colors)` handed the same
    # list to every one-bar trace, so all bars came out orange.)
    highlighted = 10
    colors = [
        'Orange' if rank < highlighted else 'Gray'
        for rank in range(len(missing_data))]

    # create figure, one trace per column so each bar carries its own name
    fig = go.Figure()
    for label, count, color in zip(
            missing_data['data_cols'].to_list(),
            missing_data['counts'],
            colors):
        fig.add_trace(go.Bar(
            y=[label],
            x=[count],
            name=label,
            orientation='h',
            marker_color=color))

    # tweak layout
    fig.update_xaxes(title='Missing Counts')
    fig.update_yaxes(title='Features', tickmode='linear')

    fig.add_annotation(xref='paper', yref='paper',
        x=0.71, y=0.70, text=f"""
            {missing_perc}%""",
        font={'size': 20, 'color': 'White'},
        showarrow=False)

    fig.add_annotation(xref='paper', yref='paper',
        x=0.68, y=0.67, text="Missing",
        font={'size': 15, 'color': 'Gray'},
        showarrow=False)

    # backdrop circle behind the two annotations
    add_bubble(fig)

    return paste_px_format(
        fig, title=title, height=1000, showlegend=False)

In [None]:
# Build the missing-data bar chart; as the cell's only expression, the
# returned figure is what the notebook displays.
missing_bar()

In [None]:
# tally every SexualOrientation answer; transpose so answers become columns
sexuality = survey['SexualOrientation'].value_counts().to_frame().transpose()

# the heterosexual tally stays as its own single-column frame
sexuality_hetero = sexuality['Straight or heterosexual'].to_frame()

# every column from the third one onward is summed into one 'LGBTQ+' column
sexuality_lgbtq = sexuality.iloc[:, 2:].sum(axis=1).to_frame(name='LGBTQ+')

# two-column frame read by the donut plot
sexuality = pd.concat([sexuality_hetero, sexuality_lgbtq], axis=1)

In [None]:
def plot_donut():
    """Draw a donut chart of heterosexual vs. LGBTQ+ respondent counts.

    Reads the first row of the module-level ``sexuality`` frame, renders
    it with matplotlib as an exploded pie, overlays a white centre circle
    to form the donut, and shows the figure. Returns nothing.
    """
    # Wedge colours. The previous version derived this palette from a
    # copy-pasted missing-data computation over ``survey`` that had nothing
    # to do with this chart; only the first two entries were ever used.
    colors = ['Orange', 'Gray']

    # wedge labels, in the column order of ``sexuality``
    labels = ['Heterosexual', 'LGBTQ+']

    # pull both wedges slightly away from the centre
    explode = (0.05, 0.05)

    # Pie Chart
    plt.pie(sexuality.iloc[0, :], colors=colors, labels=labels,
        autopct='%1.1f%%', pctdistance=0.85,
        explode=explode)

    # white circle over the middle of the pie turns it into a donut
    centre_circle = plt.Circle((0, 0), 0.50, fc='white')
    plt.gcf().gca().add_artist(centre_circle)

    plt.title('Sexuality distribution for Survey')
    plt.show()

In [None]:
plot_donut()

In [None]:
def plot_races():
    """Draw a donut chart from the module-level ``sexuality`` frame.

    NOTE(review): despite its name, this function plots the sexuality
    counts and uses the sexuality title; the body is a copy of
    ``plot_donut``. It looks like a placeholder for a race/ethnicity
    chart. Confirm the intended data source before relying on it.
    Returns nothing.
    """
    # Wedge colours. The previous version derived this palette from a
    # copy-pasted missing-data computation over ``survey`` that had nothing
    # to do with this chart; only the first two entries were ever used.
    colors = ['Orange', 'Gray']

    # wedge labels, in the column order of ``sexuality``
    labels = ['Heterosexual', 'LGBTQ+']

    # pull both wedges slightly away from the centre
    explode = (0.05, 0.05)

    # Pie Chart
    plt.pie(sexuality.iloc[0, :], colors=colors, labels=labels,
        autopct='%1.1f%%', pctdistance=0.85,
        explode=explode)

    # white circle over the middle of the pie turns it into a donut
    centre_circle = plt.Circle((0, 0), 0.50, fc='white')
    plt.gcf().gca().add_artist(centre_circle)

    plt.title('Sexuality distribution for Survey')
    plt.show()

In [None]:
# Display the `ethnicity` object as the cell output.
# NOTE(review): `ethnicity` is not defined in the visible cells; confirm it
# is created elsewhere before this cell runs.
ethnicity

In [None]:
# colour scale for the heatmaps: the palette below is listed orange-to-gray
# and then reversed, so the scale runs from light gray up to orange
cmap = np.asarray(
    ["#ff9f1c", "#ffbf69", "#ffca85", "#e6e6e6", "#f0f0f0"])[::-1]


def plot_heatmap(z,y,x, title, **kwargs)-> go.Figure:
    """Build an annotated Plotly heatmap styled with the shared layout.

    ``z``, ``x`` and ``y`` are passed to ``ff.create_annotated_heatmap``
    together with the module-level ``cmap`` colour scale. ``title`` and any
    extra ``kwargs`` are passed on to ``paste_px_format``, whose result is
    returned.
    """
    heatmap = ff.create_annotated_heatmap(
        z=z, x=x, y=y, colorscale=cmap)
    # leave a one-unit gap between neighbouring cells
    heatmap.update_traces(xgap=1, ygap=1)
    styled = paste_px_format(heatmap, title=title, **kwargs)
    return styled