In [1]:
pip install -U kaleido

Note: you may need to restart the kernel to use updated packages.


In [3]:
import os

import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.preprocessing import StandardScaler

# Build a synthetic data set: three job roles, each a Gaussian blob in a
# 3-dimensional "skills" space. The global NumPy seed fixes the draws.
np.random.seed(42)

samples_per_role = 50
skill_columns = ['Technical Skills', 'Analytics Skills', 'Programming Skills']

# Mean skill levels per role, in the order of `skill_columns`.
# Insertion order matters: it is the order in which the random draws are
# made, so it determines the exact values produced under the fixed seed.
role_centers = {
    'Data Engineer': [0.8, 0.7, 0.6],
    'Data Scientist': [0.3, 0.8, 0.9],
    'ML Engineer': [0.6, 0.4, 0.8],
}

# One (samples_per_role, 3) array per role, standard deviation 0.1 on every axis
role_clusters = {
    role: np.random.normal(loc=center, scale=0.1, size=(samples_per_role, 3))
    for role, center in role_centers.items()
}
data_eng_skills, data_sci_skills, ml_eng_skills = role_clusters.values()

# Stack the per-role samples row-wise into a single (150, 3) array
all_skills = np.concatenate(list(role_clusters.values()), axis=0)

# One label per row, in the same role order as the stacked samples
labels = [role for role in role_centers for _ in range(samples_per_role)]

# Assemble the DataFrame: three skill columns plus the role label
df = pd.DataFrame(all_skills, columns=skill_columns).assign(**{'Job Title': labels})

# Create the 3D scatter plot: one marker per sample, coloured by job title
role_palette = ['#FF6B6B', '#AEFF55', '#8599FF']
fig = px.scatter_3d(
    data_frame=df,
    x='Technical Skills',
    y='Analytics Skills',
    z='Programming Skills',
    color='Job Title',
    title='Job Skills Clusters by Role',
    color_discrete_sequence=role_palette,
    opacity=0.7,
)

# Camera: z axis points up, looking at the scene centre from the (1.5, 1.5, 1.5) corner
camera_settings = {
    'up': {'x': 0, 'y': 0, 'z': 1},
    'center': {'x': 0, 'y': 0, 'z': 0},
    'eye': {'x': 1.5, 'y': 1.5, 'z': 1.5},
}

# 3D scene: axis titles plus the camera defined above
scene_settings = {
    'xaxis_title': 'Technical Skills',
    'yaxis_title': 'Analytics Skills',
    'zaxis_title': 'Programming Skills',
    'camera': camera_settings,
}

# Legend anchored by its top-left corner near the top-left of the figure
legend_settings = {'yanchor': "top", 'y': 0.99, 'xanchor': "left", 'x': 0.01}

# Margins: none on the sides and bottom, 30 px on top for the title
margin_settings = {'l': 0, 'r': 0, 't': 30, 'b': 0}

# Apply marker size and layout tweaks for better visualization
fig.update_traces(marker={'size': 6})
fig.update_layout(
    scene=scene_settings,
    legend=legend_settings,
    margin=margin_settings,
)

# Show the plot
fig.show()

# Export options
#
# Both files are written into EXPORT_DIR. The default "." is the kernel's
# current working directory, which is where the files were written before.
# The earlier `os.chdir("./")` changed into the directory the kernel was
# already in, so it never selected an export location; a named directory
# makes the destination explicit and avoids mutating process-wide state.
EXPORT_DIR = "."  # e.g. "exports" to collect the output files in a sub-folder
os.makedirs(EXPORT_DIR, exist_ok=True)  # no-op when the directory already exists

# Export as HTML (interactive version of the figure)
fig.write_html(os.path.join(EXPORT_DIR, "job_skills_clusters.html"))

# Export as PNG (static image; relies on the kaleido package installed in the
# first cell of this notebook)
fig.write_image(os.path.join(EXPORT_DIR, "job_skills_clusters.png"))