In [1]:
import pandas as pd
import re
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

### Create a pandas DataFrame

In [2]:
# Column names of the skill inventory, in the order the fields appear in each record.
SKILL_COLUMNS = (
    'software_manufacturer',
    'domain',
    'category',
    'tool',
    'platform',
    'proficiency_level',
)

# One record per tool: (manufacturer, domain, category, tool, platform, proficiency 1-10).
SKILL_RECORDS = [
    ('Plotly Inc.', 'Data Analytics', 'Visualization', 'Plotly', 'Python', 9),
    ('Plotly Inc.', 'Web Development', 'Framework', 'Dash', 'Python', 5),
    ('Python Community', 'Data Analytics', 'Visualization', 'Seaborn', 'Python', 4),
    ('Python Community', 'Data Analytics', 'Visualization', 'Matplotlib', 'Python', 6),
    ('Python Community', 'Data Analytics', 'Analysis', 'Pandas', 'Python', 9),
    ('Python Community', 'Data Analytics', 'Analysis', 'Numpy', 'Python', 4),
    ('Explosion AI', 'Data Analytics', 'Analysis', 'spaCy', 'Python', 3),
    ('Oracle Corporation', 'Data Engineering', 'DB Management', 'MySQL', 'RDBMS', 4),
    ('SQLite Consortium', 'Data Engineering', 'DB Management', 'SQLite', 'RDBMS', 5),
    ('Google', 'Data Engineering', 'Analysis', 'BigQuery', 'Cloud Platform', 8),
    ('Django Foundation', 'Web Development', 'Framework', 'Django', 'Python', 7),
    ('Streamlit Inc.', 'Web Development', 'Framework', 'Streamlit', 'Python', 8),
    ('Python Community', 'Web Development', 'Framework', 'Pipenv', 'Python', 7),
    ('Microsoft', 'Web Development', 'Framework', 'Powershell', '.NET', 7),
    ('Google', 'Business Intelligence', 'Productivity Tools', 'Sheets', 'Workspace', 10),
    ('Google', 'Business Intelligence', 'Productivity Tools', 'Docs', 'Workspace', 10),
    ('Google', 'Business Intelligence', 'Productivity Tools', 'Slides', 'Workspace', 10),
    ('Google', 'Business Intelligence', 'Productivity Tools', 'AppScript', 'Workspace', 10),
    ('Google', 'Digital Marketing', 'Performance Optimization', 'Analytics 4', 'Marketing Platform', 9),
    ('Google', 'Digital Marketing', 'Tag Management', 'Tag Manager', 'Marketing Platform', 9),
    ('Google', 'Digital Marketing', 'Debugging', 'Chrome Developer Tools', 'Workspace', 8),
    ('Django Foundation', 'Web Development', 'Debugging', 'Django Debug Tools', 'Python', 5),
    ('Google', 'Digital Marketing', 'Debugging', 'Analytics Debugger', 'Marketing Platform', 10),
    ('Microsoft', 'Business Intelligence', 'Analysis', 'Power BI', '.NET', 6),
    ('Microsoft', 'Business Intelligence', 'Analysis', 'Excel', '.NET', 8),
    ('Salesforce', 'Business Intelligence', 'Analysis', 'Tableau', 'Python', 6),
    ('Bootstrap', 'Web Development', 'Framework', 'Bootstrap', 'HTML', 6),
    ('Siemens', 'Computer Aided Design', 'Product Design', 'Siemens NX', 'Python', 9),
    ('Autodesk', 'Finite Element Method', 'Product Design', 'Autodesk Fusion', 'Python', 10),
    ('Ansys', 'Computational Fluid Dynamics', 'Mechanical Analysis', 'Ansys Workbench', 'Python', 7),
]

# Pivot the records into a column-oriented mapping; column order and row order
# both follow the definitions above.
data = {
    column: [record[position] for record in SKILL_RECORDS]
    for position, column in enumerate(SKILL_COLUMNS)
}

# Convert to DataFrame
df = pd.DataFrame(data)

### icon, proficiency_level and product_type mapping

In [3]:
# Icon file (relative path) to show for each software manufacturer.
icon_mapping = dict([
    ('Plotly Inc.', 'icons/tool_300/plotly_300x300.png'),
    ('Python Community', 'icons/tool_300/python_300x300.png'),
    ('Explosion AI', 'icons/tool_300/explosion_ai_300x300.png'),
    ('Oracle Corporation', 'icons/tool_300/oracle_300x300.png'),
    ('SQLite Consortium', 'icons/tool_300/sqlite_consortium_300x300.png'),
    ('Google', 'icons/tool_300/google_300x300.png'),
    ('Django Foundation', 'icons/tool_300/django_300x129.png'),
    ('Streamlit Inc.', 'icons/tool_300/streamlit_300x143.png'),
    ('Microsoft', 'icons/tool_300/microsoft_300x300.png'),
    ('Salesforce', 'icons/tool_300/salesforce_300x210.png'),
    ('Bootstrap', 'icons/tool_300/bootstrap_300x239.png'),
    ('Siemens', 'icons/tool_300/siemens_300x213.png'),
    ('Autodesk', 'icons/tool_300/autodesk_300x64.png'),
    ('Ansys', 'icons/tool_300/ansys_300x95.png'),
])

# License / delivery model recorded for each tool, keyed by the value in the 'tool' column.
product_type_mapping = dict((
    ('Plotly', 'Open Source'),
    ('Dash', 'Open Source'),
    ('Seaborn', 'Open Source'),
    ('Matplotlib', 'Open Source'),
    ('Pandas', 'Open Source'),
    ('Numpy', 'Open Source'),
    ('spaCy', 'Open Source'),
    ('MySQL', 'Open Source'),
    ('SQLite', 'Open Source'),
    ('BigQuery', 'PaaS'),
    ('Django', 'Open Source'),
    ('Streamlit', 'SaaS'),
    ('Pipenv', 'Open Source'),
    ('Powershell', 'IaaS'),
    ('Sheets', 'SaaS'),
    ('Docs', 'SaaS'),
    ('Slides', 'SaaS'),
    ('AppScript', 'SaaS'),
    ('Analytics 4', 'SaaS'),
    ('Tag Manager', 'SaaS'),
    ('Chrome Developer Tools', 'SaaS'),
    ('Django Debug Tools', 'Open Source'),
    ('Analytics Debugger', 'SaaS'),
    ('Power BI', 'SaaS'),
    ('Excel', 'SaaS'),
    ('Tableau', 'SaaS'),
    ('Bootstrap', 'Open Source'),
    ('Siemens NX', 'Commercial'),
    ('Autodesk Fusion', 'Commercial'),
    ('Ansys Workbench', 'Commercial'),
))

# Function to generate proficiency description
def generate_proficiency_description(level):
    """
    Translate a 1-10 proficiency score into a first-person description.

    Parameters:
    level (int, float or str): The proficiency score. Integers, their string
        form ('7') and integer-valued floats (7.0) are all accepted.

    Returns:
    str: The description for the score, or 'Proficiency level not found.'
        when the score is not one of the ten known levels.
    """
    descriptions = {
        '1': 'I have just started exploring this.',
        '2': 'I am slightly familiar with the basics.',
        '3': 'I can understand the fundamental concepts.',
        '4': 'I have some experience using this.',
        '5': 'I have a moderate understanding of this.',
        '6': 'I have a solid grasp and can apply this knowledge.',
        '7': 'I am confident in using this independently.',
        '8': 'I can tackle complex tasks using this.',
        '9': 'I have significant hands-on experience with this.',
        '10': 'I am an expert in using this.'
    }

    # A numeric column that contains missing values is stored as float, so a
    # score can arrive as 9.0; normalise it so it does not stringify to '9.0'.
    if isinstance(level, float) and level.is_integer():
        level = int(level)

    # Exact lookup on the string form: same matching rule as a full-string
    # comparison, without compiling one regular expression per level.
    return descriptions.get(str(level), 'Proficiency level not found.')

# Human-readable sentence for every numeric proficiency score.
df['proficiency_description'] = df['proficiency_level'].apply(generate_proficiency_description)

# Dictionary-backed columns: each new column is looked up from an existing key column.
for derived_column, key_column, lookup in (
    ('product_type', 'tool', product_type_mapping),
    ('icons', 'software_manufacturer', icon_mapping),
):
    df[derived_column] = df[key_column].map(lookup)

### Export the DataFrame to CSV and pickle

In [4]:
def export_dataframe(df, pickle_file_path, csv_file_path):
    """
    Persist a DataFrame twice: as a pickle file and as a CSV file.

    The pickle is written first, then the CSV (without the index column).
    A confirmation line is printed after each successful write.

    Parameters:
    df (pd.DataFrame): The DataFrame to export.
    pickle_file_path (str): Destination of the pickle file.
    csv_file_path (str): Destination of the CSV file.
    """
    # Each step pairs the write action with the message printed once it is done.
    export_steps = (
        (
            lambda: df.to_pickle(pickle_file_path),
            f"DataFrame exported to pickle file: {pickle_file_path}",
        ),
        (
            lambda: df.to_csv(csv_file_path, index=False),
            f"DataFrame exported to CSV file: {csv_file_path}",
        ),
    )

    for write_file, confirmation in export_steps:
        write_file()
        print(confirmation)


# Output locations for the two exports of the module-level `df`.
pickle_path, csv_path = 'data.pkl', 'data.csv'

export_dataframe(df, pickle_file_path=pickle_path, csv_file_path=csv_path)


DataFrame exported to pickle file: data.pkl
DataFrame exported to CSV file: data.csv


### Load data from a Pickle file

In [5]:

def load_data(file_path):
    """
    Load a DataFrame from a pickle file.

    Parameters:
    file_path (str): The path to the pickle file.

    Returns:
    pd.DataFrame: The object stored in the pickle file.
    """
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    return pd.read_pickle(file_path)

# Read the exported pickle back in and preview the first three rows.
file_path = 'data.pkl'
df = load_data(file_path=file_path)

df.head(n=3)

Unnamed: 0,software_manufacturer,domain,category,tool,platform,proficiency_level,proficiency_description,product_type,icons
0,Plotly Inc.,Data Analytics,Visualization,Plotly,Python,9,I have significant hands-on experience with this.,Open Source,icons/tool_300/plotly_300x300.png
1,Plotly Inc.,Web Development,Framework,Dash,Python,5,I have a moderate understanding of this.,Open Source,icons/tool_300/plotly_300x300.png
2,Python Community,Data Analytics,Visualization,Seaborn,Python,4,I have some experience using this.,Open Source,icons/tool_300/python_300x300.png


### Custom hover text function

In [6]:
# Create custom hover text based on node type
def generate_custom_hover(row, is_child, column_name=None):
    """
    Build the HTML hover text for one node of the skills plot.

    Parameters:
    row (pd.Series or dict): Record providing the fields read below. A child
        node needs 'platform', 'tool', 'software_manufacturer', 'domain',
        'category', 'proficiency_level' and 'product_type'; a parent node
        needs 'proficiency_level' and the column named by column_name.
    is_child (bool): True for a leaf (tool) node, False for a parent node.
    column_name (str): Column whose value labels a parent node. Required when
        is_child is False; ignored otherwise. Default is None.

    Returns:
    str: Hover text with lines separated by '<br>'.

    Raises:
    ValueError: If is_child is False and column_name is not given.
    """
    if not is_child:
        if column_name is None:
            raise ValueError("column_name is required to build the hover text of a parent node")
        level = row['proficiency_level']
        # An aggregated row may carry several levels (sum them); a plain
        # record carries a single number, which has no .sum() to call.
        total_level = level.sum() if hasattr(level, 'sum') else level
        return f"{row[column_name]}<br>Proficiency Level: {total_level}"

    manufacturer = row['software_manufacturer']
    product_type = row['product_type']
    platform_text = _qualify_with_manufacturer(row['platform'], manufacturer)
    tool_text = _qualify_with_manufacturer(row['tool'], manufacturer)
    # 'IaaS' is pronounced with a leading vowel sound, like 'Open Source' and 'FaaS'.
    article = 'an' if product_type in ('Open Source', 'IaaS', 'FaaS') else 'a'

    return (
        f"Platform: {platform_text}<br>"
        f"Tool: {tool_text}<br>"
        f"Primarily used in {row['domain'].lower()}<br>"
        f"Category: {row['category']}<br>"
        f"Proficiency Level: {row['proficiency_level']}<br>"
        f"It is {article} {product_type} product"
    )


def _qualify_with_manufacturer(name, manufacturer):
    """Return name as-is when one of its words occurs in manufacturer, else 'manufacturer (name)'."""
    if any(part in manufacturer for part in name.split()):
        return name
    return manufacturer + ' (' + name + ')'

# Every row starts out as a child (leaf) node, so build the child-style hover
# text for each record; is_child is forwarded to generate_custom_hover by apply.
df['hover_template'] = df.apply(generate_custom_hover, axis=1, is_child=True)


### Color discrete map

In [8]:
def create_color_map(df, category_column, palette=None):
    """
    Create a color map for unique values in a specified column.

    Every unique value receives a color. When there are more unique values
    than colors in the palette, the palette is reused from its start instead
    of leaving the remaining values without an entry.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the data.
    category_column (str): The column name with categorical data.
    palette (sequence of str): Colors to assign, in order. Default is None,
        which uses px.colors.sequential.Jet.

    Returns:
    color_discrete_map (dict): A dictionary mapping each unique category (in
        order of first appearance) to a color.

    Raises:
    ValueError: If the palette is empty.
    """
    # Extract unique categories
    unique_values_list = df[category_column].unique().tolist()

    if palette is None:
        palette = px.colors.sequential.Jet
    palette = list(palette)
    if len(palette) == 0:
        raise ValueError("palette must contain at least one color")

    # Wrap around the palette so that zip() never truncates the categories.
    color_discrete_map = {
        value: palette[position % len(palette)]
        for position, value in enumerate(unique_values_list)
    }

    return color_discrete_map

# One color per distinct value of the 'category' column of `df`.
color_discrete_map = create_color_map(df, category_column='category')

### Function to create, update and save plots

In [71]:
def prepare_and_save_plot(fig, width=600, height=600, template='plotly_dark', 
                          plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)', 
                          font_family="Open Sans, sans-serif", font_size=10, font_color="white", 
                          hover_template=None, base_filename='plot', auto_open=False, 
                          counter=1, title=None, xaxis_title=None, yaxis_title=None, 
                          colorbar_title=None):
    """
    Update the layout of the Plotly figure, set the hover template, and save the plot to an HTML file
    named '<base_filename>_<counter>.html'.

    Parameters:
    fig (go.Figure): The Plotly figure to be updated (modified in place).
    width (int): Width of the figure. Default is 600.
    height (int): Height of the figure. Default is 600.
    template (str): Plotly template for the figure. Default is 'plotly_dark'.
    plot_bgcolor (str): Background color of the plot area. Default is transparent.
    paper_bgcolor (str): Background color of the paper area. Default is transparent.
    font_family (str): Font family for the text. Default is 'Open Sans, sans-serif'.
    font_size (int): Font size for the text. Default is 10.
    font_color (str): Font color for the text. Default is 'white'.
    hover_template (str): Custom hover template for traces. Default is None (traces are left unchanged).
    base_filename (str): Base name for the file. Default is 'plot'.
    auto_open (bool): Whether to open the file in the browser after saving. Default is False.
    counter (int or str): Suffix appended to base_filename; it is not incremented here. Default is 1.
    title (str): Figure title. Default is None (existing title is kept).
    xaxis_title (str): Title of the x axis. Default is None (existing title is kept).
    yaxis_title (str): Title of the y axis. Default is None (existing title is kept).
    colorbar_title (str): Title of the colorbar of the color axis. Default is None (existing title is kept).

    Returns:
    fig (go.Figure): The updated Plotly figure.
    """
    # Settings that are always applied
    layout_updates = dict(
        width=width,
        height=height,
        template=template,
        plot_bgcolor=plot_bgcolor,
        paper_bgcolor=paper_bgcolor,
        font=dict(
            family=font_family,
            size=font_size,
            color=font_color
        ),
    )

    # Forward only the titles that were supplied: assigning None to a Plotly
    # property unsets it, which would discard a title already on the figure.
    optional_titles = {
        'title': title,
        'coloraxis_colorbar_title': colorbar_title,
        'xaxis_title': xaxis_title,
        'yaxis_title': yaxis_title,
    }
    layout_updates.update(
        {key: value for key, value in optional_titles.items() if value is not None}
    )

    fig.update_layout(**layout_updates)

    # Set custom hover template if provided
    if hover_template:
        fig.update_traces(
            hovertemplate=hover_template
        )

    # Generate filename with counter
    filename = f'{base_filename}_{counter}.html'

    # Save the figure to an HTML file
    pio.write_html(fig, file=filename, auto_open=auto_open)

    return fig

In [72]:
def snake_to_title_case(snake_str):
    """
    Turn a snake_case identifier into a space-separated Title Case label.

    Parameters:
    snake_str (str): The snake_case string to convert.

    Returns:
    str: The words of snake_str, each title-cased and joined by single spaces.
    """
    # Underscores delimit the words; title-case each word on its own.
    words = snake_str.split('_')
    return ' '.join(map(str.title, words))

def create_dimension_labels(dimensions):
    """
    Build Title Case display labels for a list of snake_case dimension names.

    Parameters:
    dimensions (list of str): List of snake_case dimension names.

    Returns:
    list of str: One Title Case label per dimension, in the same order.
    """
    return list(map(snake_to_title_case, dimensions))



In [98]:
def create_parallel_categories_plot(df, dimensions, color, colorbar_title=None, labels=None):
    """
    Create a parallel categories plot using Plotly Express.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the data.
    dimensions (list of str): List of column names to define the dimensions in the parallel categories plot.
    color (str): The column name for colors of the categories.
    colorbar_title (str): Title to set on the colorbar of the color axis. Default is None
        (the colorbar is left as Plotly Express created it).
    labels (dict): Mapping of column names to the labels to display, passed through to
        Plotly Express. Default is None.

    Returns:
    fig (go.Figure): The Plotly parallel categories figure.
    """
    fig = px.parallel_categories(
        df,
        dimensions=dimensions,
        color=color,
        labels=labels
    )

    # Apply the requested colorbar title so the argument is not silently ignored.
    if colorbar_title is not None:
        fig.update_layout(coloraxis_colorbar_title_text=colorbar_title)

    return fig

### Creating and updating the parallel categories plot for the entire skills section

In [99]:
# Columns shown as dimensions, the column that drives the color, and the colorbar title.
dimensions = ['software_manufacturer', 'domain', 'category', 'tool']
color, colorbar_title = 'proficiency_level', 'Level'

# Display label for each dimension column.
dimension_labels = {
    'software_manufacturer': 'Corporation',
    'domain': 'Domain',
    'category': 'Category',
    'tool': 'Tool',
}

fig = create_parallel_categories_plot(
    df,
    dimensions,
    color,
    colorbar_title=colorbar_title,
    labels=dimension_labels,
)

# Layout and export settings for the skills figure; the file is written as
# '<base_filename>_<counter>.html' and opened in the browser afterwards.
save_options = dict(
    width=1150,
    height=650,
    #title="Skill Distribution and Proficiency ",
    base_filename='parallel_categories_plot',
    auto_open=True,
    counter='skills',
    colorbar_title=colorbar_title,
)

prepare_and_save_plot(fig, **save_options)