In [2]:
import pandas as pd
from analyzer import *
import plotly.graph_objects as go
import plotly.io as pio
df = pd.read_csv('lista_zawodow_filtrowana.csv')
df.head()

Unnamed: 0,zawód,typ
0,Administrator baz danych,p
1,Administrator bezpieczeństwa informacji (Inspe...,a
2,Administrator stron internetowych,a
3,Administrator systemów komputerowych,p
4,Analityk baz danych,a


In [2]:
df.sort_values(by=['typ'])

Unnamed: 0,zawód,typ
29,Statystyk,a
1,Administrator bezpieczeństwa informacji (Inspe...,a
2,Administrator stron internetowych,a
21,Operator aplikacji komputerowych,a
4,Analityk baz danych,a
5,Analityk sieci komputerowych,a
6,Analityk systemów teleinformatycznych,a
31,Technik cyfrowych procesów graficznych,a
30,Technik analityk,a
10,Grafik komputerowy DTP,a


In [3]:
def plot_vital_competencies_per_role(comps, role='analityków'):
    "plots vital competencies in 2018, 2020, 2022 for role in one big plot"
    # Extract keys and values
    keys = list(comps[0].keys())
    values1 = list(comps[0].values())
    values2 = list(comps[1].values())
    values3 = list(comps[2].values())

    # Create the bar plot
    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=keys,
        y=values1,
        name='2018'
    ))

    fig.add_trace(go.Bar(
        x=keys,
        y=values2,
        name='2020'
    ))

    fig.add_trace(go.Bar(
        x=keys,
        y=values3,
        name='2022'
    ))

    # Update the layout to show the bars grouped
    fig.update_layout(
        barmode='group',
        title=f'Kluczowe kompetencje wśród {role} w latach 2018-2022',
        xaxis_title='Kompetencje BKL',
        yaxis_title='Punktowanie'
    )
    return fig

In [4]:
def plot_competencies(competency_sums, top_5_most, top_5_least, title, filename, kompetencje_klucz_nazwa=None):
    # Extract the keys and values from the competency sums dictionary
    competencies = list(competency_sums.keys())
    values = list(competency_sums.values())
    
    # Create a color list where default color is skyblue
    colors = ['skyblue'] * len(values)
    
    # Assign colors to the top 5 most and least popular competencies
    for comp in top_5_most.keys():
        idx = competencies.index(comp)
        colors[idx] = 'green'
    for comp in top_5_least.keys():
        idx = competencies.index(comp)
        colors[idx] = 'red'
    # Create the bar plot
    fig = go.Figure()
    remap_labels = kompetencje_klucz_nazwa
    print(df.index)
    #for dict_name in df.columns[:-1]:  # Exclude the 'Total' column
    fig.add_trace(go.Bar(
        x=competencies,
        y=values, 
        customdata=[remap_labels[key] for key in competencies],
        hovertemplate='<b>%{customdata}</b><br>Wynik: %{y}<extra></extra>',
    ))

# Update the layout to show the bars grouped and with custom colors
    fig.update_traces(marker=dict(color=colors))

    fig.update_layout(
        barmode='group',
        title=title,
        xaxis_title='Kompetencje BKL',
        yaxis_title='Wynik (skala 1-5)'
    )
    # Save the plot to an HTML file
    pio.write_html(fig, file=f'./results/plots/{title}.html', auto_open=True)
    return fig

In [5]:

df = pd.read_csv(df_bkl_2018_2022_filename)

# Filter rows based on key positions
df_it = filter_it(df)
print("IT related rows:", df_it.shape[0])

# Divide by year and job type
dfs = divide_by_year_and_job_type(df_it)

# Count competencies for each df and save results

years = [2018, 2020, 2022]
job_types = ['a', 'p', 't']
job_type_names = {'a': 'Analysts', 'p': 'Programmers', 't': 'Technicians'}

# Prepare results list for CSV
results = []

# Ensure results directories exist
os.makedirs('results/plots', exist_ok=True)
comps = []
for year in years:
    comps_role = []
    for job_type in job_types:
        competencies_count = count_competencies(dfs[year][job_type])
        comps_role.append(competencies_count)
        top_5_most, top_5_least = determine_popularities(competencies_count)
    comps.append(comps_role)


fig1 = plot_vital_competencies_per_role(comps[0])
fig1.show()


  df = pd.read_csv(df_bkl_2018_2022_filename)


IT related rows: 193


In [6]:
fig2 = plot_vital_competencies_per_role(comps[1], role='programistów')
# Show the plot
fig2.show()

In [7]:
fig3 = plot_vital_competencies_per_role(comps[2], role='specjalistów telekomunikacji')
# Show the plot
fig3.show()

In [11]:
figs = []
for year in years:
    market_share=[]
    for job_type in job_types:
        competencies_count = count_competencies(dfs[year][job_type])
        top_5_most, top_5_least = determine_popularities(competencies_count)
        market_share.append(len(dfs[year][job_type]))
        # Save plot
        title = f"Competencies for {job_type_names[job_type]} in {year}"
        filename = f"results/plots/{job_type_names[job_type]}_{year}.png"
        fig = plot_competencies(competencies_count, top_5_most, top_5_least, title, filename, kompetencje_klucz_nazwa=kompetencje_klucz_nazwa)
        fig.show()
    figx = go.Figure(data=[go.Pie(
    labels=list(job_type_names.values()),  # Categories as labels
    values=market_share,  # Counts as values
    hole=0.3  # Set the size of the hole in the middle of the pie chart (0 for no hole)
)])

    # Update layout
    figx.update_layout(
        title=f'Struktura rynku IT ujętego w BKL w roku {year} <br>- udział firm o profilu analitycznym/programistycznym/technicznym',
        showlegend=True,
        height=400,  # Adjust the height of the plot area
    width=800,  # Show legend
    )

# Show plot
    figx.show()
    pio.write_html(figx, file=f'./results/plots/struktura_rynku_{year}.html', auto_open=True)
        # # Add results to the list for CSV
        # result_row = {
        #     'year': year,
        #     'job_type': job_type_names[job_type],
        #     **{f'top_{i+1}': kompetencje_klucz_nazwa[comp] for i, comp in enumerate(top_5_most.keys())},
        #     **{f'least_{i+1}': kompetencje_klucz_nazwa[comp] for i, comp in enumerate(top_5_least.keys())}
        # }
        # results.append(result_row)


RangeIndex(start=0, stop=3149, step=1)


RangeIndex(start=0, stop=3149, step=1)


RangeIndex(start=0, stop=3149, step=1)


RangeIndex(start=0, stop=3149, step=1)


RangeIndex(start=0, stop=3149, step=1)


RangeIndex(start=0, stop=3149, step=1)


RangeIndex(start=0, stop=3149, step=1)


RangeIndex(start=0, stop=3149, step=1)


RangeIndex(start=0, stop=3149, step=1)
