In [1]:
import pandas as pd
from analyzer import *
import plotly.graph_objects as go
import plotly.io as pio
df = pd.read_csv('lista_zawodow_filtrowana.csv')
df.head()

Unnamed: 0,zawód,typ
0,Administrator baz danych,p
1,Administrator bezpieczeństwa informacji (Inspe...,a
2,Administrator stron internetowych,a
3,Administrator systemów komputerowych,p
4,Analityk baz danych,a


In [2]:
df.sort_values(by=['typ'])

Unnamed: 0,zawód,typ
29,Statystyk,a
1,Administrator bezpieczeństwa informacji (Inspe...,a
2,Administrator stron internetowych,a
21,Operator aplikacji komputerowych,a
4,Analityk baz danych,a
5,Analityk sieci komputerowych,a
6,Analityk systemów teleinformatycznych,a
31,Technik cyfrowych procesów graficznych,a
30,Technik analityk,a
10,Grafik komputerowy DTP,a


In [3]:
def plot_vital_competencies_per_role(comps, role='analityków'):
    "plots vital competencies in 2018, 2020, 2022 for role in one big plot"
    # Extract keys and values
    keys = list(comps[0].keys())
    values1 = list(comps[0].values())
    values2 = list(comps[1].values())
    values3 = list(comps[2].values())

    # Create the bar plot
    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=keys,
        y=values1,
        name='2018'
    ))

    fig.add_trace(go.Bar(
        x=keys,
        y=values2,
        name='2020'
    ))

    fig.add_trace(go.Bar(
        x=keys,
        y=values3,
        name='2022'
    ))

    # Update the layout to show the bars grouped
    fig.update_layout(
        barmode='group',
        title=f'Kluczowe kompetencje wśród {role} w latach 2018-2022',
        xaxis_title='Kompetencje BKL',
        yaxis_title='Punktowanie'
    )
    return fig

In [4]:
def plot_competencies(competency_sums, top_5_most, top_5_least, title, filename, kompetencje_klucz_nazwa=None):
    # Extract the keys and values from the competency sums dictionary
    competencies = list(competency_sums.keys())
    values = list(competency_sums.values())
    
    # Create a color list where default color is skyblue
    colors = ['skyblue'] * len(values)
    
    # Assign colors to the top 5 most and least popular competencies
    for comp in top_5_most.keys():
        idx = competencies.index(comp)
        colors[idx] = 'green'
    for comp in top_5_least.keys():
        idx = competencies.index(comp)
        colors[idx] = 'red'
    # Create the bar plot
    fig = go.Figure()
    remap_labels = kompetencje_klucz_nazwa
    print(df.index)
    #for dict_name in df.columns[:-1]:  # Exclude the 'Total' column
    fig.add_trace(go.Bar(
        x=competencies,
        y=values, 
        customdata=[remap_labels[key] for key in competencies],
        hovertemplate='<b>%{customdata}</b><br>Wynik: %{y}<extra></extra>',
    ))

# Update the layout to show the bars grouped and with custom colors
    fig.update_traces(marker=dict(color=colors))

    fig.update_layout(
        barmode='group',
        title=title,
        xaxis_title='Kompetencje BKL',
        yaxis_title='Wynik (skala 1-5)'
    )
    # Save the plot to an HTML file
    pio.write_html(fig, file=f'./results/plots/{title}.html', auto_open=True)
    return fig

In [5]:

df = pd.read_csv(df_bkl_2018_2022_filename)

# Filter rows based on key positions
df_it = filter_it(df)
print("IT related rows:", df_it.shape[0])

# Divide by year and job type
dfs = divide_by_year_and_job_type(df_it)

# Count competencies for each df and save results

years = [2018, 2020, 2022]
job_types = ['a', 'p', 't']
job_type_names = {'a': 'Analysts', 'p': 'Programmers', 't': 'Technicians'}

# Prepare results list for CSV
results = []

# Ensure results directories exist
os.makedirs('results/plots', exist_ok=True)
comps = []
for year in years:
    comps_role = []
    for job_type in job_types:
        competencies_count = count_competencies(dfs[year][job_type])
        comps_role.append(competencies_count)
        top_5_most, top_5_least = determine_popularities(competencies_count)
    comps.append(comps_role)


fig1 = plot_vital_competencies_per_role(comps[0])
fig1.show()


  df = pd.read_csv(df_bkl_2018_2022_filename)


IT related rows: 193


In [6]:
fig2 = plot_vital_competencies_per_role(comps[1], role='programistów')
# Show the plot
fig2.show()

In [7]:
fig3 = plot_vital_competencies_per_role(comps[2], role='specjalistów telekomunikacji')
# Show the plot
fig3.show()

In [8]:
figs = []
for year in years:
    market_share=[]
    for job_type in job_types:
        competencies_count = count_competencies(dfs[year][job_type])
        top_5_most, top_5_least = determine_popularities(competencies_count)
        market_share.append(len(dfs[year][job_type]))
        # Save plot
        title = f"Competencies for {job_type_names[job_type]} in {year}"
        filename = f"results/plots/{job_type_names[job_type]}_{year}.png"
        fig = plot_competencies(competencies_count, top_5_most, top_5_least, title, filename, kompetencje_klucz_nazwa=kompetencje_klucz_nazwa)
        fig.show()
    figx = go.Figure(data=[go.Pie(
    labels=list(job_type_names.values()),  # Categories as labels
    values=market_share,  # Counts as values
    hole=0.3  # Set the size of the hole in the middle of the pie chart (0 for no hole)
)])

    # Update layout
    figx.update_layout(
        title=f'Struktura rynku IT ujętego w BKL w roku {year} <br>- udział firm o profilu analitycznym/programistycznym/technicznym',
        showlegend=True,
        height=400,  # Adjust the height of the plot area
    width=800,  # Show legend
    )

# Show plot
    figx.show()
    pio.write_html(figx, file=f'./results/plots/struktura_rynku_{year}.html', auto_open=True)
        # # Add results to the list for CSV
        # result_row = {
        #     'year': year,
        #     'job_type': job_type_names[job_type],
        #     **{f'top_{i+1}': kompetencje_klucz_nazwa[comp] for i, comp in enumerate(top_5_most.keys())},
        #     **{f'least_{i+1}': kompetencje_klucz_nazwa[comp] for i, comp in enumerate(top_5_least.keys())}
        # }
        # results.append(result_row)


RangeIndex(start=0, stop=3149, step=1)


RangeIndex(start=0, stop=3149, step=1)


RangeIndex(start=0, stop=3149, step=1)


RangeIndex(start=0, stop=3149, step=1)


RangeIndex(start=0, stop=3149, step=1)


RangeIndex(start=0, stop=3149, step=1)


RangeIndex(start=0, stop=3149, step=1)


RangeIndex(start=0, stop=3149, step=1)


RangeIndex(start=0, stop=3149, step=1)


W9 - Biorąc pod uwagę wszystkie te wymagania, na jakie
MIESIĘCZNE wynagrodzenie BRUTTO mogłaby liczyć
zatrudniona osoba pracująca na tym stanowisku w pełnym
wymiarze godzin?

In [110]:
results_pay = {year: dict() for year in years}
for year in years:
    for job_type in job_types:
        #if 'W4' in dfs[year][job_type].columns.tolist():
        df = dfs[year][job_type]
        df['W9'] = df['W9'].fillna(0).astype(int)
        filtered_df = df[df['W9'] != 0]
        mean_exp = filtered_df['W9'].mean()
        results_pay[year][job_type] = mean_exp
results_pay
data = results_pay
# Prepare data for plotting
categories = ['a', 'p', 't']
years = list(data.keys())
a_values = [data[year]['a'] for year in years]
p_values = [data[year]['p'] for year in years]
t_values = [data[year]['t'] for year in years]

# Create traces for each year
trace_2018 = go.Bar(
    x=categories,
    y=[data[2018][cat] for cat in categories],
    text=[round(data[2018][cat],2) for cat in categories],
    name='2018'
)

trace_2020 = go.Bar(
    x=categories,
    y=[data[2020][cat] for cat in categories],
    text=[round(data[2020][cat],2) for cat in categories],
    name='2020'
)

trace_2022 = go.Bar(
    x=categories,
    y=[data[2022][cat] for cat in categories],
    text=[round(data[2022][cat], 2) for cat in categories],
    name='2022'
)

# Create the figure with grouped bar mode
fig = go.Figure(data=[trace_2018, trace_2020, trace_2022])

fig.update_layout(
    barmode='group',
    title='Miesięczne wynagrodzenie BRUTTO na jakie może liczyć osoba zatrudniona na tym stanowisku',
    xaxis_title='Kategoria - analitycy, programiści, technicy',
    yaxis_title='Wynagrodzenie (PLN)'
)

# Show plot
fig.show()

In [34]:
results_exp = {year: dict() for year in years}
for year in years:
    for job_type in job_types:
        #if 'W4' in dfs[year][job_type].columns.tolist():
        df = dfs[year][job_type]
        df['W4'] = df['W4'].fillna(0).astype(int)
        filtered_df = df[df['W4'] != 0]
        mean_exp = filtered_df['W4'].mean()
        results_exp[year][job_type] = mean_exp
results_exp

{2018: {'a': 2.2857142857142856, 'p': 3.0344827586206895, 't': 2.6},
 2020: {'a': 5.25, 'p': 2.6470588235294117, 't': 3.0},
 2022: {'a': 2.4285714285714284, 'p': 3.238095238095238, 't': 1.5}}

In [67]:
# Prepare data for plotting
data = results_exp
# Prepare data for plotting
categories = ['a', 'p', 't']
years = list(data.keys())
a_values = [data[year]['a'] for year in years]
p_values = [data[year]['p'] for year in years]
t_values = [data[year]['t'] for year in years]

# Create traces for each year
trace_2018 = go.Bar(
    x=categories,
    y=[data[2018][cat] for cat in categories],
    name='2018'
)

trace_2020 = go.Bar(
    x=categories,
    y=[data[2020][cat] for cat in categories],
    name='2020'
)

trace_2022 = go.Bar(
    x=categories,
    y=[data[2022][cat] for cat in categories],
    name='2022'
)

# Create the figure with grouped bar mode
fig = go.Figure(data=[trace_2018, trace_2020, trace_2022])

fig.update_layout(
    barmode='group',
    title='Długość wymaganego doświadczenia z podziałem na stanowiska',
    xaxis_title='Kategoria - analitycy, programiści, technicy',
    yaxis_title='Doświadczenie (lata)'
)

# Show plot
fig.show()

In [33]:
df = dfs[year][job_type]
filtered_df = df[df['W4'] != 0]
filtered_df['W4'].mean()

1.5

## Problemy ze znalezieniem odpowiednich pracowników

In [65]:
df_year = []
for year in years:
    df_toconcat = []
    for job_type in job_types:
        df_toconcat.append(dfs[year][job_type])
    df = pd.concat(df_toconcat)
    df['year']=year
    df_year.append(df)

df_ = pd.concat(df_year)

# Group by 'year' and 'answer', then count occurrences
grouped_df = df_.groupby(['year', 'T1']).size().reset_index(name='count')

# Create the plot
fig = px.bar(grouped_df, x='year', y='count', color='T1', barmode='group',
             title='Czy były problemy ze znalezieniem kandydatów na kluczowe stanowiska',
             labels={'count': 'Liczba odpowiedzi', 'year': 'Rok', 'T1': 'Odpowiedź'})

# Show plot
fig.show()

In [None]:
# 

Wymienię teraz różne narzędzia stosowane do
zarządzania zasobami ludzkimi, personelem.
Proszę powiedzieć które z nich są stosowane w
Państwa firmie/instytucji

In [84]:
def calculate_benefits(method_column, years, df_year):
    methods = [n for n in range(1, 14)]
    methods_stats = {method: dict() for method in methods}
    for year, df_y in zip(years, df_year):
        for method in methods:
            sum_used = df_y[f'{method_column}_{method}'].eq('Tak').sum()
            methods_stats[method][year] = sum_used
    return methods_stats

In [85]:
methods_statistics = calculate_benefits('Z10',years, df_year)

In [101]:
def plot_benefits(data, key_mapping, title):
    data = {key_mapping[old_key]: value for old_key, value in data.items()}
    categories = list(data.keys())
    years = [2018, 2020, 2022]

    # Initialize lists for each year
    counts_2018 = []
    counts_2020 = []
    counts_2022 = []

    # Populate the lists with values from the data dictionary
    for category in categories:
        counts_2018.append(data[category][2018])
        counts_2020.append(data[category][2020])
        counts_2022.append(data[category][2022])

    # Create traces for each year
    trace_2018 = go.Bar(x=counts_2018,y=categories, name='2018',orientation='h', marker=dict(color='blue'))
    trace_2020 = go.Bar(x=counts_2020,y=categories,  name='2020',orientation='h', marker=dict(color='orange'))
    trace_2022 = go.Bar(x=counts_2022,y=categories,  name='2022',orientation='h', marker=dict(color='green'))

    # Create the layout
    layout = go.Layout(
        title=title,
        xaxis=dict(title='Liczba'),
        yaxis=dict(title='Kategoria'),
        barmode='group', 
        width=1000,  # Adjust the width to make the chart longer
        height=1000  
    )

    # Create the figure
    fig = go.Figure(data=[trace_2018, trace_2020, trace_2022], layout=layout)

    # Show the plot
    return fig

In [102]:
# Transform data into lists for Plotly
data=methods_stats
key_mapping = {
    1: 'Opisy stanowisk pracy',
    2: 'Wartościowanie stanowisk pracy',
    3: 'Wystandaryzowane narzędzia selekcji kandydatów',
    4: 'Baza danych kandydatów',
    5: 'Narzędzia adaptacji pracowników',
    6: 'System wynagrodzeń uwzględniający hierarchię stanowisk i uwarunkowania rynkowe',
    7: 'System pozapłacowych mechanizmów motywacji pracowników',
    8: 'Procedury zwalniania (derekrutacji) pracowników',
    9: 'Plan zastępowania odchodzących pracowników',
    10: 'Plan zatrudnienia',
    11: 'Indywidulane plany rozwoju/ścieżki kariery pracowników',
    12: 'Narzędzia identyfikacji pracowników o wysokim potencjale rozwoju',
    13: 'Okresowa ocena pracowników'
}
fig = plot_benefits(data, key_mapping, 'Narzędzia stosowane do zarządzania zasobami ludzkimi')
fig.show()

Jakie, POZA WYNAGRODZENIEM, mechanizmy
motywacji pracowników wykorzystuje Państwa
firma/instytucja? (Tak/Nie)

In [103]:
benefits_stats = calculate_benefits('Z11', years, df_year)

In [104]:
# Transform data into lists for Plotly
key_mapping = {
    1: 'Zaangażowanie pracowników w podejmowanie decyzji',
    2: 'Ustanawianie celów efektywnościowych',
    3: 'Zwiększanie zakresu odpowiedzialności pracowników',
    4: 'Awans',
    5: 'Pochwała ze strony przełożonego',
    6: 'Dodatkowe pakiety ubezpieczeniowe, w tym zdrowotne',
    7: 'Karty / karnety fitness',
    8: 'Karty / karnet do instytucji kulturalnych',
    9: 'Szkolenia dodatkowe',
    10: 'Wyjazdy / imprezy integracyjne',
    11: 'Bony okazjonalne',
    12: 'Dofinansowane / bezpłatne posiłki',
    13: 'Inne'
}
fig = plot_benefits(benefits_stats, key_mapping, 'Mechanizmy motywacji pracowników')
fig.show()

Jakich narzędzi oceny rekrutacji w procesie rekrutacji używała Państwa firma w tym roku?

In [114]:
#recruitment_stats = calculate_benefits('Z13', years, df_year)
methods = [n for n in range(1, 11)]
methods_stats = {method: dict() for method in methods}
for year, df_y in zip(years[1:], df_year[1:]):
    for method in methods:
        sum_used = df_y[f'Z13_{method}'].eq('Tak').sum()
        methods_stats[method][year] = sum_used
methods_stats

{1: {2020: 21, 2022: 16},
 2: {2020: 31, 2022: 39},
 3: {2020: 9, 2022: 6},
 4: {2020: 38, 2022: 37},
 5: {2020: 22, 2022: 20},
 6: {2020: 9, 2022: 11},
 7: {2020: 5, 2022: 7},
 8: {2020: 14, 2022: 6},
 9: {2020: 0, 2022: 0},
 10: {2020: 0, 2022: 0}}

In [122]:
methods_2018 = [n for n in range(1,20)]
df_y = df_year[0]
year=2018
for n in methods_2018:
    # Z13_20 rzuca keyerror
    sum_used = df_y[f'Z13_{n}'].eq('Tak').sum()
    if n in [i for i in range (12, 20)]:
        methods_stats[8][year] = sum_used
    elif n==20:
        methods_stats[9][year] = sum_used
    elif n==9:
        methods_stats[10][year] = sum_used
    elif n==11:
        continue
    else:
        methods_stats[n][year] = sum_used
methods_stats

{1: {2020: 21, 2022: 16, 2018: 14},
 2: {2020: 31, 2022: 39, 2018: 42},
 3: {2020: 9, 2022: 6, 2018: 18},
 4: {2020: 38, 2022: 37, 2018: 57},
 5: {2020: 22, 2022: 20, 2018: 42},
 6: {2020: 9, 2022: 11, 2018: 20},
 7: {2020: 5, 2022: 7, 2018: 18},
 8: {2020: 14, 2022: 6, 2018: 0},
 9: {2020: 0, 2022: 0, 2018: 5},
 10: {2020: 0, 2022: 0, 2018: 3}}

In [124]:
# Transform data into lists for Plotly
recruitment_stats = methods_stats
key_mapping = {
    1: 'Rozmowa rekrutacyjna (bez standaryzacji pytań, odpowiedzi i ocen)',
    2: ' Wywiad ustrukturyzowany (z wystandaryzowanymi pytaniami, odpowiedziami i ocenami)',
    3: 'CV / Życiorys',
    4: 'List motywacyjny',
    5: 'Test wiedzy związanej ze stanowiskiem pracy',
    6: 'Próbka pracy (wykonanie zadania związanego ze stanowiskiem pracy)',
    7: 'Referencje',
    8: 'Wystandaryzowane testy, np. osobowości, motywacji, zdolności przywódczych',
    9: 'Ośrodek oceny (Assessment Center)',
    10: 'Inne',
}
fig = plot_benefits(recruitment_stats, key_mapping,'Narzędzia oceny kandydatów w procesie rekrutacji')
fig.show()

In [None]:
# import plotly.express as px
# for year in years:
#      #if 'W4' in dfs[year][job_type].columns.tolist():
#     for job_type in job_types:
#         df = dfs[year][job_type]
#         df['T1'] = df['T1'].fillna(0).astype(str)
#         filtered_df = df[df['T1'] == 'Tak']
#         filtered_df = filtered_df[filtered_df['T2'] == 2] # odpowiedziały, ale nie spełniały Państwa oczekiwań
#         df = filtered_df
#         # Calculate value counts for each category
#         value_counts = df['T5'].value_counts().reset_index()
#         value_counts.columns = ['value', 'count']
#         # Create the histogram
#         fig = px.histogram(value_counts, x='value', y='count', title='Histogram of Value Counts')

#         fig.update_layout(
#         xaxis_title='Value',
#         yaxis_title='Count'
#         )

#         # Show plot
#         fig.show()
    