In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import warnings

# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation and pandas
# chained-assignment warnings; consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [2]:
# Load the salary CSV and discard every row that has no value in the
# 'lønn' (salary) column, since all later cells aggregate on it.
dataset = (
    pd.read_csv("/kaggle/input/lonn-data/kode24s_lonnstall_2023.csv")
    .dropna(subset=['lønn'])
)

In [3]:
# Display the loaded frame as the cell output to inspect its columns and values.
dataset

Unnamed: 0,alder,utdanning,erfaring,jobb,sted,fag,kolleger,lønn,bonus,fornøyd
0,21-25,2.0,5.0,"in-house, privat sektor",Oslo,sikkerhet,100 - 500,350000.0,Ja,Ja
1,26-30,0.0,1.0,"in-house, privat sektor",Innlandet,fullstack,100 - 500,350000.0,Nei,Nei
2,31-35,3.0,14.0,"in-house, privat sektor",Viken,ingen av disse,100 - 500,350000.0,Nei,Nei
3,26-30,,3.0,frilans / selvstendig næringsdrivende,Oslo,frontend,under 10,350000.0,Ja,Nei
4,31-35,4.0,0.0,"in-house, privat sektor",Viken,fullstack,under 10,350000.0,Nei,Nei
...,...,...,...,...,...,...,...,...,...,...
2301,46-50,3.0,25.0,frilans / selvstendig næringsdrivende,Viken,arkitektur,under 10,2500000.0,Nei,Ja
2302,41-45,5.0,17.0,"in-house, privat sektor",Oslo,embedded/IOT/maskinvare,1.000 - 10.000,2950000.0,Ja,Ja
2303,41-45,5.0,20.0,frilans / selvstendig næringsdrivende,Oslo,backend,under 10,2950000.0,Nei,Ja
2304,31-35,5.0,9.0,frilans / selvstendig næringsdrivende,Rogaland,fullstack,over 10.000,3000000.0,Nei,Ja


# Topp 10 best betalte

In [4]:
# Rank all responses from highest to lowest 'lønn' and show the first ten rows.
ranked_by_salary = dataset.sort_values(by="lønn", ascending=False)
ranked_by_salary.head(10)

Unnamed: 0,alder,utdanning,erfaring,jobb,sted,fag,kolleger,lønn,bonus,fornøyd
2305,46-50,5.0,22.0,"in-house, privat sektor",Oslo,ledelse/administrativt,100 - 500,3500000.0,Ja,Ja
2304,31-35,5.0,9.0,frilans / selvstendig næringsdrivende,Rogaland,fullstack,over 10.000,3000000.0,Nei,Ja
2303,41-45,5.0,20.0,frilans / selvstendig næringsdrivende,Oslo,backend,under 10,2950000.0,Nei,Ja
2302,41-45,5.0,17.0,"in-house, privat sektor",Oslo,embedded/IOT/maskinvare,1.000 - 10.000,2950000.0,Ja,Ja
2301,46-50,3.0,25.0,frilans / selvstendig næringsdrivende,Viken,arkitektur,under 10,2500000.0,Nei,Ja
2300,36-40,5.0,12.0,"in-house, privat sektor",Oslo,ledelse/administrativt,over 10.000,2500000.0,Ja,Ja
2299,26-30,5.0,5.0,frilans / selvstendig næringsdrivende,utlandet,sikkerhet,50 - 100,2500000.0,Ja,Ja
2298,51-55,3.0,29.0,konsulent,Oslo,fullstack,10 - 50,2500000.0,Ja,Ja
2297,31-35,3.0,10.0,frilans / selvstendig næringsdrivende,Oslo,frontend,under 10,2330000.0,Nei,Ja
2295,26-30,3.0,5.0,frilans / selvstendig næringsdrivende,Oslo,fullstack,under 10,2300000.0,Nei,Ja


# Dei 10 dårlegast betalte

In [5]:
# Sort from highest to lowest 'lønn' and keep the last ten rows, so the
# lowest salaries are listed with the very lowest on the bottom row.
salary_descending = dataset.sort_values(by="lønn", ascending=False)
salary_descending.iloc[-10:]

Unnamed: 0,alder,utdanning,erfaring,jobb,sted,fag,kolleger,lønn,bonus,fornøyd
7,15-20,1.0,0.0,"in-house, privat sektor",Trøndelag,programvare,10 - 50,360000.0,Nei,Nei
10,21-25,0.0,4.0,konsulent,Viken,app,under 10,360000.0,Nei,Nei
11,46-50,,32.0,"in-house, privat sektor",Innlandet,ledelse/administrativt,under 10,360000.0,Ja,Nei
6,26-30,0.0,1.0,"in-house, privat sektor",Troms og Finnmark,fullstack,10 - 50,352000.0,Nei,Nei
3,26-30,,3.0,frilans / selvstendig næringsdrivende,Oslo,frontend,under 10,350000.0,Ja,Nei
2,31-35,3.0,14.0,"in-house, privat sektor",Viken,ingen av disse,100 - 500,350000.0,Nei,Nei
1,26-30,0.0,1.0,"in-house, privat sektor",Innlandet,fullstack,100 - 500,350000.0,Nei,Nei
4,31-35,4.0,0.0,"in-house, privat sektor",Viken,fullstack,under 10,350000.0,Nei,Nei
5,36-40,,10.0,frilans / selvstendig næringsdrivende,Vestland,ingen av disse,under 10,350000.0,Nei,Nei
0,21-25,2.0,5.0,"in-house, privat sektor",Oslo,sikkerhet,100 - 500,350000.0,Ja,Ja


# Best betalte stillinger

In [6]:
# Mean salary per discipline ('fag'), ordered from lowest to highest mean.
avg_salaries_by_job_type = (
    dataset
    .groupby('fag')['lønn']
    .mean()
    .reset_index()
    .sort_values(by="lønn", ascending=True)
)

# Horizontal bars, coloured by the same mean value on a red-to-green scale.
fig = px.bar(
    avg_salaries_by_job_type,
    x='lønn',
    y='fag',
    color='lønn',
    color_continuous_scale='RdYlGn',
    labels={'fag': 'Fag', 'lønn': 'Gjennomsnittsløn (NOK)'},
)
fig.update_layout(
    width=1200,
    height=500,
    xaxis_title='Gjennomsnittsløn (NOK)',
    yaxis_title='Fag',
)

# Print each mean next to its bar; the annotation is anchored at the bar's
# end value and shifted 60 px to the left.
for fag_name, mean_salary in zip(
    avg_salaries_by_job_type['fag'], avg_salaries_by_job_type['lønn']
):
    fig.add_annotation(
        x=mean_salary,
        y=fag_name,
        text=f'{mean_salary:,.0f} NOK',
        showarrow=False,
        font=dict(color='black', size=12),
        xshift=-60,
    )

# The colour bar would only repeat the x axis, so hide it.
fig.update_coloraxes(showscale=False)
fig.show()


In [7]:
# Salary distribution per discipline ('fag') as box plots, with the
# disciplines arranged by ascending mean salary.
avg_salary_by_fag = dataset.groupby('fag')['lønn'].mean().reset_index()
sorted_fag = avg_salary_by_fag.sort_values(by='lønn', ascending=True)['fag']

# Re-index the raw rows by 'fag' and pull them out one discipline at a time
# in the order computed above, so the frame itself carries the ordering.
rows_by_fag = dataset.set_index('fag')
dataset_sorted = rows_by_fag.loc[sorted_fag].reset_index()

fig = px.box(dataset_sorted, x='lønn', y='fag', width=1200, height=500)

# The x range stops at 2.4M NOK; the data contains salaries up to 3.5M, so
# the highest outliers fall outside the initial view.
fig.update_layout(
    xaxis=dict(title='Løn (NOK)', range=[0, 2_400_000]),
    yaxis=dict(title='Fag'),
)

fig.show()

# Kvar lønnar det seg å jobbe?

In [8]:
# Mean salary per location ('sted'), ordered from lowest to highest mean.
avg_salaries_by_city = (
    dataset
    .groupby('sted')['lønn']
    .mean()
    .reset_index()
    .sort_values(by="lønn", ascending=True)
)

# Horizontal bars, coloured by the same mean value on a red-to-green scale.
fig = px.bar(
    avg_salaries_by_city,
    y='sted',
    x='lønn',
    color='lønn',
    color_continuous_scale='RdYlGn',
    labels={'sted': 'Stad', 'lønn': 'Gjennomsnittsløn (NOK)'}
)

# The y axis shows locations, so it is titled 'Stad' (the previous title
# 'Fag' was a leftover from the per-discipline chart).
fig.update_layout(
    xaxis_title='Gjennomsnittsløn (NOK)',
    yaxis_title='Stad',
    width=1200,
    height=500
)

# Print each mean next to its bar; the annotation is anchored at the bar's
# end value and shifted 60 px to the left.
for city_name, mean_salary in zip(
    avg_salaries_by_city['sted'], avg_salaries_by_city['lønn']
):
    fig.add_annotation(
        x=mean_salary,
        y=city_name,
        text=f'{mean_salary:,.0f} NOK',
        showarrow=False,
        font=dict(color='black', size=12),
        xshift=-60
    )

# The colour bar would only repeat the x axis, so hide it.
fig.update_coloraxes(showscale=False)
fig.show()


In [9]:
# Salary distribution per location ('sted') as box plots, with the
# locations arranged by ascending mean salary.
avg_salary_by_city = dataset.groupby('sted')['lønn'].mean().reset_index()
sorted_city = avg_salary_by_city.sort_values(by='lønn', ascending=True)['sted']

# Re-index the raw rows by 'sted' and pull them out one location at a time
# in the order computed above, so the frame itself carries the ordering.
dataset_sorted = dataset.set_index('sted').loc[sorted_city].reset_index()

fig = px.box(
    dataset_sorted,
    x='lønn',
    y='sted',
    width=1200,
    height=500,
)

# Use the display label 'Stad' for the axis instead of the raw column name.
# The x range stops at 2.4M NOK; the data contains salaries up to 3.5M, so
# the highest outliers fall outside the initial view.
fig.update_layout(
    xaxis_title='Løn (NOK)',
    yaxis_title='Stad',
    xaxis_range=[0, 2_400_000],
)

fig.show()

# Erfaring og utdanning

In [10]:
# Mean salary for each value of 'erfaring' (experience) as a line chart,
# with the lowest and the highest mean highlighted.

# Build the cleaned frame in one chain: assigning a column on the result of
# dropna() can raise pandas' SettingWithCopyWarning, whereas astype() with a
# column mapping returns a new frame.
cleaned_data = (
    dataset
    .dropna(subset=['erfaring', 'lønn'])
    .astype({'lønn': float})
)
avg_salary_by_experience = cleaned_data.groupby('erfaring')['lønn'].mean().reset_index()

# Rows (erfaring, lønn) holding the minimum and maximum mean salary.
lowest_point = avg_salary_by_experience.loc[avg_salary_by_experience['lønn'].idxmin()]
highest_point = avg_salary_by_experience.loc[avg_salary_by_experience['lønn'].idxmax()]

fig = px.line(
    avg_salary_by_experience,
    x='erfaring',
    y='lønn',
    labels={'erfaring': 'Erfaring', 'lønn': 'Gjennomsnittsløn (NOK)'}
)

# One labelled marker per extreme: (row, colour, caption, label placement).
# The caption is used both as the trace name and as the label prefix, so the
# two can no longer drift apart (the name used to read 'Høgest løn').
highlights = [
    (lowest_point, 'red', 'Lågast løn', 'bottom right'),
    (highest_point, 'green', 'Høgast løn', 'top center'),
]
for point, color, caption, label_position in highlights:
    fig.add_trace(
        go.Scatter(
            x=[point['erfaring']],
            y=[point['lønn']],
            mode='markers+text',
            marker=dict(color=color, size=10),
            name=caption,
            text=[f'{caption}: {point["lønn"]:,.0f} NOK, Erfaring: {int(point["erfaring"])} år'],
            textfont=dict(size=14, color=color),
            textposition=label_position
        )
    )

# The markers carry their own text labels, so the legend is switched off.
fig.update_layout(
    xaxis_title="Erfaring",
    yaxis_title="Gjennomsnittsløn (NOK)",
    width=1200,
    height=500,
    showlegend=False
)
fig.show()

In [11]:
# Mean salary for each value of 'utdanning' (education) as a line chart,
# with the lowest and the highest mean highlighted.

# Build the cleaned frame in one chain: assigning a column on the result of
# dropna() can raise pandas' SettingWithCopyWarning, whereas astype() with a
# column mapping returns a new frame.
cleaned_data = (
    dataset
    .dropna(subset=['utdanning', 'lønn'])
    .astype({'lønn': float})
)
avg_salary_by_edu = cleaned_data.groupby('utdanning')['lønn'].mean().reset_index()

# Rows (utdanning, lønn) holding the minimum and maximum mean salary.
lowest_point = avg_salary_by_edu.loc[avg_salary_by_edu['lønn'].idxmin()]
highest_point = avg_salary_by_edu.loc[avg_salary_by_edu['lønn'].idxmax()]

# The 'lønn' label now matches the axis title set below (it used to be
# spelled 'Gjennomsnittslønn (NOK)').
fig = px.line(
    avg_salary_by_edu,
    x='utdanning',
    y='lønn',
    labels={'utdanning': 'Utdanning', 'lønn': 'Gjennomsnittsløn (NOK)'}
)

# One labelled marker per extreme: (row, colour, caption, label placement).
# The caption is used both as the trace name and as the label prefix, so the
# two can no longer drift apart (the name used to read 'Høgest løn').
highlights = [
    (lowest_point, 'red', 'Lågast løn', 'bottom right'),
    (highest_point, 'green', 'Høgast løn', 'top left'),
]
for point, color, caption, label_position in highlights:
    fig.add_trace(
        go.Scatter(
            x=[point['utdanning']],
            y=[point['lønn']],
            mode='markers+text',
            marker=dict(color=color, size=10),
            name=caption,
            text=[f'{caption}: {point["lønn"]:,.0f} NOK, Utdanning: {int(point["utdanning"])} år'],
            textfont=dict(size=14, color=color),
            textposition=label_position
        )
    )

# The markers carry their own text labels, so the legend is switched off.
fig.update_layout(
    xaxis_title="Utdanning",
    yaxis_title="Gjennomsnittsløn (NOK)",
    width=1200,
    height=500,
    showlegend=False
)
fig.show()

# Privat, offentleg, konsulent, eller frilans

In [12]:
# Mean salary per employment type ('jobb'), ordered from lowest to highest mean.
avg_salaries_by_employer = (
    dataset
    .groupby('jobb')['lønn']
    .mean()
    .reset_index()
    .sort_values(by="lønn", ascending=True)
)

# Horizontal bars, coloured by the same mean value on a red-to-green scale.
fig = px.bar(
    avg_salaries_by_employer,
    x='lønn',
    y='jobb',
    color='lønn',
    color_continuous_scale='RdYlGn',
    labels={'jobb': 'Jobb', 'lønn': 'Gjennomsnittsløn (NOK)'},
)
fig.update_layout(
    width=1200,
    height=500,
    xaxis_title='Gjennomsnittsløn (NOK)',
    yaxis_title='Jobb',
)

# Print each mean next to its bar; the annotation is anchored at the bar's
# end value and shifted 60 px to the left.
for job_type, mean_salary in zip(
    avg_salaries_by_employer['jobb'], avg_salaries_by_employer['lønn']
):
    fig.add_annotation(
        x=mean_salary,
        y=job_type,
        text=f'{mean_salary:,.0f} NOK',
        showarrow=False,
        font=dict(color='black', size=12),
        xshift=-60,
    )

# The colour bar would only repeat the x axis, so hide it.
fig.update_coloraxes(showscale=False)
fig.show()


In [13]:
# Salary distribution per employment type ('jobb') as box plots, with the
# employment types arranged by ascending mean salary.
avg_salaries_by_employer = dataset.groupby('jobb')['lønn'].mean().reset_index()

# Named for what it holds: the previous name `sorted_city` was a copy-paste
# leftover that also overwrote the location ordering from the 'sted' cell.
sorted_employer = avg_salaries_by_employer.sort_values(by='lønn', ascending=True)['jobb']

# Re-index the raw rows by 'jobb' and pull them out one employment type at a
# time in the order computed above, so the frame itself carries the ordering.
dataset_sorted = dataset.set_index('jobb').loc[sorted_employer].reset_index()

fig = px.box(
    dataset_sorted,
    x='lønn',
    y='jobb',
    width=1200,
    height=500,
)

fig.update_layout(
    xaxis_title='Løn (NOK)',
    yaxis_title='Jobb',
)

fig.show()