In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import statsmodels.api as sm

# Set default theme for better looking plots
pio.templates.default = 'plotly_white'

# Custom color palette
custom_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']

In [10]:
# Read and prepare the data
df = pd.read_csv(r'C:\Users\lenovo\OneDrive\Desktop\flask\graphs\jobs.csv')

# Clean salary data
df['sal_low'] = pd.to_numeric(df['sal_low'], errors='coerce')
df['sal_high'] = pd.to_numeric(df['sal_high'], errors='coerce')
df['avg_salary'] = (df['sal_low'] + df['sal_high']) / 2

# Format salary for better display
df['formatted_salary'] = df['avg_salary'].apply(lambda x: f'${x:,.0f}')

In [None]:
# Box Plot – Salary Ranges by Job Function
fig1 = px.box(df, x='Job function', y='avg_salary', points='all',
              title='Salary Ranges by Job Function',
              color='Job function')
fig1.update_layout(xaxis_tickangle=-45, yaxis_title='Average Salary')
fig1.show()

AttributeError: 'float' object has no attribute 'expandtabs'

In [12]:
# Enhanced Violin Plot – Salary Distribution by Seniority Level
fig2 = px.violin(df, x='Seniority level', y='avg_salary',
                box=True,
                points='outliers',
                title='Salary Distribution by Seniority Level',
                color='Seniority level',
                color_discrete_sequence=custom_colors)

fig2.update_layout(
    title_x=0.5,
    title_font_size=20,
    showlegend=False,
    xaxis_title='Seniority Level',
    yaxis_title='Salary Distribution (USD)',
    xaxis_tickangle=-45,
    height=600,
    yaxis_tickformat='$,.0f',
    plot_bgcolor='white',
    hoverlabel=dict(bgcolor='white'),
    margin=dict(t=100)
)

fig2.show()

In [13]:
# Enhanced Histogram – Salary Distribution
fig3 = px.histogram(df, x='avg_salary',
                   nbins=50,
                   title='Distribution of Salaries',
                   color_discrete_sequence=['#1f77b4'],
                   opacity=0.7)

fig3.update_layout(
    title_x=0.5,
    title_font_size=20,
    xaxis_title='Salary (USD)',
    yaxis_title='Number of Jobs',
    height=500,
    xaxis_tickformat='$,.0f',
    plot_bgcolor='white',
    bargap=0.1,
    showlegend=False,
    margin=dict(t=100)
)

# Add a kernel density estimate curve
fig3.add_trace(
    go.Scatter(x=df['avg_salary'].sort_values(),
              y=stats.gaussian_kde(df['avg_salary'].dropna())(df['avg_salary'].sort_values()) * len(df['avg_salary']) * (df['avg_salary'].max() - df['avg_salary'].min()) / 50,
              mode='lines',
              name='KDE',
              line=dict(color='#ff7f0e', width=2))
)

fig3.show()

NameError: name 'stats' is not defined

In [None]:
# Enhanced Scatter Plot – Experience vs. Salary
fig4 = px.scatter(df, x='months_experience', y='avg_salary',
                 title='Experience vs. Salary Correlation',
                 color='Job function',
                 size='avg_salary',  # Bubble size based on salary
                 size_max=15,
                 trendline='ols',
                 color_discrete_sequence=custom_colors,
                 hover_data=['title', 'formatted_salary'])

fig4.update_layout(
    title_x=0.5,
    title_font_size=20,
    xaxis_title='Experience (Months)',
    yaxis_title='Salary (USD)',
    height=600,
    yaxis_tickformat='$,.0f',
    plot_bgcolor='white',
    hoverlabel=dict(bgcolor='white'),
    margin=dict(t=100, r=100)
    legend=dict(
        yanchor='top',
        y=0.99,
        xanchor='right',
        x=0.99
    )
)

fig4.show()

In [None]:
# Enhanced Bar Chart – Top 15 Highest Paying Jobs
title_salary = df.groupby('title')['avg_salary'].mean().nlargest(15).reset_index()

fig5 = px.bar(title_salary,
             x='avg_salary',
             y='title',
             orientation='h',
             title='Top 15 Highest Paying Job Titles',
             color='avg_salary',
             color_continuous_scale='viridis',
             text=title_salary['avg_salary'].apply(lambda x: f'${x:,.0f}'))

fig5.update_layout(
    title_x=0.5,
    title_font_size=20,
    xaxis_title='Average Salary (USD)',
    yaxis_title='Job Title',
    height=700,
    xaxis_tickformat='$,.0f',
    plot_bgcolor='white',
    hoverlabel=dict(bgcolor='white'),
    margin=dict(l=200, t=100, r=100)
    showlegend=False
)

fig5.update_traces(
    textposition='auto',
    hovertemplate='<b>%{y}</b><br>Salary: %{text}<extra></extra>'
)

fig5.show()