In [None]:
import plotly as pl
import plotly.express as px
import pandas as pd

In [None]:
# Load the two source tables used by the first pair of charts.
H1 = pd.read_csv(filepath_or_buffer="H1.csv")
H2 = pd.read_csv(filepath_or_buffer="H2.csv")

# Rich-display H2 as the cell output.
H2

In [None]:
# Bar chart of `salary_per_year` for each `job_category` in H1,
# with the formatted salary printed above every bar.
fig = px.bar(
    H1,
    x='job_category',
    y='salary_per_year',
    title='Average Salary per Job Category',
    labels={'salary_per_year': 'Salary per Year (USD)', 'job_category': 'Job Category'},
    text='salary_per_year',  # Show salary values on bars
)

# Format the bar labels as dollars with thousands separators and 2 decimals.
fig.update_traces(
    texttemplate='$%{text:,.2f}',
    textposition='outside',
)

# Add 10% headroom so the outside labels are not clipped.
# Series.max() skips NaN; the builtin max() is order-dependent when NaN is
# present and raises ValueError on an empty column.
top_salary = H1['salary_per_year'].max()
if pd.notna(top_salary):
    fig.update_yaxes(range=[0, top_salary * 1.1])
# Otherwise keep plotly's automatic range (empty or all-NaN column).
fig.show()

In [None]:
# Bar chart of `average_salary` for each `employment_type` in H2,
# with the formatted salary printed above every bar.
fig = px.bar(
    H2,
    x='employment_type',
    y='average_salary',
    # Title now matches the x-axis (employment type, not job).
    title='Average Salary per Employment Type',
    labels={'average_salary': 'Salary per Year (USD)', 'employment_type': 'Employment Type'},
    text='average_salary',  # Show salary values on bars
)

# Format the bar labels as dollars with thousands separators and 2 decimals.
fig.update_traces(
    texttemplate='$%{text:,.2f}',
    textposition='outside',
)

# Add 10% headroom so the outside labels are not clipped.
# Series.max() skips NaN; the builtin max() is order-dependent when NaN is
# present and raises ValueError on an empty column.
top_salary = H2['average_salary'].max()
if pd.notna(top_salary):
    fig.update_yaxes(range=[0, top_salary * 1.1])
# Otherwise keep plotly's automatic range (empty or all-NaN column).
fig.show()

In [None]:
# Load the remote company-location table and keep only the "Data Analyst" rows.
H3 = pd.read_csv(filepath_or_buffer="H3_company_location_remote_ES.csv")
filtered_H3 = H3.loc[H3['job_title'].eq("Data Analyst")]

# Highest yearly salary first.
filtered_H3_sorted = filtered_H3.sort_values('salary_per_year', ascending=False)

# Display a re-indexed copy; the result is not assigned, so
# `filtered_H3_sorted` itself keeps its original index.
filtered_H3_sorted.reset_index()

In [None]:
# Bar chart of remote Data Analyst salaries, one coloured bar group per
# company location, with the formatted salary printed above every bar.
fig = px.bar(
    filtered_H3_sorted,
    x='company_location',
    y='salary_per_year',
    color='company_location',  # Optional, to distinguish locations by color
    title='REMOTE Data Analyst Salary by Location',
    labels={'salary_per_year': 'Salary per Year (USD)', 'company_location': 'Company Location'},
    text='salary_per_year',  # Display salary values on the bars
)

# Format the bar labels as dollars with thousands separators and 2 decimals.
fig.update_traces(
    texttemplate='$%{text:,.2f}',
    textposition='outside',
)

# Add 10% headroom so the outside labels are not clipped.
# Series.max() skips NaN and returns NaN for an empty selection, whereas the
# builtin max() raises ValueError when the "Data Analyst" filter matches nothing.
top_salary = filtered_H3_sorted['salary_per_year'].max()
if pd.notna(top_salary):
    fig.update_yaxes(range=[0, top_salary * 1.1])
# Otherwise keep plotly's automatic range.

# Show the plot
fig.show()

In [None]:
# Load the cost-comparison table (average salary vs. company-location average salary).
H3_percentage_ALV = pd.read_csv(
    filepath_or_buffer="H3_cost_comp_between_avg_sal_vs_comp_avg_sal(ALV).csv"
)

In [None]:
# Rows whose employee residence country is Germany, for any work setting.
# Note: the following cell reassigns `H3_percentage_ALV_filtered` with an
# additional "Remote" work-setting condition.
H3_percentage_ALV_filtered = H3_percentage_ALV.loc[
    H3_percentage_ALV["employee_residence_country"].eq("Germany")
]

In [None]:
# Keep only remote rows for employees residing in Germany.
H3_percentage_ALV_filtered = H3_percentage_ALV.loc[
    H3_percentage_ALV["work_setting"].eq("Remote")
    & H3_percentage_ALV["employee_residence_country"].eq("Germany")
]

# Show the selection as the cell output.
H3_percentage_ALV_filtered

In [None]:
# Keep only remote rows for employees residing in the United Kingdom.
H3_percentage_ALV_filtered_UK = H3_percentage_ALV.loc[
    H3_percentage_ALV["work_setting"].eq("Remote")
    & H3_percentage_ALV["employee_residence_country"].eq("United Kingdom")
]

# Show the selection as the cell output.
H3_percentage_ALV_filtered_UK

In [None]:
# Keep only remote rows for employees residing in India.
H3_percentage_ALV_filtered_India = H3_percentage_ALV.loc[
    H3_percentage_ALV["work_setting"].eq("Remote")
    & H3_percentage_ALV["employee_residence_country"].eq("India")
]

# Show the selection as the cell output.
H3_percentage_ALV_filtered_India

In [None]:
# Grouped bar chart comparing three metrics per company location for the
# remote / Germany selection.
#
# Passing a list to `y` puts Plotly Express in wide-form mode: the columns are
# melted into a `variable` column (legend entries) and a `value` column
# (y-axis). In that mode, `labels` keys naming the original y columns do not
# rename the legend entries, so the readable names are applied by renaming the
# columns before plotting, and `labels` targets `value` / `variable` instead.
series_labels = {
    'avg_salary': 'Avg Salary (USD)',
    'current_location_avg_salary': 'Current Location Avg Salary (USD)',
    'savings_percentage': 'Savings Percentage',
}

# NOTE(review): 'savings_percentage' shares the y-axis with the two salary
# columns; presumably it is on a much smaller scale and will be hard to read.
# Confirm whether it should move to its own chart or a secondary axis.
fig = px.bar(
    H3_percentage_ALV_filtered.rename(columns=series_labels),  # returns a copy
    x='company_location_country',
    y=list(series_labels.values()),
    title='Salary and Savings Comparison by Company Location',
    labels={
        'company_location_country': 'Company Location',
        'value': 'Value',
        'variable': 'Metric',
    },
    barmode='group',
)

fig.show()

In [None]:
# Load salaries by company location, then keep the "Data Analyst" rows
# ordered from highest to lowest yearly salary.
H3_company_location_ES = pd.read_csv(filepath_or_buffer="H3_company_location_ES.csv")
filtered_df = (
    H3_company_location_ES
    .loc[H3_company_location_ES['job_title'].eq('Data Analyst')]
    .sort_values('salary_per_year', ascending=False)
)

In [None]:
# Data Analyst yearly salary per company location, one colour per location.
fig = px.bar(
    data_frame=filtered_df,
    title='Data Analyst Salary by Location',
    x='company_location',
    y='salary_per_year',
    color='company_location',
    labels={
        'salary_per_year': 'Salary per Year (USD)',
        'company_location': 'Company Location',
    },
)

# Render the figure.
fig.show()

In [None]:
# Take the ten rows with the highest yearly salary (all job titles).
H3_company_location_ES_sorted = (
    H3_company_location_ES
    .sort_values('salary_per_year', ascending=False)
    .head(10)
)

# Pin Japan to blue; every other location uses plotly's default colour sequence.
color_map = {'Japan': 'blue'}

# Bar chart of those ten rows, coloured by company location.
fig = px.bar(
    data_frame=H3_company_location_ES_sorted,
    title='Top 10 Salaries by Company Location',
    x='company_location',
    y='salary_per_year',
    color='company_location',
    color_discrete_map=color_map,
    labels={
        'salary_per_year': 'Salary per Year (USD)',
        'company_location': 'Company Location',
    },
)

# Render the figure.
fig.show()

In [None]:
# Load the company-location / work-setting salary table.
# NOTE(review): the variable is named H4_1 while the file is prefixed "H3_" — confirm this is the intended file.
H4_1 = pd.read_csv(filepath_or_buffer="H3_company_location_setting_ES.csv")

In [None]:
# Load average salary by work setting and employee country.
# NOTE(review): the variable is named H4 while the file is prefixed "H3_" — confirm this is the intended file.
H4 = pd.read_csv(
    filepath_or_buffer="H3_avg_salary_by_worksetting_employee_country(ALV).csv"
)

In [None]:
# Yearly salary per work setting.
# NOTE(review): px.bar stacks rows that share the same x value, so if H4_1 has
# several rows per work_setting the bar height is their sum rather than an
# average — confirm the CSV is already aggregated to one row per work setting.
fig = px.bar(
    data_frame=H4_1,
    x='work_setting',
    y='salary_per_year',
    title='Average Salary by Work Setting',
    labels={
        'salary_per_year': 'Average Salary (USD)',
        'work_setting': 'Work Setting',
    },
)

# Widen the top and bottom margins so the labels have room.
fig.update_layout(margin={'t': 50, 'b': 100})

# Render the figure.
fig.show()

In [None]:
# Average salary per employee residence country, with side-by-side bars
# (one colour per work setting).
fig = px.bar(
    data_frame=H4,
    title='Average Salary by Work Setting and Country',
    x='employee_residence_country',
    y='avg_salary',
    color='work_setting',
    barmode='group',
    labels={
        'avg_salary': 'Average Salary (USD)',
        'employee_residence_country': 'Country',
    },
)

# Render the figure.
fig.show()

In [None]:
# Load average salary by job title within each category.
data = pd.read_csv(
    filepath_or_buffer="H1_avg_salary_byjob_title_in_categories(ALV).csv"
)

In [None]:
# Average salary per category, with side-by-side bars (one colour per job title).
fig = px.bar(
    data_frame=data,
    title='Average Salary by Job Title in Each Category',
    x='category_name',
    y='avg_salary',
    color='job_title',
    barmode='group',
    labels={
        'avg_salary': 'Average Salary per year (USD)',
        'category_name': 'Category',
    },
)

# Render the figure.
fig.show()