In [None]:
\list

## Exploring 'keywords_all' Column
A brief exploration of the 'keywords_all' column in the 'data_nerd_jobs' table, focusing on the structure and content of the data.

In [None]:
-- Preview the raw 'keywords_all' column to inspect its structure.
-- Capped at 100 rows: a sample is enough to see the shape of the data, and it
-- avoids pulling every row of the table into the notebook.
SELECT
  keywords_all
FROM
  public_job_listings.data_nerd_jobs
LIMIT 100;

## Unpacking JSON Objects in 'keywords_all'
Extracting job skills from the JSON objects in the 'keywords_all' column of the 'data_nerd_jobs' table.

In [None]:
-- Flatten the 'keywords_all.list' array so that every keyword becomes its own
-- row, and return a 100-row sample of the 'element' values.
SELECT
  kw.element
FROM public_job_listings.data_nerd_jobs
CROSS JOIN UNNEST(keywords_all.list) AS kw
LIMIT 100;

## Median Salary for Top 10 Skills
Calculating the median salary for the top 10 skills listed in the 'keywords_all' column, using the 'salary_year' column from the 'data_nerd_jobs' table.

### Approach
The query below works in two steps: it first counts how many rows mention each skill and keeps the 10 most frequent skills, then computes the median of 'salary_year' over the rows that list each of those skills.

In [None]:
-- Step 1: count how often each skill keyword appears and keep the 10 most frequent.
WITH TopSkills AS (
  SELECT
    kw.element AS skill,
    COUNT(*) AS posting_count
  FROM public_job_listings.data_nerd_jobs
  CROSS JOIN UNNEST(keywords_all.list) AS kw
  GROUP BY skill
  ORDER BY posting_count DESC
  LIMIT 10
),
-- Step 2: re-expand the keywords, keep only rows for the top skills, and attach
-- the per-skill median salary and count to every row via window functions.
MedianSalaries AS (
  SELECT
    top_skill.skill,
    PERCENTILE_CONT(salary_year, 0.5) OVER (PARTITION BY top_skill.skill) AS median_salary,
    MAX(top_skill.posting_count) OVER (PARTITION BY top_skill.skill) AS skill_count
  FROM public_job_listings.data_nerd_jobs
  CROSS JOIN UNNEST(keywords_all.list) AS kw
  INNER JOIN TopSkills AS top_skill
    ON kw.element = top_skill.skill
)
-- Step 3: the window functions repeat the same values on every row of a skill,
-- so collapse the result to one row per skill.
SELECT DISTINCT
  skill,
  median_salary,
  skill_count
FROM MedianSalaries;

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Theme: seaborn's darkgrid first, then matplotlib's dark background on top of it
sns.set(style="darkgrid")
plt.style.use("dark_background")

# Keep the three plotted columns, ordered from highest to lowest median salary
data = sql_df_ghfh[['skill', 'median_salary', 'skill_count']].sort_values('median_salary', ascending=False)

# Horizontal bar chart: one bar per skill, bar length = median salary
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(x='median_salary', y='skill', data=data, palette="Blues_r", ax=ax)

# Annotate each bar with its skill_count; the text starts at the bar's tip
for row_pos, (salary, postings) in enumerate(zip(data['median_salary'], data['skill_count'])):
    ax.text(salary, row_pos, str(postings), color='white', va='center')

ax.set_title('Median Salary and Job Postings for Top 10 Skills')
ax.set_xlabel('Median Salary ($)')
ax.set_ylabel('Skill')
plt.show()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Theme: seaborn's darkgrid first, then matplotlib's dark background on top of it
sns.set(style="darkgrid")
plt.style.use("dark_background")

# Keep the three plotted columns, ordered from highest to lowest median salary
data = sql_df_ghfh[['skill', 'median_salary', 'skill_count']].sort_values('median_salary', ascending=False)

# Horizontal bar chart: one bar per skill, bar length = median salary
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(x='median_salary', y='skill', data=data, palette="Blues_r", ax=ax)

# Annotate each bar with "<skill_count> postings"; the text starts at the bar's tip
for row_pos, (salary, postings) in enumerate(zip(data['median_salary'], data['skill_count'])):
    ax.text(salary, row_pos, f'{postings} postings', color='white', va='center')

ax.set_title('Median Salary for Top 10 Skills with Job Postings Count')
ax.set_xlabel('Median Salary ($)')
ax.set_ylabel('Skill')
plt.show()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Theme: seaborn's darkgrid first, then matplotlib's dark background on top of it
sns.set(style="darkgrid")
plt.style.use("dark_background")

# Keep the three plotted columns, ordered from highest to lowest median salary
data = sql_df_ghfh[['skill', 'median_salary', 'skill_count']].sort_values('median_salary', ascending=False)

# Horizontal bar chart on a larger canvas: one bar per skill, bar length = median salary
fig, ax = plt.subplots(figsize=(14, 10))
sns.barplot(x='median_salary', y='skill', data=data, palette="Blues_r", ax=ax)

# Annotate each bar with "<skill_count> postings". The label is shifted 5000
# x-axis units to the left of the bar's tip so that it starts inside the bar.
for row_pos, (salary, postings) in enumerate(zip(data['median_salary'], data['skill_count'])):
    ax.text(salary - 5000, row_pos, f'{postings} postings', color='white', va='center')

ax.set_title('Median Salary for Top 10 Skills with Job Postings Count')
ax.set_xlabel('Median Salary ($)')
ax.set_ylabel('Skill')
plt.show()

## Median Salary for All Skills
Calculating the median salary for all skills listed in the 'keywords_all' column using the 'salary_year' column.

In [None]:
-- One row per skill with the number of rows mentioning it and its median salary.
-- The previous version used AVG(salary_year), which is the mean, while the
-- column is named (and plotted as) the median. PERCENTILE_CONT(x, 0.5) is the
-- median; it is used here as a window function, so the per-skill values are
-- computed with PARTITION BY and the repeated rows are collapsed with DISTINCT.
SELECT DISTINCT
  skill,
  COUNT(*) OVER (PARTITION BY skill) AS skill_count,
  ROUND(PERCENTILE_CONT(salary_year, 0.5) OVER (PARTITION BY skill)) AS median_salary
FROM (
  -- Flatten the keyword array: one row per (posting, skill) pair
  SELECT
    keyword.element AS skill,
    salary_year
  FROM
    public_job_listings.data_nerd_jobs,
    UNNEST(keywords_all.list) AS keyword
);

In [None]:
!pip install plotly

In [None]:
import plotly.express as px

# Discard rows that contain any missing value before plotting
plot_data = sql_df_xcmj.dropna()

# One marker per row: x = median salary, y = skill count; the skill name is shown on hover
fig = px.scatter(
    plot_data,
    x='median_salary',
    y='skill_count',
    hover_data=['skill'],
    title='Scatter Plot of Salary vs Count',
    labels={'median_salary': 'Median Salary', 'skill_count': 'Skill Count'},
)

fig.show()

In [None]:
-- One row per (job title, skill) pair with its row count and median salary.
-- The previous version used AVG(salary_year), which is the mean, while the
-- column is named (and plotted as) the median. PERCENTILE_CONT(x, 0.5) is the
-- median; it is used here as a window function, so the per-group values are
-- computed with PARTITION BY and the repeated rows are collapsed with DISTINCT.
SELECT DISTINCT
  job_title_final,
  skill,
  COUNT(*) OVER (PARTITION BY job_title_final, skill) AS skill_count,
  ROUND(PERCENTILE_CONT(salary_year, 0.5) OVER (PARTITION BY job_title_final, skill)) AS median_salary
FROM (
  -- Flatten the keyword array: one row per (posting, skill) pair
  SELECT
    job_title_final,
    keyword.element AS skill,
    salary_year
  FROM
    public_job_listings.data_nerd_jobs,
    UNNEST(keywords_all.list) AS keyword
);

In [None]:
-- One row per (skill, job title) pair with its row count and median salary.
-- The previous version used AVG(salary_year), which is the mean, while the
-- column is named (and plotted as) the median. PERCENTILE_CONT(x, 0.5) is the
-- median; it is used here as a window function, so the per-group values are
-- computed with PARTITION BY and the repeated rows are collapsed with DISTINCT.
SELECT DISTINCT
  skill,
  job_title_final,
  COUNT(*) OVER (PARTITION BY skill, job_title_final) AS skill_count,
  ROUND(PERCENTILE_CONT(salary_year, 0.5) OVER (PARTITION BY skill, job_title_final)) AS median_salary
FROM (
  -- Flatten the keyword array: one row per (posting, skill) pair
  SELECT
    keyword.element AS skill,
    job_title_final,
    salary_year
  FROM
    public_job_listings.data_nerd_jobs,
    UNNEST(keywords_all.list) AS keyword
);

In [None]:
import plotly.express as px

# Discard rows that contain any missing value before plotting
plot_data = sql_df_lhcs.dropna()

# Scatter of median salary vs skill count. Colouring by 'job_title_final' makes
# Plotly Express create one trace per job title, named after that title.
fig = px.scatter(plot_data, x='median_salary', y='skill_count', color='job_title_final', hover_data=['skill', 'job_title_final'],
                 title='Scatter Plot of Salary vs Count with Job Title Filter', labels={'median_salary': 'Median Salary', 'skill_count': 'Skill Count'})

# Dropdown with one button per job title.
# A restyle 'visible' list is indexed by trace, not by data row, so the mask is
# built from the figure's traces (one per job title) rather than from the rows
# of plot_data; a per-row mask would toggle the wrong traces.
job_titles = plot_data['job_title_final'].unique()
trace_names = [trace.name for trace in fig.data]
fig.update_layout(
    updatemenus=[
        dict(
            buttons=[
                dict(
                    args=[{'visible': [name == str(job_title) for name in trace_names]}],
                    label=job_title,
                    method='restyle',
                )
                for job_title in job_titles
            ],
            direction='down',
            showactive=True,
        )
    ],
    showlegend=False
)

fig.show()

In [None]:
import plotly.express as px

# Discard rows that contain any missing value before plotting
plot_data = sql_df_lhcs.dropna()

# Scatter of median salary vs skill count. Colouring by 'job_title_final' makes
# Plotly Express create one trace per job title, named after that title.
fig = px.scatter(plot_data, x='median_salary', y='skill_count', color='job_title_final', hover_data=['skill', 'job_title_final'],
                 title='Scatter Plot of Salary vs Count with Job Title Filter', labels={'median_salary': 'Median Salary', 'skill_count': 'Skill Count'})

# Dropdown: an 'All' entry first, then one button per job title.
# A restyle 'visible' list is indexed by trace, not by data row, so every mask
# has one entry per trace in fig.data; a per-row mask would toggle the wrong traces.
job_titles = plot_data['job_title_final'].unique()
trace_names = [trace.name for trace in fig.data]
buttons = [dict(label='All', method='restyle', args=[{'visible': [True] * len(trace_names)}])]
buttons += [
    dict(label=job_title, method='restyle',
         args=[{'visible': [name == str(job_title) for name in trace_names]}])
    for job_title in job_titles
]

fig.update_layout(
    updatemenus=[dict(buttons=buttons, direction='down', showactive=True)],
    showlegend=False
)

fig.show()

In [None]:
import plotly.express as px

# Discard rows that contain any missing value before plotting
plot_data = sql_df_lhcs.dropna()

# Scatter of median salary vs skill count. Colouring by 'job_title_final' makes
# Plotly Express create one trace per job title, named after that title.
fig = px.scatter(plot_data, x='median_salary', y='skill_count', color='job_title_final', hover_data=['skill', 'job_title_final'],
                 title='Scatter Plot of Salary vs Count', labels={'median_salary': 'Median Salary', 'skill_count': 'Skill Count'})

# Dropdown: one button per job title, followed by an 'All' button.
# A 'visible' list is indexed by trace, not by data row, so every mask has one
# entry per trace in fig.data; a per-row mask would toggle the wrong traces.
job_titles = plot_data['job_title_final'].unique()
trace_names = [trace.name for trace in fig.data]

buttons = [
    dict(
        label=job_title,
        method='update',
        args=[{'visible': [name == str(job_title) for name in trace_names]}]
    )
    for job_title in job_titles
]

# Add a button to show all
buttons.append(
    dict(
        label='All',
        method='update',
        args=[{'visible': [True] * len(trace_names)}]
    )
)

# The menu starts with button 0 highlighted (active=0), so apply that button's
# mask up front. Hiding every trace instead would render an empty figure while
# the dropdown claims the first job title is selected.
for trace, shown in zip(fig.data, buttons[0]['args'][0]['visible']):
    trace.visible = shown

# Update layout with buttons
fig.update_layout(
    updatemenus=[dict(active=0, buttons=buttons)],
    showlegend=False
)

fig.show()

In [None]:
import plotly.express as px

# Discard rows that contain any missing value before plotting
plot_data = sql_df_lhcs.dropna()

# Median salary vs skill count, one colour per job title taken from the
# qualitative 'Plotly' colour sequence; skill and job title are shown on hover
fig = px.scatter(
    plot_data,
    x='median_salary',
    y='skill_count',
    color='job_title_final',
    color_discrete_sequence=px.colors.qualitative.Plotly,
    hover_data=['skill', 'job_title_final'],
    title='Scatter Plot of Salary vs Count',
    labels={'median_salary': 'Median Salary', 'skill_count': 'Skill Count'},
)

# Hide the legend
fig.update_layout(showlegend=False)

fig.show()

In [None]:
import plotly.express as px

# Discard rows that contain any missing value before plotting
plot_data = sql_df_lhcs.dropna()

# Median salary vs skill count, coloured by job title; skill and job title are shown on hover
fig = px.scatter(
    plot_data,
    x='median_salary',
    y='skill_count',
    color='job_title_final',
    hover_data=['skill', 'job_title_final'],
    title='Scatter Plot of Salary vs Count',
    labels={'median_salary': 'Median Salary', 'skill_count': 'Skill Count'},
)

# Hide the legend
fig.update_layout(showlegend=False)

fig.show()

In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Discard rows that contain any missing value before plotting
plot_data = sql_df_lhcs.dropna()

# Start from an empty figure and add one marker trace per job title
fig = go.Figure()

job_titles = plot_data['job_title_final'].unique()
for job_title in job_titles:
    title_rows = plot_data.loc[plot_data['job_title_final'] == job_title]
    fig.add_trace(
        go.Scatter(
            x=title_rows['median_salary'],
            y=title_rows['skill_count'],
            mode='markers',
            name=job_title,
            hovertext=title_rows['skill'],  # skill name shown on hover
        )
    )

# Title, axis titles, and no legend
fig.update_layout(
    title='Scatter Plot of Salary vs Count with Job Title Filter',
    xaxis_title='Median Salary',
    yaxis_title='Skill Count',
    showlegend=False,
)

fig.show()