In [None]:
\list

## Exploring 'keywords_all' Column
A brief exploration of the 'keywords_all' column in the 'data_nerd_jobs' table, focusing on the structure and content of the data.

In [None]:
-- Peek at the raw, nested keywords_all values.
-- Capped at 100 rows so the cell shows a sample instead of returning the whole table.
SELECT
  keywords_all
FROM
  public_job_listings.data_nerd_jobs
LIMIT 100;

## Unpacking JSON Objects in 'keywords_all'
Extracting job skills from the JSON objects in the 'keywords_all' column of the 'data_nerd_jobs' table.

In [None]:
-- Flatten the nested keyword list: each element of keywords_all.list becomes its own row.
-- Output is capped at 100 rows because this is only a preview.
SELECT
  skill.element
FROM
  public_job_listings.data_nerd_jobs AS jobs
CROSS JOIN
  UNNEST(jobs.keywords_all.list) AS skill
LIMIT 100;

## Median Salary for Top 10 Skills
Calculating the median salary for the 10 most frequently listed skills in the 'keywords_all' column, using the 'salary_year' column from the 'data_nerd_jobs' table.

In [None]:
-- Median yearly salary and posting count for the ten most frequently listed skills.
-- Result columns: skill, median_salary, skill_count (one row per skill).
WITH TopSkills AS (
  -- Count how many postings list each skill and keep the ten most common.
  SELECT
    keyword.element AS skill,
    COUNT(*) AS posting_count
  FROM
    public_job_listings.data_nerd_jobs AS jobs
    CROSS JOIN UNNEST(jobs.keywords_all.list) AS keyword
  GROUP BY skill
  -- Secondary sort on skill makes the cut-off deterministic when counts tie.
  ORDER BY COUNT(*) DESC, skill
  LIMIT 10
),
MedianSalaries AS (
  -- PERCENTILE_CONT is a window function here, so it emits the same median on
  -- every (posting, skill) row; DISTINCT collapses those to one row per skill.
  SELECT DISTINCT
    ts.skill,
    PERCENTILE_CONT(jobs.salary_year, 0.5) OVER (PARTITION BY ts.skill) AS median_salary,
    ts.posting_count AS skill_count
  FROM
    public_job_listings.data_nerd_jobs AS jobs
    CROSS JOIN UNNEST(jobs.keywords_all.list) AS keyword
    INNER JOIN TopSkills AS ts ON keyword.element = ts.skill
)
SELECT
  skill,
  median_salary,
  skill_count
FROM
  MedianSalaries
-- Explicit ordering so the result is stable between runs.
ORDER BY median_salary DESC, skill;

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Theme: seaborn's darkgrid first, then matplotlib's dark background applied on top.
sns.set(style="darkgrid")
plt.style.use("dark_background")

# Rank the skills by median salary (highest first) and keep only the plotted columns.
data = sql_df_ghfh.sort_values('median_salary', ascending=False)[['skill', 'median_salary', 'skill_count']]

# Horizontal bars: one bar per skill, bar length is the median salary.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(x='median_salary', y='skill', data=data, palette="Blues_r", ax=ax)

# Write each skill's posting count at the end of its bar (bar i sits at y == i).
for row, (salary, postings) in enumerate(zip(data['median_salary'], data['skill_count'])):
    ax.text(salary, row, str(postings), color='white', va='center')

ax.set_title('Median Salary and Job Postings for Top 10 Skills')
ax.set_xlabel('Median Salary ($)')
ax.set_ylabel('Skill')
plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Theme: seaborn's darkgrid first, then matplotlib's dark background applied on top.
sns.set(style="darkgrid")
plt.style.use("dark_background")

# Rank the skills by median salary (highest first) and keep only the plotted columns.
data = sql_df_ghfh.sort_values('median_salary', ascending=False)[['skill', 'median_salary', 'skill_count']]

# Horizontal bars: one bar per skill, bar length is the median salary.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(x='median_salary', y='skill', data=data, palette="Blues_r", ax=ax)

# Label each bar with "<count> postings", anchored at the end of the bar (bar i sits at y == i).
for row, (salary, postings) in enumerate(zip(data['median_salary'], data['skill_count'])):
    ax.text(salary, row, f'{postings} postings', color='white', va='center')

ax.set_title('Median Salary for Top 10 Skills with Job Postings Count')
ax.set_xlabel('Median Salary ($)')
ax.set_ylabel('Skill')
plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Theme: seaborn's darkgrid first, then matplotlib's dark background applied on top.
sns.set(style="darkgrid")
plt.style.use("dark_background")

# Rank the skills by median salary (highest first) and keep only the plotted columns.
data = sql_df_ghfh.sort_values('median_salary', ascending=False)[['skill', 'median_salary', 'skill_count']]

# Larger canvas than the earlier drafts so the labels have more room.
fig, ax = plt.subplots(figsize=(14, 10))
sns.barplot(x='median_salary', y='skill', data=data, palette="Blues_r", ax=ax)

# Label each bar with "<count> postings"; the label starts 5000 x-units to the
# left of the bar end so that it begins inside the bar (bar i sits at y == i).
for row, (salary, postings) in enumerate(zip(data['median_salary'], data['skill_count'])):
    ax.text(salary - 5000, row, f'{postings} postings', color='white', va='center')

ax.set_title('Median Salary for Top 10 Skills with Job Postings Count')
ax.set_xlabel('Median Salary ($)')
ax.set_ylabel('Skill')
plt.show()

## Median Salary for All Skills
Calculating the median salary for all skills listed in the 'keywords_all' column using the 'salary_year' column.

In [None]:
-- Median yearly salary for every skill that appears in keywords_all.
-- PERCENTILE_CONT is used as a window function, so without DISTINCT the query
-- would return one row per (posting, skill) pair, each repeating the skill's
-- median. DISTINCT reduces that to one row per skill.
SELECT DISTINCT
  keyword.element AS skill,
  PERCENTILE_CONT(jobs.salary_year, 0.5) OVER (PARTITION BY keyword.element) AS median_salary
FROM
  public_job_listings.data_nerd_jobs AS jobs
  CROSS JOIN UNNEST(jobs.keywords_all.list) AS keyword
-- Highest-paying skills first; skill name breaks ties for a stable order.
ORDER BY median_salary DESC, skill;