## List Tables
Exploring the tables in the 'Data Nerd Jobs' data connection.

In [None]:
\list

## Explore 'keywords_all' Column
Analyzing the 'keywords_all' column in the 'data_nerd_jobs' table.

In [None]:
-- Ten most frequent entries of `keywords_all` across all job postings.
-- Every posting is expanded to one row per keyword before counting.
SELECT
  kw.element,
  COUNT(*) AS count
FROM `public_job_listings.data_nerd_jobs` AS jobs
CROSS JOIN UNNEST(jobs.keywords_all.list) AS kw
GROUP BY kw.element
ORDER BY count DESC
LIMIT 10

## Plot Median Salary for Top 10 Skills
Visualizing the median salary for the top 10 skills with salary data.

In [None]:
-- Median yearly salary and posting count for the ten skills that appear
-- most often among postings with a non-NULL salary_year.
WITH skill_salary AS (
  -- One row per (posting, keyword); postings without a salary are dropped.
  SELECT
    kw.element AS skill,
    jobs.salary_year
  FROM `public_job_listings.data_nerd_jobs` AS jobs
  CROSS JOIN UNNEST(jobs.keywords_all.list) AS kw
  WHERE jobs.salary_year IS NOT NULL
),
top_skills AS (
  -- The ten skills with the most salaried postings.
  SELECT skill
  FROM skill_salary
  GROUP BY skill
  ORDER BY COUNT(*) DESC
  LIMIT 10
),
skill_stats AS (
  -- Both aggregates are computed as window functions, so every row of a
  -- skill carries the same pair of values; duplicates are removed below.
  SELECT
    ss.skill,
    PERCENTILE_CONT(ss.salary_year, 0.5) OVER (PARTITION BY ss.skill) AS median_salary,
    COUNT(ss.salary_year) OVER (PARTITION BY ss.skill) AS salary_count
  FROM skill_salary AS ss
  WHERE ss.skill IN (SELECT skill FROM top_skills)
)
SELECT DISTINCT
  skill,
  median_salary,
  salary_count
FROM skill_stats
ORDER BY median_salary DESC

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Seaborn grid theme first, then matplotlib's dark background on top of it.
sns.set(style='darkgrid')
plt.style.use('dark_background')

# `sql_df_fkcs` is not defined in this cell -- presumably the result frame of
# the SQL cell above (TODO confirm). It must provide the 'skill' and
# 'median_salary' columns.
df = sql_df_fkcs

# Bars are listed from the highest median salary down to the lowest.
skills_high_to_low = df.sort_values('median_salary', ascending=False)['skill']

# Horizontal bar chart: one bar per skill, bar length = median salary.
fig, ax = plt.subplots(figsize=(10, 8))
salary_plot = sns.barplot(
    data=df,
    x='median_salary',
    y='skill',
    order=skills_high_to_low,
    palette='Blues_r',
    ax=ax,
)
ax.set_title('Median Salary by Skill')
ax.set_xlabel('Median Salary')
ax.set_ylabel('Skill')
plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Apply the seaborn grid theme, then switch matplotlib to a dark background.
sns.set(style='darkgrid')
plt.style.use('dark_background')

# `sql_df_fkcs` comes from outside this cell -- presumably the SQL result
# with 'skill' and 'median_salary' columns (TODO confirm).
skill_order = sql_df_fkcs.sort_values('median_salary', ascending=False).skill

# One horizontal bar per skill, highest median salary at the top.
fig, ax = plt.subplots(figsize=(10, 8))
barplot = sns.barplot(
    x='median_salary',
    y='skill',
    data=sql_df_fkcs,
    palette='Blues_r',
    order=skill_order,
    ax=ax,
)
ax.set(
    title='Median Salary for Top 10 Skills',
    xlabel='Median Salary',
    ylabel='Skill',
)
plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
sns.set(style='darkgrid')
plt.style.use('dark_background')

# Load the dataframe
df = sql_df_fkcs

# Plotting the median salary for the top 10 skills with modifications
plt.figure(figsize=(10, 8))
salary_plot = sns.barplot(x='median_salary', y='skill', data=df, palette='Blues_r', order=df.sort_values('median_salary', ascending=False)['skill'])

# Formatting the x-axis
fmt = '${x:,.0f}'
tick = mtick.StrMethodFormatter(fmt)
salary_plot.xaxis.set_major_formatter(tick)
plt.xlim(80000, 160000)

# Removing the axis labels
salary_plot.set(xlabel=None, ylabel=None)

# Setting the title and making it bigger
plt.title('Median Salary by Skill', fontsize=20)

# Adding the count of job postings at the end of the bars
for p in salary_plot.patches:
    salary_plot.annotate(f'{int(p.get_width())} postings',
                   (p.get_width(), p.get_y() + p.get_height() / 2),
                   ha='left', va='center',
                   size=12, color='white',
                   xytext=(5, 0),
                   textcoords='offset points')

plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

sns.set(style='darkgrid')
plt.style.use('dark_background')

# Load the dataframe. `sql_df_fkcs` is not defined in this cell -- presumably
# the SQL result with 'skill', 'median_salary' and 'salary_count' columns
# (TODO confirm).
df = sql_df_fkcs

# Sort once and use the same frame for plotting and for the label lookup.
# Indexing the unsorted frame by bar position is only correct when it already
# happens to be in exactly the plotted order.
ranked = df.sort_values('median_salary', ascending=False).reset_index(drop=True)

# Plotting the median salary for the top 10 skills with modifications
plt.figure(figsize=(10, 8))
salary_plot = sns.barplot(x='median_salary', y='skill', data=ranked, palette='Blues_r', order=ranked['skill'])

# Formatting the x-axis with dollar sign and comma for thousands
fmt = '${x:,.0f}'
tick = mtick.StrMethodFormatter(fmt)
salary_plot.xaxis.set_major_formatter(tick)

# Setting the x-axis limits
# NOTE(review): the axis does not start at zero, so bar lengths exaggerate
# the differences between skills.
salary_plot.set_xlim(85000, 155000)

# Removing the axis labels
salary_plot.set_xlabel('')
salary_plot.set_ylabel('')

# Setting the title and making it bigger
salary_plot.set_title('Median Salary by Skill', fontsize=20)

# Adding the count of job postings near the end of each bar in black font
for p in salary_plot.patches:
    width = p.get_width()
    y_position = p.get_y() + p.get_height() / 2
    # Categorical bars are centred on integer positions 0..n-1 in `order`,
    # so the rounded centre is the row index into `ranked`.
    row_idx = int(round(y_position))
    if not 0 <= row_idx < len(ranked):
        continue  # not one of the skill bars
    count = ranked['salary_count'].iloc[row_idx]
    # Put the label inside the bar when the bar is long enough to hold it,
    # otherwise just past its end.
    x_position = width - 5000 if width > 90000 else width + 5000
    salary_plot.annotate(f'{count} postings',
                   (x_position, y_position),
                   ha = 'center', va = 'center',
                   color = 'black',
                   size=10)

plt.show()

## Explore Salary vs Skills
Visualizing how top skills relate to salary in a scatter plot.

In [None]:
-- Median yearly salary and posting count for every skill that appears in a
-- posting with a non-NULL salary_year (no top-N restriction).
WITH SkillSalary AS (
  -- One row per (posting, keyword). Postings without a salary and keywords
  -- whose element is NULL are excluded.
  SELECT
    keyword.element AS skill,
    salary_year
  FROM
    `public_job_listings.data_nerd_jobs`,
    UNNEST(keywords_all.list) AS keyword
  WHERE
    salary_year IS NOT NULL
    AND keyword.element IS NOT NULL
),
MedianSalaries AS (
  -- Both aggregates are window functions, so each row of a skill carries the
  -- same values; the final SELECT DISTINCT collapses them to one row per skill.
  SELECT
    skill,
    PERCENTILE_CONT(salary_year, 0.5) OVER (PARTITION BY skill) AS median_salary,
    COUNT(salary_year) OVER (PARTITION BY skill) AS salary_count
  FROM
    SkillSalary
)
SELECT
  DISTINCT skill,
  median_salary,
  salary_count
FROM
  MedianSalaries
ORDER BY
  median_salary DESC

In [None]:
import plotly.express as px

# Load the dataframe.
# NOTE(review): `sql_df_fkcs` is not defined in this cell, and the same name
# is used by the earlier top-10 bar charts -- confirm it holds the result of
# the all-skills query above.
df = sql_df_fkcs

# Human-readable axis titles for the two plotted columns.
axis_labels = {'median_salary': 'Median Salary', 'salary_count': 'Job Postings Count'}

# Create the scatter plot: one point per skill, median salary on x and number
# of postings on y, coloured by skill.
# NOTE(review): `color_continuous_scale` presumably has no effect here,
# because 'skill' looks like a categorical column -- confirm.
fig = px.scatter(
    df,
    x='median_salary',
    y='salary_count',
    color='skill',
    hover_data=['skill'],
    labels=axis_labels,
    title='Scatterplot of Median Salary vs. Job Postings Count',
    color_continuous_scale=px.colors.sequential.Viridis,
)

# Show the plot
fig.show()

In [None]:
!pip install -q plotly
import plotly.express as px

# Load the dataframe
df = sql_df_1

# Create the scatter plot
fig = px.scatter(df, x='median_salary', y='salary_count',
                 hover_data=['skill'],
                 title='Median Salary vs. Job Posting Count',
                 labels={'median_salary': 'Median Salary', 'salary_count': 'Job Posting Count'},
                 template='plotly_dark')

# Show the plot
fig.show()