## List Tables
Exploring the tables in the 'Data Nerd Jobs' data connection.

In [None]:
\list

## Explore 'keywords_all' Column
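Analyze the 'keywords_all' column in the 'data_nerd_jobs' table: first the most frequent keywords, then the median yearly salary per keyword, and finally a bar chart of median salary by skill.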

In [None]:
-- Top 10 most frequent keywords across all job listings.
-- keywords_all.list is flattened so that every array entry becomes its own
-- row (a listing with N keywords contributes N rows); the rows are then
-- counted per keyword value.
SELECT
  keyword.element,
  COUNT(*) AS count
FROM
  `public_job_listings.data_nerd_jobs`,
  UNNEST(keywords_all.list) AS keyword
GROUP BY
  keyword.element
-- The second sort key is a tie-breaker: without it, keywords with equal
-- counts may be returned in any order, so the rows that survive the LIMIT
-- could differ from one run to the next.
ORDER BY
  count DESC,
  keyword.element ASC
LIMIT 10

In [None]:
-- Top 10 skills by median yearly salary, together with the number of
-- salaried rows each median is based on.
WITH SkillSalary AS (
  -- One row per (listing, keyword) pair, restricted to listings that
  -- actually report a yearly salary.
  SELECT
    keyword.element AS skill,
    salary_year
  FROM
    `public_job_listings.data_nerd_jobs`,
    UNNEST(keywords_all.list) AS keyword
  WHERE
    salary_year IS NOT NULL
),
MedianSalaries AS (
  -- Both expressions are window functions, so every input row is kept and
  -- simply carries its skill's median and row count alongside it.
  SELECT
    skill,
    PERCENTILE_CONT(salary_year, 0.5) OVER (PARTITION BY skill) AS median_salary,
    COUNT(*) OVER (PARTITION BY skill) AS job_count
  FROM
    SkillSalary
)
-- DISTINCT collapses the repeated per-row values to one row per skill.
-- job_count is exposed so that a median backed by very few rows can be
-- recognised as such.
SELECT DISTINCT
  skill,
  median_salary,
  job_count
FROM
  MedianSalaries
-- skill is a tie-breaker: several skills can share the same median, and
-- without a second sort key the rows kept by LIMIT would not be
-- deterministic.
ORDER BY
  median_salary DESC,
  skill ASC
LIMIT 10

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.ticker import FuncFormatter

# Set the aesthetic style of the plots.
# The matplotlib style sheet is applied after the seaborn theme, so for any
# setting both of them define, the 'dark_background' value is the one in effect.
sns.set(style='darkgrid')
plt.style.use('dark_background')

# Load the data into a pandas dataframe.
# NOTE(review): these figures are hard-coded in the cell rather than read from
# a query result -- confirm they are still current before relying on the chart.
data = {
    'skill': ['SQL', 'Python', 'Java', 'AWS', 'R', 'Tableau', 'Excel', 'Power BI', 'SAS', 'Hadoop'],
    'median_salary': [111524, 118225, 128591, 128591, 108344, 108344, 111524, 108344, 108344, 128591],
    'job_count': [896, 561, 527, 527, 539, 539, 539, 539, 539, 527]
}
df = pd.DataFrame(data)

# Sort the dataframe by median_salary, highest first. Several skills share the
# same median, so a stable sort is requested to keep the bar order
# deterministic across runs.
df_sorted = df.sort_values('median_salary', ascending=False, kind='mergesort')

# Formatter for currency: whole dollars with thousands separators.
formatter = FuncFormatter(lambda x, pos: f'${x:,.0f}')

# Create the bar plot on an explicit Axes instead of relying on pyplot's
# implicit "current figure".
fig, ax = plt.subplots(figsize=(10, 8))
barplot = sns.barplot(x='median_salary', y='skill', data=df_sorted, palette='Blues_r', ax=ax)

# Set x-axis limit.
# NOTE: the axis does not start at zero, so bar lengths exaggerate the
# differences between skills; read the tick values, not the bar proportions.
ax.set_xlim(105000, 130000)

# Format the x-axis as currency
ax.xaxis.set_major_formatter(formatter)

# Add the job count values on the bars. Bars are drawn in df_sorted row order
# at y = 0, 1, 2, ..., so the enumerate index is the bar's y position. The
# label is vertically centred on the bar and nudged 4 points to the right so
# it does not touch the bar end.
for row, (salary, count) in enumerate(zip(df_sorted['median_salary'], df_sorted['job_count'])):
    ax.annotate(
        f'{count}',
        xy=(salary, row),
        xytext=(4, 0),
        textcoords='offset points',
        va='center',
    )

# The axis labels are intentionally blank; the title carries the meaning of
# both the bars and the numbers printed next to them.
ax.set_title('Median salary by skill (bar labels: job count)')
ax.set_xlabel('')
ax.set_ylabel('')

# Show the plot
plt.show()