## List Tables in Data Connection
Exploring tables in the data connection 'Data Nerd Jobs' using the command '\list'.

In [None]:
\list

In [None]:
-- Preview the raw `keywords_all` column for five listings.
SELECT
    jobs.keywords_all
FROM public_job_listings.data_nerd_jobs AS jobs
LIMIT 5;

In [None]:
-- Flatten the nested keyword list so each keyword becomes its own row,
-- then show five of the resulting rows.
SELECT
    kw.element AS unpacked_keywords
FROM public_job_listings.data_nerd_jobs
CROSS JOIN UNNEST(keywords_all.list) AS kw
LIMIT 5;

## Skill Popularity Analysis
Analyzing the most popular skills in the `keywords_all` column.

In [None]:
-- Count occurrences of every unpacked keyword and keep the ten most frequent.
SELECT
    kw.element AS skill,
    COUNT(*)   AS frequency
FROM public_job_listings.data_nerd_jobs
CROSS JOIN UNNEST(keywords_all.list) AS kw
GROUP BY skill
ORDER BY frequency DESC
LIMIT 10;

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Theme: seaborn's darkgrid first, then matplotlib's dark background on top.
sns.set(style='darkgrid')
plt.style.use('dark_background')

# NOTE(review): `top_skills_df` is not created in this cell; presumably it holds
# the result of the top-10 skills query above. Confirm it exists on a fresh kernel.
fig, ax = plt.subplots(figsize=(12, 8))

# Horizontal bars: one bar per skill, bar length = frequency.
sns.barplot(
    data=top_skills_df,
    x='frequency',
    y='skill',
    palette='Blues_r',
    ax=ax,
)

# Axis labels and title so the figure stands on its own.
ax.set_xlabel('Frequency')
ax.set_ylabel('Skill')
ax.set_title('Top 10 Most Popular Skills')

plt.show()

In [None]:
-- Pull a single row with every column to inspect the table's shape.
SELECT *
FROM public_job_listings.data_nerd_jobs
LIMIT 1;

## Job Title Analysis
Analyzing the three most popular job titles in the `job_title_final` column.

In [None]:
-- Rank job titles by number of listings and keep the three most common.
SELECT
    job_title_final,
    COUNT(*) AS frequency
FROM public_job_listings.data_nerd_jobs
GROUP BY job_title_final
ORDER BY frequency DESC
LIMIT 3;

## Skill Analysis by Job Title
Analyzing the most popular skills for the top 3 job titles.

In [None]:
-- Count how often each skill appears per job title, restricted to three titles.
-- Rows are ordered by title, then by descending frequency; the skill name is a
-- final tie-breaker so skills with equal counts come back in a stable order.
SELECT
    job_title_final,
    list.element AS skill,
    COUNT(*) AS frequency
FROM public_job_listings.data_nerd_jobs, UNNEST(keywords_all.list) AS list
WHERE job_title_final IN ('Data Analyst', 'Data Engineer', 'Data Scientist')
GROUP BY job_title_final, skill
ORDER BY job_title_final, frequency DESC, skill;

## Final Skill Analysis by Job Title
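Plotting all relevant skills for the top 3 job titles, without restricting the chart to the top 10 skills. This cell expects `revised_skills_by_job_title_df` (including a `skill_percent` column) to already exist in the kernel.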

In [None]:
# NOTE(review): `revised_skills_by_job_title_df` is not defined in any visible
# cell; it must already be in the kernel with `skill`, `skill_percent` and
# `job_title_final` columns. Confirm this survives Restart & Run All.
skills_by_job_title_df = revised_skills_by_job_title_df

# Every skill is drawn (no top-10 cut), hence the large canvas.
fig, ax = plt.subplots(figsize=(20, 15))

# Grouped horizontal bars: one group per skill, one bar per job title.
sns.barplot(
    data=revised_skills_by_job_title_df,
    x='skill_percent',
    y='skill',
    hue='job_title_final',
    palette='Blues_r',
    ax=ax,
)

ax.set_xlabel('Skill Percent (%)')
ax.set_ylabel('Skill')
ax.set_title('All Relevant Skills by Job Title (Percentage)')

plt.show()

## Selected Skills Analysis by Job Title
Focusing on specific skills: Excel, SQL, Python, Tableau, Power BI, and R.

In [None]:
# Skills of interest, written in lower case because the match below
# lower-cases the `skill` column before comparing.
selected_skills = ['excel', 'sql', 'python', 'tableau', 'power bi', 'r']

# Boolean mask of rows whose skill is one of the selected skills.
is_selected = revised_skills_by_job_title_df['skill'].str.lower().isin(selected_skills)
filtered_skills_df = revised_skills_by_job_title_df[is_selected]

# Grouped horizontal bars: one group per selected skill, one bar per job title.
fig, ax = plt.subplots(figsize=(15, 10))
sns.barplot(
    data=filtered_skills_df,
    x='skill_percent',
    y='skill',
    hue='job_title_final',
    palette='Blues_r',
    ax=ax,
)

ax.set_xlabel('Skill Percent (%)')
ax.set_ylabel('Skill')
ax.set_title('Selected Skills by Job Title (Percentage)')

plt.show()

## Skill Popularity Summary
Summing up the percentages of each skill across the top 3 job titles and ordering them from high to low.

In [None]:
# Total `skill_percent` per skill, added up over all job titles in the filtered frame.
percent_by_skill = filtered_skills_df.groupby('skill')['skill_percent'].sum()
skill_sums = percent_by_skill.reset_index()

# Largest combined percentage first.
sorted_skills = skill_sums.sort_values('skill_percent', ascending=False)

# Display the ranked table as the cell output.
sorted_skills

## Reordered Selected Skills by Job Title
Reordering the bar graph based on the summed percentages of each skill.

In [None]:
# Attach each skill's summed percentage to the per-title rows; the summed
# column arrives as `skill_percent_sum` because of the suffixes.
merged_df = filtered_skills_df.merge(
    sorted_skills,
    on='skill',
    suffixes=('', '_sum'),
)

# Bar order follows the ranking computed in `sorted_skills` (highest total first).
skill_order = sorted_skills['skill'].tolist()

fig, ax = plt.subplots(figsize=(15, 10))
sns.barplot(
    data=merged_df,
    x='skill_percent',
    y='skill',
    hue='job_title_final',
    order=skill_order,
    palette='Blues_r',
    ax=ax,
)

# The y-axis label is deliberately blank; the skill names label themselves.
ax.set_xlabel('Demand for Skill in Job Postings (%)')
ax.set_ylabel('')
ax.set_title('Top Requested Skills for Data Science Jobs')

plt.show()