# Exploratory Data Analysis (EDA)
## Candidate Job Role Prediction

This notebook explores the distribution of skills, experience levels, and job roles to inform the feature engineering strategy.

In [None]:
from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Read the candidate dataset; the relative path points one directory above
# the current working directory.
dataset_path = '../candidate_job_role_dataset.csv'
df = pd.read_csv(dataset_path)

# Show the first five rows as this cell's output.
df.head(5)

### Target Distribution

In [None]:
# Horizontal count plot of the `job_role` column, with the bars ordered by
# descending frequency (the default sort order of `value_counts`).
role_order = df['job_role'].value_counts().index

plt.figure(figsize=(10, 6))
ax = sns.countplot(data=df, y='job_role', order=role_order)
ax.set_title('Distribution of Job Roles')
plt.show()

### Experience Level vs Job Role

In [None]:
# Grouped count plot: one cluster of bars per `experience_level` value,
# coloured by `job_role`.
plt.figure(figsize=(12, 6))
ax = sns.countplot(x='experience_level', hue='job_role', data=df)

# Tilt the x tick labels by 45 degrees.
plt.xticks(rotation=45)
ax.set_title('Experience Level by Job Role')
plt.show()

### Skills Analysis

In [None]:
# Flatten the comma-separated `skills` strings into one flat list of tokens.
# A plain comprehension is used instead of `Series.apply`, which was previously
# called only for its side effect of extending a list (and built a throwaway
# Series of None values). Rows with a missing `skills` value are skipped.
# Surrounding whitespace is stripped from every token, and empty tokens
# (produced by trailing or doubled commas) are dropped so that a blank label
# cannot be counted as a skill.
all_skills = [
    skill
    for entry in df['skills'].dropna()
    for skill in (part.strip() for part in entry.split(','))
    if skill
]

# `most_common(20)` yields (skill, count) pairs sorted by descending count.
skill_counts = Counter(all_skills).most_common(20)
skill_df = pd.DataFrame(skill_counts, columns=['Skill', 'Count'])

# Horizontal bar chart of the 20 most frequent skills.
plt.figure(figsize=(10, 8))
sns.barplot(data=skill_df, x='Count', y='Skill')
plt.title('Top 20 Skills')
plt.show()