# Employees Data Exploration (MySQL + db_config.json)

In [None]:
import json
import mysql.connector
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the MySQL connection settings from db_config.json. The keys read below
# are 'host', 'user', 'password' and 'database'. JSON is read as UTF-8
# explicitly so the result does not depend on the machine's locale encoding.
with open('db_config.json', encoding='utf-8') as f:
    config = json.load(f)

conn = mysql.connector.connect(
    host=config['host'],
    user=config['user'],
    password=config['password'],
    database=config['database'],
)

# Read the whole employees table into a DataFrame. The try/finally makes sure
# the connection is closed even when the query fails, so a failed cell run
# does not leave a dangling connection behind.
# NOTE(review): recent pandas versions may warn when given a raw DBAPI
# connection instead of a SQLAlchemy connectable; confirm whether that matters here.
try:
    df = pd.read_sql('SELECT * FROM employees;', conn)
finally:
    conn.close()

# Preview the first rows of the loaded table.
df.head()

## Basic stats

In [None]:
# Summary statistics for the salary and age columns.
df.loc[:, ['salary', 'age']].describe()

## Counts

In [None]:
# Number of rows per distinct department value.
df.loc[:, 'department'].value_counts()

In [None]:
# Number of rows per distinct country value.
df.loc[:, 'country'].value_counts()

In [None]:
# Number of rows per distinct gender value.
df.loc[:, 'gender'].value_counts()

## Aggregations

In [None]:
# Per-department summary: count of emp_id values plus mean salary and mean age.
# reset_index() turns the 'department' group key back into a regular column.
dept_agg = (
    df.groupby(by='department')
    .agg(
        headcount=pd.NamedAgg(column='emp_id', aggfunc='count'),
        avg_salary=pd.NamedAgg(column='salary', aggfunc='mean'),
        avg_age=pd.NamedAgg(column='age', aggfunc='mean'),
    )
    .reset_index()
)
# Show the summary with the largest departments first (dept_agg itself stays unsorted).
dept_agg.sort_values(by='headcount', ascending=False)

In [None]:
# Per-country summary: count of emp_id values and mean salary.
# reset_index() turns the 'country' group key back into a regular column.
country_agg = (
    df.groupby(by='country')
    .agg(
        headcount=pd.NamedAgg(column='emp_id', aggfunc='count'),
        avg_salary=pd.NamedAgg(column='salary', aggfunc='mean'),
    )
    .reset_index()
)
# Show the summary with the largest countries first (country_agg itself stays unsorted).
country_agg.sort_values(by='headcount', ascending=False)

## Plots

In [None]:
# Bar chart of headcount per department, largest department first.
dept_agg.sort_values(by='headcount', ascending=False).plot.bar(
    x='department',
    y='headcount',
    legend=False,
    title='Headcount by Department',
)
plt.tight_layout()

In [None]:
# Bar chart of average salary per department, highest average first.
dept_agg.sort_values(by='avg_salary', ascending=False).plot.bar(
    x='department',
    y='avg_salary',
    legend=False,
    title='Average Salary by Department',
)
plt.tight_layout()

In [None]:
# Bar chart of headcount per country, largest country first.
country_agg.sort_values(by='headcount', ascending=False).plot.bar(
    x='country',
    y='headcount',
    legend=False,
    title='Headcount by Country',
)
plt.tight_layout()

In [None]:
# Histogram of the salary column using 10 bins.
df.loc[:, 'salary'].plot.hist(bins=10, title='Salary Distribution')
plt.tight_layout()

In [None]:
# Scatter plot with age on the x-axis and salary on the y-axis.
df.plot.scatter(x='age', y='salary', title='Age vs Salary')
plt.tight_layout()