# Teachers Table EDA and GroupBy Analysis

In [None]:
# ---- Configuration ----
# Connection settings for the MySQL server. Each value can be overridden with
# an environment variable so real credentials do not have to be saved inside
# the notebook; the fallbacks are the original placeholder values.
import os

HOST = os.environ.get("MYSQL_HOST", "127.0.0.1")
USER = os.environ.get("MYSQL_USER", "root")
PASSWORD = os.environ.get("MYSQL_PASSWORD", "yourpassword")
DATABASE = os.environ.get("MYSQL_DATABASE", "your_database")


In [None]:
# ---- Imports & Connection ----
import pandas as pd
import mysql.connector
import matplotlib.pyplot as plt

%matplotlib inline

# Open the single MySQL connection that the load cell queries through and the
# cleanup cell closes.
conn = mysql.connector.connect(
    host=HOST,
    user=USER,
    password=PASSWORD,
    database=DATABASE,
)
# Status marker is the check-mark emoji; the previous literal was the same
# UTF-8 bytes mis-decoded into garbled characters.
print("✅ Connected to MySQL!")

In [None]:
# ---- Load teachers table ----
# Read every row and column of `teachers` into a DataFrame over the open connection.
df = pd.read_sql(sql="SELECT * FROM teachers;", con=conn)
# Preview the first rows of the loaded table.
df.head()

In [None]:
# ---- Exploratory Analysis ----
# Text overview of the DataFrame: structure, summary statistics, null counts.
print("--- Info ---")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print("\n--- Summary ---")
# include='all' summarises non-numeric columns as well as numeric ones.
print(df.describe(include='all'))
print("\n--- Missing Values ---")
# Per-column count of null entries.
print(df.isnull().sum())

In [None]:
# ---- Average salary per degree ----
# Mean of the salary column within each distinct degree value.
avg_salary_degree = df.groupby("degree")["salary"].agg("mean")
display(avg_salary_degree)

# Bar chart with one bar per degree.
avg_salary_degree.plot.bar(title="Average Salary by Degree", figsize=(8, 5))
plt.show()

In [None]:
# ---- Average salary per school ----
# Mean of the salary column within each distinct school value.
avg_salary_school = df.groupby("school")["salary"].agg("mean")
display(avg_salary_school)

# Bar chart with one bar per school.
avg_salary_school.plot.bar(title="Average Salary by School", figsize=(8, 5))
plt.show()

In [None]:
# ---- Teachers count per department ----
# Number of rows for each distinct department value.
dept_count = df["department"].value_counts()
display(dept_count)

# Bar chart with one bar per department.
dept_count.plot.bar(title="Teachers Count by Department", figsize=(8, 5))
plt.show()

In [None]:
# ---- Average salary per country ----
# Mean of the salary column within each distinct country value.
avg_salary_country = df.groupby("country")["salary"].agg("mean")
display(avg_salary_country)

# Bar chart with one bar per country.
avg_salary_country.plot.bar(title="Average Salary by Country", figsize=(8, 5))
plt.show()

In [None]:
# ---- Hire trend by year ----
# Count rows per hire_year, then order the counts by year rather than by frequency.
hire_trend = df["hire_year"].value_counts().sort_index()
display(hire_trend)

# Line chart with a marker on every year.
hire_trend.plot.line(marker="o", title="Hire Trend by Year", figsize=(8, 5))
plt.show()

## Salary Distribution Visualization

In [None]:
# ---- Salary distribution histogram ----
# Histogram of the salary column split into 20 bins.
df["salary"].plot.hist(bins=20, title="Salary Distribution Histogram", figsize=(8, 5))
plt.xlabel("Salary")
plt.show()

In [None]:
# ---- Salary distribution boxplot ----
# Single box summarising the whole salary column.
df["salary"].plot.box(title="Salary Distribution Boxplot", figsize=(5, 7))
plt.show()

## Salary Distribution by Groups

In [None]:
# ---- Salary distribution by degree ----
# One salary box per degree value, drawn without grid lines.
df.boxplot(column="salary", by="degree", figsize=(8, 6), grid=False)
# Blank out the figure-level title so only the axes title below is shown.
plt.suptitle("")
# Title and axis labels are applied to the current axes in a single call.
plt.gca().set(title="Salary Distribution by Degree", xlabel="Degree", ylabel="Salary")
plt.show()

In [None]:
# ---- Salary distribution by school ----
# One salary box per school value, drawn without grid lines.
df.boxplot(column="salary", by="school", figsize=(10, 6), grid=False)
# Blank out the figure-level title so only the axes title below is shown.
plt.suptitle("")
# Title and axis labels are applied to the current axes in a single call.
plt.gca().set(title="Salary Distribution by School", xlabel="School", ylabel="Salary")
plt.show()

In [None]:
# ---- Cleanup ----
# Best-effort close of the MySQL connection: a failure is reported on stdout
# instead of being raised.
try:
    conn.close()
except Exception as e:
    print("Connection close error:", e)
else:
    # Reached only when close() did not raise. The marker is the plug emoji;
    # the previous literal was the same UTF-8 bytes mis-decoded.
    print("🔌 MySQL connection closed.")