# Cats EDA: MySQL → Pandas → Visualizations

## Prerequisites
Install packages if needed:
```bash
pip install pandas mysql-connector-python matplotlib
```


In [None]:
# ---- Configuration ----
# Connection settings for the MySQL server. Each setting can be overridden
# through an environment variable so that real credentials do not have to be
# written into the notebook; the literals are only fallbacks used when the
# corresponding variable is not set.
import os

HOST = os.environ.get("MYSQL_HOST", "127.0.0.1")
USER = os.environ.get("MYSQL_USER", "root")
PASSWORD = os.environ.get("MYSQL_PASSWORD", "yourpassword")
DATABASE = os.environ.get("MYSQL_DATABASE", "your_database")
# Table read by the SELECT queries; kept as a literal because it is
# interpolated directly into SQL text.
TABLE_NAME = "cats"


In [None]:
# ---- Imports & Connection ----
import pandas as pd
import mysql.connector
import matplotlib.pyplot as plt

%matplotlib inline

# Open a single connection from the configuration values; the query cells
# pass this `conn` object to pandas.
connection_settings = {
    "host": HOST,
    "user": USER,
    "password": PASSWORD,
    "database": DATABASE,
}
conn = mysql.connector.connect(**connection_settings)
print("✅ Connected to MySQL!")

In [None]:
# ---- Load cats table ----
# Read every row of the configured table into a DataFrame.
query = f"SELECT * FROM {TABLE_NAME};"
df = pd.read_sql(sql=query, con=conn)
print(f"Loaded {len(df)} rows from '{TABLE_NAME}'.")
# Trailing expression: the notebook renders the first five rows.
df.head(n=5)

In [None]:
# ---- Info & Missing Values ----
print("DataFrame info:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the report.
df.info()
print("\nMissing values per column:")
print(df.isna().sum())

# Trailing expression: rendered as the cell output. include='all' asks for
# summary statistics on every column, not only the numeric ones.
df.describe(include='all')

In [None]:
# ---- Age distribution ----
# Histogram of the non-missing ages, drawn on a freshly created figure.
plt.figure(figsize=(8, 5))
ages = df['age'].dropna()
ages.plot.hist(bins=15)
plt.title("Age Distribution of Cats")
plt.xlabel("Age")
plt.ylabel("Count")
plt.show()

In [None]:
# ---- Gender distribution ----
# Bar chart with one bar per distinct gender value.
plt.figure(figsize=(6, 4))
gender_counts = df['gender'].value_counts()
gender_counts.plot.bar()
plt.title("Gender Distribution of Cats")
plt.xlabel("Gender")
plt.ylabel("Count")
plt.show()

In [None]:
# ---- Average tricks by breed ----
# Mean number of tricks per breed, highest average first.
tricks_by_breed = df.groupby("breed")['num_of_tricks']
avg_tricks = tricks_by_breed.mean().sort_values(ascending=False)
display(avg_tricks)

# Same ranking as a bar chart.
avg_tricks.plot.bar(figsize=(8, 5), title="Average Tricks by Breed")
plt.show()

In [None]:
# ---- Count by country ----
# Number of rows per distinct country value.
country_column = df['country']
count_by_country = country_column.value_counts()
display(count_by_country)

# Same counts as a bar chart.
count_by_country.plot.bar(figsize=(8, 5), title="Cats by Country")
plt.show()

## Join Cats with Breeds Table

In [None]:
# ---- Join cats with breeds table ----
# LEFT JOIN keeps every row of the cats table and attaches the description
# of the matching breed where the `breeds` table has one.
query = f"""
SELECT c.*, b.description
FROM {TABLE_NAME} c
LEFT JOIN breeds b ON c.breed = b.breed;
"""
df_joined = pd.read_sql(sql=query, con=conn)
print(f"Loaded {len(df_joined)} rows with breed descriptions.")
# Trailing expression: the notebook renders the first five rows.
df_joined.head(n=5)

In [None]:
# ---- Average tricks by breed with description ----
# df_joined comes from a LEFT JOIN, so cats whose breed has no row in
# `breeds` carry a missing description. groupby() drops groups with a missing
# key by default, which would silently remove those cats from the averages;
# dropna=False keeps them as their own (breed, NaN) group.
avg_tricks_desc = (
    df_joined.groupby(["breed", "description"], dropna=False)['num_of_tricks']
    .mean()
    .sort_values(ascending=False)
)
display(avg_tricks_desc)

# Bar chart of the same averages, one bar per (breed, description) pair.
avg_tricks_desc.plot(kind='bar', figsize=(10,6), title="Average Tricks by Breed (with Description)")
plt.ylabel("Average Tricks")
plt.show()

## Breed-Level Summary Table

In [None]:
# ---- Breed-level summary ----
# One row per (breed, description) pair with the number of cats, their mean
# age and their mean number of tricks.
# df_joined comes from a LEFT JOIN, so a breed without a row in `breeds` has
# a missing description. groupby() drops groups with a missing key by
# default, which would leave those cats out of the summary; dropna=False
# keeps them.
breed_summary = df_joined.groupby(["breed", "description"], dropna=False).agg(
    count_cats=('id', 'count'),
    avg_age=('age', 'mean'),
    avg_tricks=('num_of_tricks', 'mean'),
).reset_index()

print("Breed-level Summary:")
display(breed_summary)

# Optional: visualize average age by breed
breed_summary.plot(x='breed', y='avg_age', kind='bar', figsize=(10,5), title="Average Age per Breed")
plt.ylabel("Average Age")
plt.show()

# Optional: visualize number of cats by breed
breed_summary.plot(x='breed', y='count_cats', kind='bar', figsize=(10,5), title="Number of Cats per Breed")
plt.ylabel("Number of Cats")
plt.show()

In [None]:
# ---- Cleanup ----
# Best-effort shutdown: a failure while closing is reported, not raised, so
# the final cell never aborts the notebook run.
try:
    conn.close()
    print("🔌 MySQL connection closed.")
except Exception as close_error:
    print("Error closing connection:", close_error)