# Sales Analysis with MySQL + Python (Jupyter Notebook)

This notebook demonstrates how to connect to a MySQL database, read the `sales` table, run exploratory analysis, perform GROUP BY operations, and visualize results.

In [None]:
!pip install pandas matplotlib mysql-connector-python

In [None]:

import getpass
import os

import matplotlib.pyplot as plt
import mysql.connector
import pandas as pd


## Step 1: Connect to MySQL and Load Data

In [None]:

# Connection settings are read from the environment so that credentials are
# never stored in the notebook itself. Set MYSQL_HOST, MYSQL_USER,
# MYSQL_PASSWORD and MYSQL_DATABASE before starting Jupyter to override the
# defaults below. When MYSQL_PASSWORD is unset (or empty) the password is
# requested interactively; set it explicitly for non-interactive runs, where
# no prompt can be shown.
conn = mysql.connector.connect(
    host=os.environ.get("MYSQL_HOST", "localhost"),
    user=os.environ.get("MYSQL_USER", "root"),
    password=os.environ.get("MYSQL_PASSWORD") or getpass.getpass("MySQL password: "),
    database=os.environ.get("MYSQL_DATABASE", "testdb"),
)

query = "SELECT * FROM sales"
try:
    # NOTE(review): pandas documents SQLAlchemy connectables and sqlite3 as the
    # supported inputs for read_sql; a raw DBAPI connection such as this one
    # may emit a UserWarning on recent pandas versions.
    df = pd.read_sql(query, conn)
finally:
    # Release the connection even if the query fails, so a failed cell does not
    # leave an open session behind on the server.
    conn.close()

df.head()


## Step 2: Exploratory Analysis

In [None]:

print("=== Info ===")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
df.info()

print("\n=== Summary Statistics ===")
# display() renders the summary as a rich table directly under its header,
# consistent with how the later cells in this notebook show their results.
display(df.describe())

# Last expression of the cell: shown as the cell's output.
df.head()


## Step 3: Group By Operations

In [None]:

# Each report prints its header, builds the aggregate into a named Series,
# and then renders that Series with display().

print("=== Total quantity per region ===")
quantity_by_region = df.groupby("region")["quantity"].sum()
display(quantity_by_region)

print("\n=== Average price per product ===")
mean_price_by_product = df.groupby("product")["price"].mean()
display(mean_price_by_product)

print("\n=== Total revenue per region ===")
# Revenue is defined here as quantity * price, row by row. It is stored as a
# column on df (not a separate variable) because the plotting cell reads
# df["revenue"].
df["revenue"] = df["quantity"].mul(df["price"])
revenue_by_region = df.groupby("region")["revenue"].sum()
display(revenue_by_region)

print("\n=== Total revenue per region & product ===")
revenue_by_region_product = df.groupby(["region", "product"])["revenue"].sum()
display(revenue_by_region_product)


## Step 4: Visualizations

In [None]:

# Each chart follows the same three steps: aggregate, draw a bar chart, then
# label the y-axis on the Axes object that pandas returns before showing it.

# Quantity per region
region_quantity = df.groupby("region")["quantity"].sum()
quantity_ax = region_quantity.plot.bar(title="Total Quantity per Region")
quantity_ax.set_ylabel("Quantity")
plt.show()

# Average price per product
product_mean_price = df.groupby("product")["price"].mean()
price_ax = product_mean_price.plot.bar(color="orange", title="Average Price per Product")
price_ax.set_ylabel("Average Price")
plt.show()

# Revenue per region (reads the "revenue" column that must already exist on df)
region_revenue = df.groupby("region")["revenue"].sum()
revenue_ax = region_revenue.plot.bar(color="green", title="Total Revenue per Region")
revenue_ax.set_ylabel("Revenue")
plt.show()
