<a href="https://colab.research.google.com/github/sheikh495/Intermediate-SQL/blob/main/Intermediate_SQL_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import sqlite3
import pandas as pd

# Step 1: Create a connection to SQLite in memory
connection = sqlite3.connect(':memory:')  # In-memory database for temporary use
cursor = connection.cursor()

# Step 2: Create the tables
cursor.execute('''
CREATE TABLE Customers (
    CustomerID INT PRIMARY KEY,
    CustomerName VARCHAR(100),
    Country VARCHAR(50)
);
''')

cursor.execute('''
CREATE TABLE Orders (
    OrderID INT PRIMARY KEY,
    CustomerID INT,
    OrderDate DATE,
    TotalAmount DECIMAL(10, 2),
    FOREIGN KEY (CustomerID) REFERENCES Customers(CustomerID)
);
''')

# Step 3: Insert data into Customers table
cursor.executemany('''
INSERT INTO Customers (CustomerID, CustomerName, Country) VALUES (?, ?, ?);
''', [
    (1, 'Alice', 'USA'),
    (2, 'Bob', 'UK'),
    (3, 'Charlie', 'USA'),
    (4, 'Diana', 'Canada'),
    (5, 'Eve', 'Australia')
])

# Insert data into Orders table
cursor.executemany('''
INSERT INTO Orders (OrderID, CustomerID, OrderDate, TotalAmount) VALUES (?, ?, ?, ?);
''', [
    (101, 1, '2024-01-15', 120.50),
    (102, 1, '2024-02-10', 80.00),
    (103, 2, '2024-01-20', 200.00),
    (104, 3, '2024-03-12', 150.00),
    (105, 4, '2024-02-05', 60.00),
    (106, 5, '2024-03-30', 90.00)
])

# Commit the changes
connection.commit()

# Function to run a query and return a DataFrame
def run_query(query):
    return pd.read_sql_query(query, connection)

# Problem 1: Customer Orders with Conditional Logic
query1 = '''
SELECT
    c.CustomerName,
    c.Country,
    o.TotalAmount,
    CASE
        WHEN o.TotalAmount > 150 THEN 'High'
        WHEN o.TotalAmount BETWEEN 100 AND 150 THEN 'Medium'
        ELSE 'Low'
    END AS OrderCategory
FROM Orders o
JOIN Customers c ON o.CustomerID = c.CustomerID;
'''
df1 = run_query(query1)

# Problem 2: Grouping and Aggregation by Country
query2 = '''
SELECT
    c.Country,
    COUNT(o.OrderID) AS TotalOrders
FROM Orders o
JOIN Customers c ON o.CustomerID = c.CustomerID
GROUP BY c.Country
HAVING COUNT(o.OrderID) > 1;
'''
df2 = run_query(query2)

# Problem 3: Combining Tables with Joins
query3 = '''
SELECT
    c.CustomerName,
    o.OrderID,
    o.TotalAmount
FROM Customers c
LEFT JOIN Orders o ON c.CustomerID = o.CustomerID;
'''
df3 = run_query(query3)

# Problem 4: Subqueries
query4 = '''
SELECT
    c.CustomerName,
    o.TotalAmount
FROM Orders o
JOIN Customers c ON o.CustomerID = c.CustomerID
WHERE o.TotalAmount > (SELECT AVG(TotalAmount) FROM Orders);
'''
df4 = run_query(query4)

# Problem 5: Using a CTE for Average Order Amount
query5 = '''
WITH CustomerAverage AS (
    SELECT
        c.Country,
        AVG(o.TotalAmount) AS CountryAverage
    FROM Orders o
    JOIN Customers c ON o.CustomerID = c.CustomerID
    GROUP BY c.Country
)
SELECT
    c.CustomerName,
    o.TotalAmount
FROM Orders o
JOIN Customers c ON o.CustomerID = c.CustomerID
JOIN CustomerAverage ca ON c.Country = ca.Country
WHERE o.TotalAmount > ca.CountryAverage;
'''
df5 = run_query(query5)

# Display the results
df1.head(), df2.head(), df3.head(), df4.head(), df5.head()


(  CustomerName Country  TotalAmount OrderCategory
 0        Alice     USA        120.5        Medium
 1        Alice     USA         80.0           Low
 2          Bob      UK        200.0          High
 3      Charlie     USA        150.0        Medium
 4        Diana  Canada         60.0           Low,
   Country  TotalOrders
 0     USA            3,
   CustomerName  OrderID  TotalAmount
 0        Alice      101        120.5
 1        Alice      102         80.0
 2          Bob      103        200.0
 3      Charlie      104        150.0
 4        Diana      105         60.0,
   CustomerName  TotalAmount
 0        Alice        120.5
 1          Bob        200.0
 2      Charlie        150.0,
   CustomerName  TotalAmount
 0        Alice        120.5
 1      Charlie        150.0)

V2

In [12]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import io
import base64

# Create a connection to SQLite in memory
connection = sqlite3.connect(':memory:')
cursor = connection.cursor()

# Create the tables
cursor.execute('''
CREATE TABLE Customers (
    CustomerID INT PRIMARY KEY,
    CustomerName VARCHAR(100),
    Country VARCHAR(50)
);
''')

cursor.execute('''
CREATE TABLE Orders (
    OrderID INT PRIMARY KEY,
    CustomerID INT,
    OrderDate DATE,
    TotalAmount DECIMAL(10, 2),
    FOREIGN KEY (CustomerID) REFERENCES Customers(CustomerID)
);
''')

# Insert data into Customers table
cursor.executemany('''
INSERT INTO Customers (CustomerID, CustomerName, Country) VALUES (?, ?, ?);
''', [
    (1, 'Alice', 'USA'),
    (2, 'Bob', 'UK'),
    (3, 'Charlie', 'USA'),
    (4, 'Diana', 'Canada'),
    (5, 'Eve', 'Australia')
])

# Insert data into Orders table
cursor.executemany('''
INSERT INTO Orders (OrderID, CustomerID, OrderDate, TotalAmount) VALUES (?, ?, ?, ?);
''', [
    (101, 1, '2024-01-15', 120.50),
    (102, 1, '2024-02-10', 80.00),
    (103, 2, '2024-01-20', 200.00),
    (104, 3, '2024-03-12', 150.00),
    (105, 4, '2024-02-05', 60.00),
    (106, 5, '2024-03-30', 90.00)
])

# Commit the changes
connection.commit()

# Function to run a query and return a DataFrame
def run_query(query):
    return pd.read_sql_query(query, connection)

# Function to convert matplotlib plot to HTML image
def plot_to_html(fig):
    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    buf.seek(0)
    img_str = base64.b64encode(buf.read()).decode()
    return f'<img src="data:image/png;base64,{img_str}"/>'

# Problem 1: Customer Orders with Conditional Logic
query1 = '''
SELECT
    c.CustomerName,
    c.Country,
    o.TotalAmount,
    CASE
        WHEN o.TotalAmount > 150 THEN 'High'
        WHEN o.TotalAmount BETWEEN 100 AND 150 THEN 'Medium'
        ELSE 'Low'
    END AS OrderCategory
FROM Orders o
JOIN Customers c ON o.CustomerID = c.CustomerID;
'''
df1 = run_query(query1)

# Plot the distribution of Order Categories
fig1, ax1 = plt.subplots(figsize=(10, 6))
sns.countplot(data=df1, x='OrderCategory', ax=ax1, palette='viridis')
ax1.set_title('Order Category Distribution')
ax1.set_xlabel('Order Category')
ax1.set_ylabel('Count')
html_plot1 = plot_to_html(fig1)
plt.close(fig1)

# Problem 2: Grouping and Aggregation by Country
query2 = '''
SELECT
    c.Country,
    COUNT(o.OrderID) AS TotalOrders
FROM Orders o
JOIN Customers c ON o.CustomerID = c.CustomerID
GROUP BY c.Country
HAVING COUNT(o.OrderID) > 1;
'''
df2 = run_query(query2)

# Plot the number of orders by country
fig2, ax2 = plt.subplots(figsize=(10, 6))
sns.barplot(data=df2, x='Country', y='TotalOrders', ax=ax2, palette='magma')
ax2.set_title('Total Orders by Country')
ax2.set_xlabel('Country')
ax2.set_ylabel('Total Orders')
html_plot2 = plot_to_html(fig2)
plt.close(fig2)

# Problem 3: Combining Tables with Joins
query3 = '''
SELECT
    c.CustomerName,
    o.OrderID,
    o.TotalAmount
FROM Customers c
LEFT JOIN Orders o ON c.CustomerID = o.CustomerID;
'''
df3 = run_query(query3)

# Plot the total amount of orders by customer
fig3, ax3 = plt.subplots(figsize=(10, 6))
sns.barplot(data=df3, x='CustomerName', y='TotalAmount', ax=ax3, palette='coolwarm')
ax3.set_title('Total Amount of Orders by Customer')
ax3.set_xlabel('Customer Name')
ax3.set_ylabel('Total Amount')
html_plot3 = plot_to_html(fig3)
plt.close(fig3)

# Problem 4: Subqueries
query4 = '''
SELECT
    c.CustomerName,
    o.TotalAmount
FROM Orders o
JOIN Customers c ON o.CustomerID = c.CustomerID
WHERE o.TotalAmount > (SELECT AVG(TotalAmount) FROM Orders);
'''
df4 = run_query(query4)

# Plot the orders that are above average
fig4, ax4 = plt.subplots(figsize=(10, 6))
sns.barplot(data=df4, x='CustomerName', y='TotalAmount', ax=ax4, palette='pastel')
ax4.set_title('Orders Above Average Amount')
ax4.set_xlabel('Customer Name')
ax4.set_ylabel('Total Amount')
html_plot4 = plot_to_html(fig4)
plt.close(fig4)

# Problem 5: Using a CTE for Average Order Amount
query5 = '''
WITH CustomerAverage AS (
    SELECT
        c.Country,
        AVG(o.TotalAmount) AS CountryAverage
    FROM Orders o
    JOIN Customers c ON o.CustomerID = c.CustomerID
    GROUP BY c.Country
)
SELECT
    c.CustomerName,
    o.TotalAmount
FROM Orders o
JOIN Customers c ON o.CustomerID = c.CustomerID
JOIN CustomerAverage ca ON c.Country = ca.Country
WHERE o.TotalAmount > ca.CountryAverage;
'''
df5 = run_query(query5)

# Plot orders above country average
fig5, ax5 = plt.subplots(figsize=(10, 6))
sns.barplot(data=df5, x='CustomerName', y='TotalAmount', ax=ax5, palette='Set2')
ax5.set_title('Orders Above Country Average Amount')
ax5.set_xlabel('Customer Name')
ax5.set_ylabel('Total Amount')
html_plot5 = plot_to_html(fig5)
plt.close(fig5)

# Create HTML report
html_report = f'''
<!DOCTYPE html>
<html>
<head>
    <title>Database Report</title>
    <style>
        body {{ font-family: Arial, sans-serif; }}
        table {{ width: 100%; border-collapse: collapse; margin-bottom: 20px; }}
        th, td {{ border: 1px solid #ddd; padding: 8px; }}
        th {{ background-color: #f2f2f2; }}
        h1 {{ color: #333; }}
        h2 {{ color: #555; }}
    </style>
</head>
<body>
    <h1>Database Report</h1>
    <h2>1. Customer Orders with Conditional Logic:</h2>
    {df1.to_html(index=False)}
    {html_plot1}
    <h2>2. Grouping and Aggregation by Country:</h2>
    {df2.to_html(index=False)}
    {html_plot2}
    <h2>3. Combining Tables with Joins:</h2>
    {df3.to_html(index=False)}
    {html_plot3}
    <h2>4. Orders Above Average Amount:</h2>
    {df4.to_html(index=False)}
    {html_plot4}
    <h2>5. Orders Above Country Average Amount:</h2>
    {df5.to_html(index=False)}
    {html_plot5}
</body>
</html>
'''

# Save HTML report to file
with open('/content/Database_Report.html', 'w') as f:
    f.write(html_report)

from google.colab import files
files.download('/content/Database_Report.html')

print("HTML report has been generated and saved as 'Database_Report.html'.")



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(data=df1, x='OrderCategory', ax=ax1, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=df2, x='Country', y='TotalOrders', ax=ax2, palette='magma')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=df3, x='CustomerName', y='TotalAmount', ax=ax3, palette='coolwarm')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=df4, x='CustomerName', y='TotalAmount', ax=ax4, palette='pastel')

Passing 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

HTML report has been generated and saved as 'Database_Report.html'.


V3

In [13]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import io
import base64

# Create a connection to SQLite in memory
connection = sqlite3.connect(':memory:')
cursor = connection.cursor()

# Create the tables
cursor.execute('''
CREATE TABLE Customers (
    CustomerID INT PRIMARY KEY,
    CustomerName VARCHAR(100),
    Country VARCHAR(50)
);
''')

cursor.execute('''
CREATE TABLE Orders (
    OrderID INT PRIMARY KEY,
    CustomerID INT,
    OrderDate DATE,
    TotalAmount DECIMAL(10, 2),
    FOREIGN KEY (CustomerID) REFERENCES Customers(CustomerID)
);
''')

# Insert data into Customers table
cursor.executemany('''
INSERT INTO Customers (CustomerID, CustomerName, Country) VALUES (?, ?, ?);
''', [
    (1, 'Alice', 'USA'),
    (2, 'Bob', 'UK'),
    (3, 'Charlie', 'USA'),
    (4, 'Diana', 'Canada'),
    (5, 'Eve', 'Australia')
])

# Insert data into Orders table
cursor.executemany('''
INSERT INTO Orders (OrderID, CustomerID, OrderDate, TotalAmount) VALUES (?, ?, ?, ?);
''', [
    (101, 1, '2024-01-15', 120.50),
    (102, 1, '2024-02-10', 80.00),
    (103, 2, '2024-01-20', 200.00),
    (104, 3, '2024-03-12', 150.00),
    (105, 4, '2024-02-05', 60.00),
    (106, 5, '2024-03-30', 90.00)
])

# Commit the changes
connection.commit()

# Function to run a query and return a DataFrame
def run_query(query):
    return pd.read_sql_query(query, connection)

# Function to convert matplotlib plot to HTML image
def plot_to_html(fig):
    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    buf.seek(0)
    img_str = base64.b64encode(buf.read()).decode()
    return f'<img src="data:image/png;base64,{img_str}"/>'

# Problem 1: Customer Orders with Conditional Logic
query1 = '''
SELECT
    c.CustomerName,
    c.Country,
    o.TotalAmount,
    CASE
        WHEN o.TotalAmount > 150 THEN 'High'
        WHEN o.TotalAmount BETWEEN 100 AND 150 THEN 'Medium'
        ELSE 'Low'
    END AS OrderCategory
FROM Orders o
JOIN Customers c ON o.CustomerID = c.CustomerID;
'''
df1 = run_query(query1)

# Plot the distribution of Order Categories
fig1, ax1 = plt.subplots(figsize=(10, 6))
sns.countplot(data=df1, x='OrderCategory', ax=ax1, palette='viridis')
ax1.set_title('Order Category Distribution')
ax1.set_xlabel('Order Category')
ax1.set_ylabel('Count')
html_plot1 = plot_to_html(fig1)
plt.close(fig1)

# Problem 2: Grouping and Aggregation by Country
query2 = '''
SELECT
    c.Country,
    COUNT(o.OrderID) AS TotalOrders
FROM Orders o
JOIN Customers c ON o.CustomerID = c.CustomerID
GROUP BY c.Country
HAVING COUNT(o.OrderID) > 1;
'''
df2 = run_query(query2)

# Plot the number of orders by country
fig2, ax2 = plt.subplots(figsize=(10, 6))
sns.barplot(data=df2, x='Country', y='TotalOrders', ax=ax2, palette='magma')
ax2.set_title('Total Orders by Country')
ax2.set_xlabel('Country')
ax2.set_ylabel('Total Orders')
html_plot2 = plot_to_html(fig2)
plt.close(fig2)

# Problem 3: Combining Tables with Joins
query3 = '''
SELECT
    c.CustomerName,
    o.OrderID,
    o.TotalAmount
FROM Customers c
LEFT JOIN Orders o ON c.CustomerID = o.CustomerID;
'''
df3 = run_query(query3)

# Plot the total amount of orders by customer
fig3, ax3 = plt.subplots(figsize=(10, 6))
sns.barplot(data=df3, x='CustomerName', y='TotalAmount', ax=ax3, palette='coolwarm')
ax3.set_title('Total Amount of Orders by Customer')
ax3.set_xlabel('Customer Name')
ax3.set_ylabel('Total Amount')
html_plot3 = plot_to_html(fig3)
plt.close(fig3)

# Problem 4: Subqueries
query4 = '''
SELECT
    c.CustomerName,
    o.TotalAmount
FROM Orders o
JOIN Customers c ON o.CustomerID = c.CustomerID
WHERE o.TotalAmount > (SELECT AVG(TotalAmount) FROM Orders);
'''
df4 = run_query(query4)

# Plot the orders that are above average
fig4, ax4 = plt.subplots(figsize=(10, 6))
sns.barplot(data=df4, x='CustomerName', y='TotalAmount', ax=ax4, palette='pastel')
ax4.set_title('Orders Above Average Amount')
ax4.set_xlabel('Customer Name')
ax4.set_ylabel('Total Amount')
html_plot4 = plot_to_html(fig4)
plt.close(fig4)

# Problem 5: Using a CTE for Average Order Amount
query5 = '''
WITH CustomerAverage AS (
    SELECT
        c.Country,
        AVG(o.TotalAmount) AS CountryAverage
    FROM Orders o
    JOIN Customers c ON o.CustomerID = c.CustomerID
    GROUP BY c.Country
)
SELECT
    c.CustomerName,
    o.TotalAmount
FROM Orders o
JOIN Customers c ON o.CustomerID = c.CustomerID
JOIN CustomerAverage ca ON c.Country = ca.Country
WHERE o.TotalAmount > ca.CountryAverage;
'''
df5 = run_query(query5)

# Plot orders above country average
fig5, ax5 = plt.subplots(figsize=(10, 6))
sns.barplot(data=df5, x='CustomerName', y='TotalAmount', ax=ax5, palette='Set2')
ax5.set_title('Orders Above Country Average Amount')
ax5.set_xlabel('Customer Name')
ax5.set_ylabel('Total Amount')
html_plot5 = plot_to_html(fig5)
plt.close(fig5)

# Create HTML report
html_report = f'''
<!DOCTYPE html>
<html>
<head>
    <title>Database Report</title>
    <style>
        body {{ font-family: Arial, sans-serif; }}
        table {{ width: 100%; border-collapse: collapse; margin-bottom: 20px; }}
        th, td {{ border: 1px solid #ddd; padding: 8px; }}
        th {{ background-color: #f2f2f2; }}
        h1 {{ color: #333; }}
        h2 {{ color: #555; }}
    </style>
</head>
<body>
    <h1>Database Report</h1>
    <h2>1. Customer Orders with Conditional Logic:</h2>
    {df1.to_html(index=False)}
    {html_plot1}
    <h2>2. Grouping and Aggregation by Country:</h2>
    {df2.to_html(index=False)}
    {html_plot2}
    <h2>3. Combining Tables with Joins:</h2>
    {df3.to_html(index=False)}
    {html_plot3}
    <h2>4. Orders Above Average Amount:</h2>
    {df4.to_html(index=False)}
    {html_plot4}
    <h2>5. Orders Above Country Average Amount:</h2>
    {df5.to_html(index=False)}
    {html_plot5}
</body>
</html>
'''

# Save HTML report to file
with open('/content/Database_Report.html', 'w') as f:
    f.write(html_report)

from google.colab import files
files.download('/content/Database_Report.html')

print("HTML report has been generated and saved as 'Database_Report.html'.")



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(data=df1, x='OrderCategory', ax=ax1, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=df2, x='Country', y='TotalOrders', ax=ax2, palette='magma')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=df3, x='CustomerName', y='TotalAmount', ax=ax3, palette='coolwarm')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=df4, x='CustomerName', y='TotalAmount', ax=ax4, palette='pastel')

Passing 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

HTML report has been generated and saved as 'Database_Report.html'.


# V3

Step 1: Install Required Libraries

In [14]:
!pip install graphviz pydot




Step 2: Generate the ERD

In [15]:
from graphviz import Digraph

# Create ERD diagram
dot = Digraph()

# Define nodes for tables
dot.node('Customers', 'Customers\n(CustomerID, CustomerName, Country)')
dot.node('Orders', 'Orders\n(OrderID, CustomerID, OrderDate, TotalAmount)')

# Define relationships
dot.edge('Customers', 'Orders', label='Has')

# Save ERD to a file
erd_path = '/content/ERD.png'
dot.format = 'png'
dot.render(filename=erd_path, directory='/content')

# Convert ERD to HTML image
def erd_to_html(erd_path):
    with open(erd_path, 'rb') as f:
        img_str = base64.b64encode(f.read()).decode()
    return f'<img src="data:image/png;base64,{img_str}"/>'

html_erd = erd_to_html(erd_path)


Step 3: Include the ERD in the HTML Report

In [17]:
# Create HTML report with ERD
html_report = f'''
<!DOCTYPE html>
<html>
<head>
    <title>Database Report</title>
    <style>
        body {{ font-family: Arial, sans-serif; }}
        table {{ width: 100%; border-collapse: collapse; margin-bottom: 20px; }}
        th, td {{ border: 1px solid #ddd; padding: 8px; }}
        th {{ background-color: #f2f2f2; }}
        h1 {{ color: #333; }}
        h2 {{ color: #555; }}
    </style>
</head>
<body>
    <h1>Database Report</h1>
    <h2>Entity-Relationship Diagram:</h2>
    {html_erd}
    <h2>1. Customer Orders with Conditional Logic:</h2>
    {df1.to_html(index=False)}
    {html_plot1}
    <h2>2. Grouping and Aggregation by Country:</h2>
    {df2.to_html(index=False)}
    {html_plot2}
    <h2>3. Combining Tables with Joins:</h2>
    {df3.to_html(index=False)}
    {html_plot3}
    <h2>4. Orders Above Average Amount:</h2>
    {df4.to_html(index=False)}
    {html_plot4}
    <h2>5. Orders Above Country Average Amount:</h2>
    {df5.to_html(index=False)}
    {html_plot5}
</body>
</html>
'''

# Save HTML report to file
with open('/content/Database_Report_with_ERD.html', 'w') as f:
    f.write(html_report)

from google.colab import files
files.download('/content/Database_Report_with_ERD.html')

print("HTML report with ERD has been generated and saved as 'Database_Report_with_ERD.html'.")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

HTML report with ERD has been generated and saved as 'Database_Report_with_ERD.html'.


# v4

!pip install graphviz pydot


In [18]:
!pip install graphviz pydot




Generate the Detailed ERD

In [19]:
from graphviz import Digraph
import base64
import io

# Create a detailed ERD diagram
dot = Digraph()

# Define nodes for tables with detailed attributes
dot.node('Customers', '''
Customers
----------
CustomerID (PK)
CustomerName
Country
''', shape='rect', style='filled', color='lightblue')

dot.node('Orders', '''
Orders
-------
OrderID (PK)
CustomerID (FK)
OrderDate
TotalAmount
''', shape='rect', style='filled', color='lightgreen')

# Define relationships
dot.edge('Customers', 'Orders', label='Places')

# Save ERD to a file
erd_path = '/content/Detailed_ERD.png'
dot.format = 'png'
dot.render(filename=erd_path, directory='/content')

# Convert ERD to HTML image
def erd_to_html(erd_path):
    with open(erd_path, 'rb') as f:
        img_str = base64.b64encode(f.read()).decode()
    return f'<img src="data:image/png;base64,{img_str}"/>'

html_erd = erd_to_html(erd_path)


Include the ERD in the HTML Report

In [20]:
# Create HTML report with detailed ERD
html_report = f'''
<!DOCTYPE html>
<html>
<head>
    <title>Database Report</title>
    <style>
        body {{ font-family: Arial, sans-serif; }}
        table {{ width: 100%; border-collapse: collapse; margin-bottom: 20px; }}
        th, td {{ border: 1px solid #ddd; padding: 8px; }}
        th {{ background-color: #f2f2f2; }}
        h1 {{ color: #333; }}
        h2 {{ color: #555; }}
        .erd {{ border: 1px solid #ddd; padding: 10px; }}
    </style>
</head>
<body>
    <h1>Database Report</h1>
    <h2>Entity-Relationship Diagram:</h2>
    <div class="erd">{html_erd}</div>
    <h2>1. Customer Orders with Conditional Logic:</h2>
    {df1.to_html(index=False)}
    {html_plot1}
    <h2>2. Grouping and Aggregation by Country:</h2>
    {df2.to_html(index=False)}
    {html_plot2}
    <h2>3. Combining Tables with Joins:</h2>
    {df3.to_html(index=False)}
    {html_plot3}
    <h2>4. Orders Above Average Amount:</h2>
    {df4.to_html(index=False)}
    {html_plot4}
    <h2>5. Orders Above Country Average Amount:</h2>
    {df5.to_html(index=False)}
    {html_plot5}
</body>
</html>
'''

# Save HTML report to file
with open('/content/Database_Report_with_Detailed_ERD.html', 'w') as f:
    f.write(html_report)

from google.colab import files
files.download('/content/Database_Report_with_Detailed_ERD.html')

print("HTML report with detailed ERD has been generated and saved as 'Database_Report_with_Detailed_ERD.html'.")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

HTML report with detailed ERD has been generated and saved as 'Database_Report_with_Detailed_ERD.html'.
