<a href="https://colab.research.google.com/github/pratikabhang/AI-Driven-Cybersecurity-Threat-Prediction-Platform/blob/main/sql_task.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [26]:
# Import required libraries and create an in-memory SQLite database

import sqlite3
import pandas as pd

# Create an in-memory SQLite database connection
conn = sqlite3.connect(":memory:")
# SQLite keeps foreign-key enforcement OFF by default for every new connection.
# Without this pragma the FOREIGN KEY clause on employees.department_id is
# parsed but never checked, so rows pointing at a missing department would be
# accepted silently.
conn.execute("PRAGMA foreign_keys = ON;")
cursor = conn.cursor()


In [27]:
# Create the departments and employees tables.
#
# IF NOT EXISTS makes this cell safe to re-run on the same kernel: a plain
# CREATE TABLE raises "table ... already exists" the second time. Note that an
# existing table is left untouched, so after editing a schema below, re-create
# the connection (re-run the first cell) to start from an empty database.

create_departments_table = """
CREATE TABLE IF NOT EXISTS departments (
    id INTEGER PRIMARY KEY,
    department_name TEXT NOT NULL,
    location TEXT
);
"""

# employees.department_id is nullable and references departments(id).
create_employees_table = """
CREATE TABLE IF NOT EXISTS employees (
    employee_id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    salary REAL NOT NULL,
    department_id INTEGER,
    FOREIGN KEY (department_id) REFERENCES departments(id)
);
"""

# departments is created first because employees references it.
cursor.execute(create_departments_table)
cursor.execute(create_employees_table)
conn.commit()


In [28]:
# Insert sample data into departments and employees tables.
#
# Both statements use ON CONFLICT(<primary key>) DO NOTHING so the cell can be
# re-run without a UNIQUE-constraint error: rows whose primary key is already
# present are skipped (they are NOT updated), while any other constraint
# violation still raises.

# (id, department_name, location)
departments_data = [
    (1, "Sales", "New York"),
    (2, "HR", "London"),
    (3, "IT", "Berlin")
]

# (employee_id, name, salary, department_id)
employees_data = [
    (1, "Alice",   90000, 1),
    (2, "Bob",     85000, 1),
    (3, "Charlie", 70000, 1),
    (4, "David",   95000, 1),
    (5, "Eva",     65000, 1),
    (6, "Frank",   72000, 1),
    (7, "Grace",   80000, 1),
    (8, "Hank",    60000, 1),
    (9, "Ivy",     88000, 1),
    (10, "Jack",   91000, 1),
    (11, "Karen",  93000, 1),
    (12, "Leo",    78000, 2),
    (13, "Mia",    82000, 2),
    (14, "Nina",   99000, 3),
    (15, "Oscar",  77000, 3)
]

# `with conn:` commits when both inserts succeed and rolls back if either one
# raises, so a failure cannot leave only the departments rows behind.
# Departments go first because employees reference them.
with conn:
    cursor.executemany(
        "INSERT INTO departments (id, department_name, location) VALUES (?, ?, ?) "
        "ON CONFLICT(id) DO NOTHING;",
        departments_data,
    )
    cursor.executemany(
        "INSERT INTO employees (employee_id, name, salary, department_id) VALUES (?, ?, ?, ?) "
        "ON CONFLICT(employee_id) DO NOTHING;",
        employees_data,
    )


In [29]:
# Helper function to run and display SQL queries using pandas

def run_query(title, query, params=None, connection=None):
    """Run a SQL query and print its result under a banner.

    Args:
        title: Heading printed between two 60-character ``=`` rules.
        query: SQL text passed to ``pd.read_sql_query``.
        params: Optional bind values for placeholders in ``query``; forwarded
            unchanged to ``pd.read_sql_query``. Defaults to None (no
            parameters), which matches the original two-argument behaviour.
        connection: Optional database connection to run the query on. When
            omitted, the notebook-level ``conn`` is used.

    Returns:
        None. The result DataFrame is printed rather than returned, so calling
        this as the last statement of a cell does not display the table twice.
    """
    # Resolve the global lazily so an explicit connection never touches `conn`.
    db = conn if connection is None else connection
    rule = "=" * 60
    print("\n" + rule)
    print(title)
    print(rule)
    print(pd.read_sql_query(query, db, params=params))


In [30]:
# Name and salary of every employee whose department_id is 1
# (the id given to 'Sales' in the sample data).

query_1 = """
SELECT
    name,
    salary
FROM
    employees
WHERE
    department_id = 1;
"""

run_query(
    title="1. Employees in 'Sales' (department_id = 1)",
    query=query_1,
)



1. Employees in 'Sales' (department_id = 1)
       name   salary
0     Alice  90000.0
1       Bob  85000.0
2   Charlie  70000.0
3     David  95000.0
4       Eva  65000.0
5     Frank  72000.0
6     Grace  80000.0
7      Hank  60000.0
8       Ivy  88000.0
9      Jack  91000.0
10    Karen  93000.0


In [31]:
# Find the top 5 highest-paid employees.
#
# salary alone is not a total order: if two employees tie at the cut-off,
# LIMIT 5 could return either one. employee_id is added as a tiebreaker so the
# result is the same on every run.

query_2 = """
SELECT
    name,
    salary
FROM
    employees
ORDER BY
    salary DESC,
    employee_id ASC
LIMIT 5;
"""

run_query("2. Top 5 highest-paid employees", query_2)



2. Top 5 highest-paid employees
    name   salary
0   Nina  99000.0
1  David  95000.0
2  Karen  93000.0
3   Jack  91000.0
4  Alice  90000.0


In [32]:
# Per-department headcount and mean salary: one output row per distinct
# department_id in employees.

query_3 = """
SELECT
    department_id,
    COUNT(employee_id) AS number_of_employees,
    AVG(salary) AS average_salary
FROM
    employees
GROUP BY
    department_id;
"""

run_query(
    title="3. Employees count and average salary per department",
    query=query_3,
)



3. Employees count and average salary per department
   department_id  number_of_employees  average_salary
0              1                   11    80818.181818
1              2                    2    80000.000000
2              3                    2    88000.000000


In [33]:
# Get each employee's name along with their department name.
#
# employees.department_id is nullable, so an INNER JOIN would silently drop
# any employee without a department. LEFT JOIN keeps every employee and shows
# a missing department_name for unassigned ones. The explicit ORDER BY pins
# the row order, which SQL otherwise leaves unspecified.

query_4 = """
SELECT
    e.name,
    d.department_name
FROM
    employees AS e
LEFT JOIN
    departments AS d
    ON e.department_id = d.id
ORDER BY
    e.employee_id;
"""

run_query("4. Employee names with their department names", query_4)



4. Employee names with their department names
       name department_name
0     Alice           Sales
1       Bob           Sales
2   Charlie           Sales
3     David           Sales
4       Eva           Sales
5     Frank           Sales
6     Grace           Sales
7      Hank           Sales
8       Ivy           Sales
9      Jack           Sales
10    Karen           Sales
11      Leo              HR
12      Mia              HR
13     Nina              IT
14    Oscar              IT


In [34]:
# Departments whose headcount exceeds 10. The filter sits in HAVING because it
# applies to the per-group COUNT, not to individual rows.

query_5 = """
SELECT
    department_id,
    COUNT(employee_id) AS employee_count
FROM
    employees
GROUP BY
    department_id
HAVING
    COUNT(employee_id) > 10;
"""

run_query(
    title="5. Departments with more than 10 employees",
    query=query_5,
)



5. Departments with more than 10 employees
   department_id  employee_count
0              1              11


In [35]:
# Employees paid strictly above the mean salary of their own department.
# The CTE computes one average per department_id; the outer query joins each
# employee to their department's average and keeps those above it.

query_6 = """
WITH DepartmentAvgSalary AS (
    SELECT
        department_id,
        AVG(salary) AS avg_dept_salary
    FROM
        employees
    GROUP BY
        department_id
)
SELECT
    e.name,
    e.salary,
    e.department_id
FROM
    employees AS e
JOIN
    DepartmentAvgSalary AS d
    ON e.department_id = d.department_id
WHERE
    e.salary > d.avg_dept_salary;
"""

run_query(
    title="6. Employees earning more than their department's average salary",
    query=query_6,
)



6. Employees earning more than their department's average salary
    name   salary  department_id
0  Alice  90000.0              1
1    Bob  85000.0              1
2  David  95000.0              1
3    Ivy  88000.0              1
4   Jack  91000.0              1
5  Karen  93000.0              1
6    Mia  82000.0              2
7   Nina  99000.0              3


In [36]:
# Rank employees within each department based on salary (highest first).
#
# RANK() gives tied salaries the same rank and leaves a gap after them.
# The ORDER BY inside OVER (...) only decides the ranks; the outer ORDER BY is
# what fixes the order of the returned rows.

query_7 = """
SELECT
    name,
    department_id,
    salary,
    RANK() OVER (
        PARTITION BY department_id
        ORDER BY salary DESC
    ) AS salary_rank
FROM
    employees
ORDER BY
    department_id,
    salary_rank,
    name;
"""

# No trailing bare expression after this call: a stray "" as the last
# statement makes the cell emit a meaningless '' result.
run_query("7. Salary rank of employees within each department", query_7)


7. Salary rank of employees within each department
       name  department_id   salary  salary_rank
0     David              1  95000.0            1
1     Karen              1  93000.0            2
2      Jack              1  91000.0            3
3     Alice              1  90000.0            4
4       Ivy              1  88000.0            5
5       Bob              1  85000.0            6
6     Grace              1  80000.0            7
7     Frank              1  72000.0            8
8   Charlie              1  70000.0            9
9       Eva              1  65000.0           10
10     Hank              1  60000.0           11
11      Mia              2  82000.0            1
12      Leo              2  78000.0            2
13     Nina              3  99000.0            1
14    Oscar              3  77000.0            2


''