#### AI for Data Engineering

In [0]:
from math import isqrt
from typing import List


def get_first_n_primes(n: int) -> List[int]:
    """
    Returns a list of the first n prime numbers, in ascending order.

    Each candidate is trial-divided by the primes found so far, stopping once
    the divisor exceeds the candidate's integer square root.

    Parameters:
    n (int): Number of prime numbers to return. Values <= 0 yield an empty list.

    Returns:
    List[int]: List of the first n prime numbers.
    """
    primes: List[int] = []
    candidate = 2
    while len(primes) < n:
        # isqrt is exact for arbitrarily large ints, unlike int(x ** 0.5).
        limit = isqrt(candidate)
        is_prime = True
        # Only prime divisors need checking: any composite divisor has a
        # smaller prime factor that is already in `primes`.
        for prime in primes:
            if prime > limit:
                break
            if candidate % prime == 0:
                is_prime = False
                break
        if is_prime:
            primes.append(candidate)
        candidate += 1
    return primes

# Quick demo: print the first five primes.
print(get_first_n_primes(n=5))

In [0]:
def test_get_first_n_primes():
    """Check get_first_n_primes against known prefixes of the prime sequence."""
    # Requested count -> expected result; checked in insertion order so the
    # first failing case is reported, as with sequential asserts.
    expected_by_n = {
        0: [],
        1: [2],
        5: [2, 3, 5, 7, 11],
        10: [2, 3, 5, 7, 11, 13, 17, 19, 23, 29],
    }
    for n, expected in expected_by_n.items():
        assert get_first_n_primes(n) == expected, f"Test case failed for n={n}"


test_get_first_n_primes()

In [0]:
# Fully qualified name of the demo table, defined once so the statements
# below cannot drift apart.
PEOPLE_TABLE = "pavan_naidu.demo.people"

# Create the table if it doesn't exist
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {PEOPLE_TABLE} (
    id INT,
    name STRING,
    dob DATE,
    gender STRING,
    city STRING
)
""")

# Truncate the table so re-running this cell does not duplicate the sample rows
spark.sql(f"TRUNCATE TABLE {PEOPLE_TABLE}")

# Sample records (dob is given as an ISO-8601 string and cast to DATE below)
data = [
    (1, 'Alice', '1990-05-15', 'Female', 'New York'),
    (2, 'Bob', '1985-10-23', 'Male', 'Los Angeles'),
    (3, 'Charlie', '1978-03-12', 'Male', 'Chicago'),
    (4, 'Danielle', '1995-08-02', 'Female', 'San Francisco'),
    (5, 'Ethan', '1982-11-30', 'Male', 'Miami')
]

# Build the DataFrame with an explicit schema instead of bare column names.
# With names only, Spark infers id as BIGINT and dob as STRING, which does not
# match the table's INT / DATE columns and leaves the conversion to implicit
# casts at write time.
raw_people_df = spark.createDataFrame(
    data,
    schema="id INT, name STRING, dob STRING, gender STRING, city STRING",
)
# withColumn on an existing name replaces it in place, so column order is
# preserved; this matters because insertInto matches columns by position.
df = raw_people_df.withColumn("dob", raw_people_df["dob"].cast("date"))

# Insert sample records into the table
df.write.mode('append').insertInto(PEOPLE_TABLE)

# Display the table
display(spark.table(PEOPLE_TABLE))

In [0]:
# Create `millennial` from the definition of `people`; the statement is a
# no-op when the table already exists.
create_millennial_ddl = """
CREATE TABLE IF NOT EXISTS pavan_naidu.demo.millennial
LIKE pavan_naidu.demo.people
"""
spark.sql(create_millennial_ddl)

In [0]:
# Upsert people born between 1981-01-01 and 1996-12-31 (inclusive) into
# `millennial`, keyed on id: matching ids are updated, new ids are inserted.
merge_millennial_sql = """
MERGE INTO pavan_naidu.demo.millennial AS target
USING (
    SELECT * FROM pavan_naidu.demo.people
    WHERE dob BETWEEN '1981-01-01' AND '1996-12-31'
) AS source
ON target.id = source.id
WHEN MATCHED THEN
    UPDATE SET
        target.name = source.name,
        target.dob = source.dob,
        target.gender = source.gender,
        target.city = source.city
WHEN NOT MATCHED THEN
    INSERT *
"""
spark.sql(merge_millennial_sql)

# Show the merged result
millennial_df = spark.table("pavan_naidu.demo.millennial")
display(millennial_df)

In [0]:
# DDL for the per-gender summary table (created only if it is missing).
create_stats_ddl = """
CREATE TABLE IF NOT EXISTS pavan_naidu.demo.millennial_stats (
    gender STRING,
    count BIGINT
)
"""

# Replace the table's contents with a fresh head-count per gender.
refresh_stats_sql = """
INSERT OVERWRITE TABLE pavan_naidu.demo.millennial_stats
SELECT gender, COUNT(*) AS count
FROM pavan_naidu.demo.millennial
GROUP BY gender
"""

# Run the statements in order: the table must exist before it is overwritten.
for statement in (create_stats_ddl, refresh_stats_sql):
    spark.sql(statement)

stats_df = spark.table("pavan_naidu.demo.millennial_stats")
display(stats_df)

Databricks visualization. Run in Databricks to view.