In [0]:
%run "/Users/ovidiumtoma@gmail.com/wind_turbine_project/src/wt_ingestion"

In [0]:
import pytest
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, regexp_extract, lit

@pytest.fixture(scope="session")
def spark():
    """Provide a SparkSession shared by every test in the pytest session.

    If a session is already active in the current thread (for example the one
    a notebook runtime provides), it is reused and left running at teardown.
    Otherwise a local session named "TestRawDataIngestor" is created and
    stopped once the test session finishes.

    Yields:
        SparkSession: the session the tests should use.
    """
    # getOrCreate() silently hands back a pre-existing session, so remember
    # whether one was already active before building anything.
    active_session = SparkSession.getActiveSession()
    owns_session = active_session is None

    if owns_session:
        session = (
            SparkSession.builder
            .master("local[*]")
            .appName("TestRawDataIngestor")
            .getOrCreate()
        )
    else:
        session = active_session

    yield session

    # Only tear down what this fixture created; stopping a shared session
    # would break every other consumer of it.
    if owns_session:
        session.stop()

def test_extract_turbine_id_from_metadata(spark):
    """Check that the turbine id is parsed from a ``_metadata.file_path`` value.

    A one-row DataFrame is built with a ``_metadata`` struct holding a
    ``file_path`` field, the digits following "Location" (case-insensitive)
    are extracted with a regex, cast to int, and compared with the expected id.

    Args:
        spark: SparkSession supplied by the ``spark`` fixture.
    """
    # Simulate a DataFrame with file metadata (_metadata.file_path).
    # `_metadata` must be a struct with a `file_path` field: an unquoted dotted
    # name in col() is resolved as struct-field access, so a flat column
    # literally named "_metadata.file_path" would be unresolvable.
    data = [(("dbfs:/FileStore/tables/Location2.csv",),)]
    schema = "_metadata struct<file_path:string>"
    df = spark.createDataFrame(data, schema)

    # Mimic the extraction logic from load_new_turbine_data
    # (defined outside this cell; kept in sync by hand).
    df_extracted = df.withColumn(
        "extracted_turbine_id",
        regexp_extract(col("_metadata.file_path"), "(?i)Location(\\d+)", 1)
    )
    df_extracted = df_extracted.withColumn(
        "turbine_id",
        col("extracted_turbine_id").cast("int") + lit(0)  # no offset for test
    ).drop("extracted_turbine_id")

    result = df_extracted.select("turbine_id").collect()[0]["turbine_id"]
    assert result == 2
