In [1]:
from pyspark.sql import SparkSession
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.classification import RandomForestClassificationModel
from pyspark.sql import Row
from pyspark.sql.functions import col, when

# Score one hand-written applicant record with a previously saved
# RandomForest model and print a human-readable interpretation.

# Initialize (or reuse) the Spark session for this job.
spark = (
    SparkSession.builder
    .appName("HouseLoanPrediction")
    .getOrCreate()
)

# Everything that uses the session lives inside try/finally so the session
# is stopped even when loading the model or scoring the row raises.
try:
    # Path to the saved RandomForest model
    rf_classifier_path = r"E:\Class\sem3\big data framework\Project\rf_model"

    # Load the RandomForest model
    rf_classifier = RandomForestClassificationModel.load(rf_classifier_path)

    # Manually provided values: a single record with one field per feature.
    manual_data = [
        Row(BASEMENTAREA_MODE=0.1, NONLIVINGAREA_AVG=50.0, APARTMENTS_MODE=2.0,
            LIVE_REGION_NOT_WORK_REGION=1.0, CODE_GENDER_index=0.0,
            REGION_POPULATION_RELATIVE=0.01, FLAG_DOCUMENT_17=0.0, ELEVATORS_AVG=1.0,
            AMT_GOODS_PRICE=300000000000.0, EXT_SOURCE_2=0.5)
    ]

    # Convert the manually provided values to a DataFrame
    manual_data_df = spark.createDataFrame(manual_data)

    # Assemble the individual columns into a single vector column.
    # NOTE(review): the order of inputCols presumably has to match the order
    # used when the model was trained -- confirm against the training code.
    vector_assembler = VectorAssembler(
        inputCols=["BASEMENTAREA_MODE", "NONLIVINGAREA_AVG", "APARTMENTS_MODE",
                   "LIVE_REGION_NOT_WORK_REGION", "CODE_GENDER_index",
                   "REGION_POPULATION_RELATIVE", "FLAG_DOCUMENT_17", "ELEVATORS_AVG",
                   "AMT_GOODS_PRICE", "EXT_SOURCE_2"],
        outputCol="features"
    )

    # Transform the data to create feature vectors
    assembled_data = vector_assembler.transform(manual_data_df)

    # Rename the 'features' column to 'top_features' to match the model's expectation
    final_data = assembled_data.withColumnRenamed("features", "top_features")

    # Apply the RandomForest model to make predictions
    manual_predictions = rf_classifier.transform(final_data)

    # Add an interpretation column based on the prediction.
    # NOTE(review): assumes a prediction >= 0.5 (i.e. label 1.0) means
    # "default" -- verify against the label encoding used in training.
    manual_predictions_interpreted = manual_predictions.withColumn(
        "default_interpretation",
        when(col("prediction") >= 0.5, "Likely to Default").otherwise("Not Likely to Default")
    )

    # Select columns to display the interpretation
    manual_predictions_selected = manual_predictions_interpreted.select("default_interpretation")

    # Show the interpretation results. truncate=False because show() cuts
    # strings at 20 characters by default, which rendered the result as
    # "Not Likely to Def..." instead of the full text.
    manual_predictions_selected.show(truncate=False)
finally:
    # Stop Spark session (always, so a failed run does not leak the session)
    spark.stop()


+----------------------+
|default_interpretation|
+----------------------+
|  Not Likely to Def...|
+----------------------+

