# Model Inference

In [21]:
import mlflow
import pandas as pd
import numpy as np
import dagshub



# Best Model

### Initialize DagsHub and select the MLflow experiment

In [22]:

# Initialize DagsHub (with mlflow=True) for the given repo and select the
# MLflow experiment. `mlflow_active` records whether that setup succeeded:
# it starts as False and is only flipped once every step has completed.
mlflow_active = False
try:
    dagshub.init(repo_owner='konstantine25b', repo_name='Machine_learning', mlflow=True)
    print("DagsHub initialized successfully.")

    mlflow.set_experiment("House Prices Experiment - assignment 1")
    # Look the experiment up again so the message reports the name MLflow
    # actually has on record.
    selected_experiment = mlflow.get_experiment_by_name('House Prices Experiment - assignment 1')
    print(f"MLflow experiment set to: {selected_experiment.name}")

    mlflow_active = True
except Exception as setup_error:
    # Tracking setup is treated as optional here: report the problem and
    # leave the flag at False.
    print(f"Could not initialize DagsHub or set MLflow experiment: {setup_error}")
    print("Proceeding without MLflow tracking.")




DagsHub initialized successfully.
MLflow experiment set to: House Prices Experiment - assignment 1


### Load test data

In [25]:

# Read the test set into `df_test`; later cells run the model on this frame.
try:
    df_test = pd.read_csv('./house-prices/test.csv')
except FileNotFoundError:
    print("Error: test.csv not found in ./house-prices/. Please ensure the file exists.")
    # Re-raise rather than calling exit(): exit() is intended for interactive
    # shells and, in a notebook, targets the kernel instead of simply failing
    # this cell. Re-raising stops execution here with a normal traceback.
    raise

print("Test data loaded successfully.")
print(f"Initial testing data shape: {df_test.shape}")
    



Test data loaded successfully.
Initial testing data shape: (1459, 80)


In [27]:
import os


# Load the logged scikit-learn pipeline from MLflow, predict on `df_test`,
# and write the predictions to ./results/submission_predictions.csv in the
# (Id, SalePrice) layout.
logged_model_uri = 'runs:/2aa7b5ccbf3b42259fa7efe5be192ee6/model_pipeline' # Use the specific run ID

try:
    # Validate the input up front so a malformed test file fails before the
    # model is loaded and before prediction. Raising (instead of exit())
    # guarantees the rest of the cell does not run.
    if 'Id' not in df_test.columns:
        raise ValueError("'Id' column not found in test data!")

    # Load the scikit-learn flavor directly
    print(f"Loading sklearn model from: {logged_model_uri}")
    loaded_sklearn_pipeline = mlflow.sklearn.load_model(logged_model_uri)
    print("sklearn model loaded successfully.")

    # --- Make Predictions ---
    print("Predicting using the loaded sklearn pipeline...")
    # The raw test DataFrame is passed as-is; any preprocessing is expected
    # to happen inside the loaded pipeline.
    predictions = loaded_sklearn_pipeline.predict(df_test)
    print("Predictions completed.")
    print("First 5 predictions:", predictions[:5])

    # --- Create Submission File ---
    print("Creating submission DataFrame...")
    # Create the DataFrame in the required format (Id, SalePrice)
    submission_df = pd.DataFrame({
        'Id': df_test['Id'],
        'SalePrice': predictions
    })

    # Define output path; the directory is created if it doesn't exist.
    output_dir = './results'
    os.makedirs(output_dir, exist_ok=True)
    submission_filename = os.path.join(output_dir, 'submission_predictions.csv')

    # Save the DataFrame to CSV (index=False keeps only the two columns).
    submission_df.to_csv(submission_filename, index=False)
    print(f"Submission file saved successfully to: {submission_filename}")
    print("\nPreview of submission file:")
    print(submission_df.head())

except Exception as e:
    print(f"An error occurred during model loading or prediction: {e}")
    # Re-raise so the full traceback is shown and a "Run All" stops here
    # instead of continuing with missing or stale results.
    raise


Loading sklearn model from: runs:/2aa7b5ccbf3b42259fa7efe5be192ee6/model_pipeline
sklearn model loaded successfully.
Predicting using the loaded sklearn pipeline...
[HighCorrelationRemover Transform] Dropped columns: ['1stFlrSF', 'Alley_Pave', 'BasementSurfaceRatio', 'BldgType_1Fam', 'BsmtCond_TA', 'BsmtFinType2_Unf', 'BsmtQual_TA', 'CentralAir_Y', 'Electrical_SBrkr', 'ExterCond_TA', 'ExterQual_TA', 'Exterior2nd_AsbShng', 'Exterior2nd_CBlock', 'Exterior2nd_CmentBd', 'Exterior2nd_HdBoard', 'Exterior2nd_MetalSd', 'Exterior2nd_Plywood', 'Exterior2nd_Stucco', 'Exterior2nd_VinylSd', 'Exterior2nd_Wd Sdng', 'FireplaceQu_TA', 'Foundation_PConc', 'Foundation_Slab', 'GarageArea', 'GarageCond_Ex', 'GarageCond_TA', 'GarageQual_TA', 'GarageType_Detchd', 'GarageYrBlt', 'Heating_GasW', 'HouseStyle_1Story', 'HouseStyle_2Story', 'KitchenQual_TA', 'LandSlope_Mod', 'LotConfig_Inside', 'LotShape_Reg', 'MSZoning_RM', 'MasVnrType_Stone', 'MiscFeature_Gar2', 'Neighborhood_Somerst', 'PavedDrive_Y', 'PoolQC_Fa