In [2]:
import pandas as pd
import numpy as np
import joblib
import os
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.model_selection import train_test_split

# ✅ Configuration
# Paths are machine-specific; change them if your files live elsewhere.
dataset_path = '/Users/tharun/Desktop/hestia dataset.csv'  # Change if your CSV path is different
save_path = '/Users/Hestia 2/RealEstate/untitled folder'

# Relative half-width of the price band (0.2 -> ±20%).
PRICE_TOLERANCE = 0.2

# Columns the CSV must provide, and the columns the models are trained on.
required_columns = ['location', 'area_size', 'property_type', 'price']
features = ['location', 'area_size', 'property_type', 'price_min', 'price_max']


def expected_peer_price(df):
    """Return the price each listing would have at its peer group's unit price.

    Peers are the rows sharing the same ``location`` and ``property_type``.
    The group's median price per unit of ``area_size`` is multiplied by the
    listing's own area. Rows whose area is not positive get NaN, because no
    unit price can be computed for them.

    NOTE(review): the peer-group definition (location + property type, median
    unit price) is an assumption about what "normal" means for this dataset --
    confirm with whoever owns the data.
    """
    # Non-positive areas become NaN so they neither divide by zero nor skew
    # the group median (median skips NaN).
    area = df['area_size'].where(df['area_size'] > 0)
    unit_price = df['price'] / area
    peer_unit_price = unit_price.groupby(
        [df['location'], df['property_type']]
    ).transform('median')
    return peer_unit_price * area


def label_price_anomalies(df, tolerance=PRICE_TOLERANCE):
    """Return a 0/1 Series: 1 = price outside the peer band, 0 = normal.

    A listing is flagged when its ``price`` lies more than ``tolerance``
    (relative) away from ``expected_peer_price``. The band must come from
    *other* information than the row's own price: comparing a price with
    ±20% of itself can never flag anything, which leaves the classifier with
    a single class to learn.

    Rows without a usable reference (NaN expected price) are labelled 0,
    since every comparison against NaN is False.
    """
    expected = expected_peer_price(df)
    lower = expected * (1 - tolerance)
    upper = expected * (1 + tolerance)
    return ((df['price'] < lower) | (df['price'] > upper)).astype(int)


# Guarded so the helpers above can be imported without touching the disk;
# in a notebook cell or a script run __name__ is "__main__", so this executes
# and every name below is still a module-level global.
if __name__ == "__main__":
    # ✅ Load dataset
    df = pd.read_csv(dataset_path)

    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise KeyError(f"Dataset is missing required columns: {missing_columns}")

    # ✅ Encode categorical columns
    location_encoder = LabelEncoder()
    property_type_encoder = LabelEncoder()

    df['location'] = location_encoder.fit_transform(df['location'])
    df['property_type'] = property_type_encoder.fit_transform(df['property_type'])

    # ✅ Generate min/max price range (±20%) around the listed price.
    # These stay model inputs: they are how the listing's price reaches the models.
    df['price_min'] = df['price'] * (1 - PRICE_TOLERANCE)
    df['price_max'] = df['price'] * (1 + PRICE_TOLERANCE)

    # ✅ Create binary target: 1 = anomaly, 0 = normal (relative to peer listings)
    df['target'] = label_price_anomalies(df)

    class_counts = df['target'].value_counts()
    if len(class_counts) < 2:
        print("⚠️ Target contains a single class; the classifier will predict a constant.")

    # ✅ Feature columns
    X = df[features]
    y = df['target']

    # ✅ Train-test split (before scaling, so test rows never influence the scaler)
    # Stratify only when every class has at least two rows; otherwise
    # train_test_split raises.
    can_stratify = len(class_counts) > 1 and class_counts.min() >= 2
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
        stratify=y if can_stratify else None,
    )

    # ✅ Normalize features using statistics from the training rows only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train_raw)
    X_test = scaler.transform(X_test_raw)
    # Kept for later cells that expect the whole scaled matrix.
    X_scaled = scaler.transform(X)

    # ✅ Train models
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train)

    iso_forest = IsolationForest(n_estimators=100, contamination=0.1, random_state=42)
    iso_forest.fit(X_train)

    # ✅ Save models and encoders
    os.makedirs(save_path, exist_ok=True)

    joblib.dump(rf_model, os.path.join(save_path, 'random_forest_model.pkl'))
    joblib.dump(iso_forest, os.path.join(save_path, 'isolation_forest_model.pkl'))
    joblib.dump(scaler, os.path.join(save_path, 'scaler.pkl'))
    joblib.dump(location_encoder, os.path.join(save_path, 'location_encoder.pkl'))
    joblib.dump(property_type_encoder, os.path.join(save_path, 'property_type_encoder.pkl'))

    print("✅ All models and encoders saved to:", save_path)

✅ All models and encoders saved to: /Users/Hestia 2/RealEstate/untitled folder
