In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

# Pipeline: load the housing data, derive a binary High/Low price label from
# the median price, then train and evaluate a random-forest classifier.

# Tunable settings, named once so the split and the model share the same seed
DATA_PATH = 'USA_Housing.csv'  # Replace with actual file path
TEST_SIZE = 0.2
RANDOM_STATE = 42

# Load dataset (left untouched in `housing_raw`; every later frame is derived
# from it without in-place mutation)
housing_raw = pd.read_csv(DATA_PATH)

# Create binary classification target: strictly above the median -> 'High',
# everything else (including a missing price, which compares False) -> 'Low'
threshold = housing_raw['Price'].median()
is_high = housing_raw['Price'].gt(threshold)

# Model frame: drop the address column, attach the label, then drop the
# original price because the label is computed directly from it
df = (
    housing_raw
    .drop(columns=['Address'])
    .assign(Price_Category=is_high.map({True: 'High', False: 'Low'}))
    .drop(columns=['Price'])
)

# Split features and target
X = df.drop(columns=['Price_Category'])
y = df['Price_Category']

# Encode target as integers; le.classes_ keeps the original label strings,
# which are reused below as the report's row names
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Train Random Forest
model = RandomForestClassifier(random_state=RANDOM_STATE)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluation
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=le.classes_))


Confusion Matrix:
 [[460  61]
 [ 44 435]]

Classification Report:
               precision    recall  f1-score   support

        High       0.91      0.88      0.90       521
         Low       0.88      0.91      0.89       479

    accuracy                           0.90      1000
   macro avg       0.89      0.90      0.89      1000
weighted avg       0.90      0.90      0.90      1000

