In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import (
    AdaBoostClassifier,
    AdaBoostRegressor,
    GradientBoostingClassifier,
    GradientBoostingRegressor,
    RandomForestClassifier,
    RandomForestRegressor,
    VotingClassifier,
    VotingRegressor,
)
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    mean_absolute_error,
    mean_squared_error,
    r2_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# Step 1: Load the dataset
path = '/content/sample_data/california_housing_test.csv'
data = pd.read_csv(path)

# Step 2: Check the columns and first few rows
print(data.head())
print(data.columns)

# Step 3: Separate the features from the target.
# 'median_house_value' is a continuous dollar amount, so this is a regression
# problem. Feeding it to classifiers turns every distinct price into its own
# class (the classifier run scored ~3% accuracy), so regressors and regression
# metrics are used below.
X = data.drop('median_house_value', axis=1)
y = data['median_house_value']

# Step 4: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 5: Standardize the features.
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 6: Initialize the regressors (fixed seeds keep runs reproducible)
rf_reg = RandomForestRegressor(n_estimators=100, random_state=42)
gb_reg = GradientBoostingRegressor(n_estimators=100, random_state=42)
ada_reg = AdaBoostRegressor(n_estimators=100, random_state=42)

# Step 7: Train each base model and predict on the held-out split
rf_reg.fit(X_train_scaled, y_train)
rf_pred = rf_reg.predict(X_test_scaled)

gb_reg.fit(X_train_scaled, y_train)
gb_pred = gb_reg.predict(X_test_scaled)

ada_reg.fit(X_train_scaled, y_train)
ada_pred = ada_reg.predict(X_test_scaled)

# Step 8: Voting Regressor -- averages the predictions of the three models.
# It fits its own clones of the base estimators, so the individually fitted
# models above are left untouched.
voting_reg = VotingRegressor(
    estimators=[('rf', rf_reg), ('gb', gb_reg), ('ada', ada_reg)]
)
voting_reg.fit(X_train_scaled, y_train)
voting_pred = voting_reg.predict(X_test_scaled)

# Step 9: Evaluation -- the same three metrics for every model.
# R^2 is unitless (1.0 is a perfect fit); MAE and RMSE are in the target's
# units. RMSE is taken as sqrt(MSE) so it works across scikit-learn versions.
predictions = {
    "Random Forest Regressor": rf_pred,
    "Gradient Boosting Regressor": gb_pred,
    "AdaBoost Regressor": ada_pred,
    "Voting Regressor (Random Forest + Gradient Boosting + AdaBoost)": voting_pred,
}

for model_name, y_pred in predictions.items():
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    print(f"\n{model_name}:")
    print(f"R^2:  {r2_score(y_test, y_pred):.4f}")
    print(f"MAE:  {mean_absolute_error(y_test, y_pred):,.2f}")
    print(f"RMSE: {rmse:,.2f}")


   longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \
0    -122.05     37.37                27.0       3885.0           661.0   
1    -118.30     34.26                43.0       1510.0           310.0   
2    -117.81     33.78                27.0       3589.0           507.0   
3    -118.36     33.82                28.0         67.0            15.0   
4    -119.67     36.33                19.0       1241.0           244.0   

   population  households  median_income  median_house_value  
0      1537.0       606.0         6.6085            344700.0  
1       809.0       277.0         3.5990            176500.0  
2      1484.0       495.0         5.7934            270500.0  
3        49.0        11.0         6.1359            330000.0  
4       850.0       237.0         2.9375             81700.0  
Index(['longitude', 'latitude', 'housing_median_age', 'total_rooms',
       'total_bedrooms', 'population', 'households', 'median_income',
       'median_house_value'],
      dtype='object')




Random Forest Classifier:
Accuracy: 0.028333333333333332
              precision    recall  f1-score   support

     22500.0       0.00      0.00      0.00         1
     39800.0       0.00      0.00      0.00         0
     40000.0       0.00      0.00      0.00         1
     41500.0       0.00      0.00      0.00         1
     42500.0       0.00      0.00      0.00         1
     44400.0       0.00      0.00      0.00         0
     44600.0       0.00      0.00      0.00         1
     46300.0       0.00      0.00      0.00         1
     46500.0       0.00      0.00      0.00         1
     47500.0       0.00      0.00      0.00         2
     50800.0       0.00      0.00      0.00         1
     51600.0       0.00      0.00      0.00         0
     52400.0       0.00      0.00      0.00         0
     52500.0       0.00      0.00      0.00         1
     52600.0       0.00      0.00      0.00         2
     54200.0       0.00      0.00      0.00         0
     55000.0       0.00