<a href="https://colab.research.google.com/github/mehdiiiii786/22006102-MEHDI_HADRI/blob/main/Projet_california_pricing_Mehdi_Hadri.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

# 1. Load the dataset
# The fetcher returns an object exposing the feature matrix (.data), the
# column names (.feature_names) and the prediction target (.target).
housing = fetch_california_housing()

# Wrap the raw arrays in a labelled DataFrame so columns can be used by name.
df = pd.DataFrame(housing.data, columns=housing.feature_names)
df['PRICE'] = housing.target  # What we want to predict (in $100,000s)

print("--- First 5 rows of data ---")
print(df.head())

# 2. Separate the features (X) from the target (y)
X = df.drop(columns='PRICE')  # Every column except the target
y = df['PRICE']               # The value the model learns to predict

# 3. Hold out a test set
# 80% of the rows are used for training; the remaining 20% stay unseen until
# evaluation. A fixed random_state keeps the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 4. Initialise and train the model
# n_estimators=100 builds a forest of 100 decision trees; the fixed
# random_state makes the fitted forest reproducible.
model = RandomForestRegressor(n_estimators=100, random_state=42)

print("\nTraining the model... (this may take a second)")
model.fit(X_train, y_train)

# 5. Predict on the held-out rows only, so the metrics below measure how the
# model behaves on data it was not trained on.
predictions = model.predict(X_test)

# 6. Evaluate the model
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

print("\n--- Model Evaluation ---")
print(f"Mean Absolute Error: {mae:.4f}")
print(f"R-squared Score: {r2:.4f}")
# The target is in units of $100,000, so scale the MAE to plain dollars.
print(f"\nInterpretation: On average, our prediction is off by ${mae * 100000:.0f}.")

# Optional: visualise actual vs predicted prices
plt.figure(figsize=(10, 6))
plt.scatter(y_test, predictions, alpha=0.5)

# Perfect-prediction reference line (predicted == actual). Its end points are
# taken from the values that are actually plotted (test targets and their
# predictions) rather than from the full dataset, so the diagonal always
# spans exactly the region covered by the scatter points.
diag_lo = min(y_test.min(), predictions.min())
diag_hi = max(y_test.max(), predictions.max())
plt.plot([diag_lo, diag_hi], [diag_lo, diag_hi], 'r--', lw=2)

plt.xlabel('Actual Price')
plt.ylabel('Predicted Price')
plt.title('Actual vs Predicted Housing Prices')
plt.show()