In [1]:
from sklearn.datasets import fetch_california_housing

# Fetch the California housing dataset object; it carries the feature
# matrix, the target vector and descriptive metadata as attributes.
housing_data = fetch_california_housing()

# Bind the feature matrix (X) and the target vector (y) for the cells below.
X, y = housing_data.data, housing_data.target

In [2]:
# Show the dataset's own description text first ...
print(housing_data.DESCR)
# ... then the dimensions of the feature matrix and of the target, one per line.
print(X.shape, y.shape, sep='\n')

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

:Number of Instances: 20640

:Number of Attributes: 8 numeric, predictive attributes and the target

:Attribute Information:
    - MedInc        median income in block group
    - HouseAge      median house age in block group
    - AveRooms      average number of rooms per household
    - AveBedrms     average number of bedrooms per household
    - Population    block group population
    - AveOccup      average number of household members
    - Latitude      block group latitude
    - Longitude     block group longitude

:Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html

The target variable is the median house value for California districts,
expressed in hundreds of thousands of dollars ($100,000).

This dataset was derived from the 1990 U.S. census, using one row per census block group.

In [3]:
# List the attribute names exposed by the loaded dataset object.
dir(housing_data)

['DESCR', 'data', 'feature_names', 'frame', 'target', 'target_names']

In [4]:
# Names of the feature columns, in the same order as the columns of X.
housing_data.feature_names

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [6]:
# Dimensions of the feature matrix.
X.shape

(20640, 8)

In [7]:
# Dimensions of the target vector.
y.shape

(20640,)

In [15]:
import pandas as pd

# Combine the feature matrix and the target into a single labelled frame:
# one column per feature name plus a 'Price' column holding the target.
df_features = pd.DataFrame(X, columns=housing_data.feature_names)
df_prices = pd.DataFrame(y, columns=['Price'])
df = pd.concat([df_features, df_prices], axis=1)

# Persist the combined frame. index=False because the default RangeIndex
# carries no information; writing it adds an unnamed leading column that
# reappears as 'Unnamed: 0' when the file is read back with pd.read_csv.
df.to_csv('california_housing.csv', index=False)

# Preview goes last: a notebook cell only renders its final expression, so a
# bare df.head(10) placed before to_csv() would display nothing.
df.head(10)

In [16]:
# Preview the first five rows of df.
# NOTE(review): the saved output shows an 'Unnamed: 0' column, which a frame
# built directly from X and y would not have. Presumably df was re-read from
# the CSV in a cell that is no longer present — confirm with Restart & Run All.
df.head(5)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,Price
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422
