In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import fetch_california_housing

import pandas as pd

In [2]:
# Load the California housing dataset bundle from scikit-learn; later cells
# read its `.data`, `.feature_names` and `.target` attributes.
housing = fetch_california_housing()

In [3]:
# Feature matrix as a labelled DataFrame: one column per dataset feature name.
df = pd.DataFrame(data=housing.data, columns=housing.feature_names)

In [4]:
# Preview the first five feature rows as a quick sanity check.
df.head(n=5)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [5]:
# Regression target as a single-column DataFrame.
# NOTE(review): 'MEDV' is the label conventionally used for the Boston housing
# target; consider a dataset-appropriate name if nothing downstream relies on it.
target = pd.DataFrame(data=housing.target, columns=["MEDV"])

In [6]:
# Features and target side by side in one frame (column-wise concatenation).
# NOTE(review): `data` is not referenced by the visible cells that follow —
# confirm whether it is still needed.
data = pd.concat(objs=[df, target], axis="columns")

In [7]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df,
    target,
    test_size=0.3,
    random_state=46,
)

In [8]:
# Linear regression estimator constructed with its default settings.
model = LinearRegression()

In [9]:
# Fit the linear model on the training split only.
model.fit(X=X_train, y=y_train)

In [10]:
# Predict on the held-out rows. The displayed result is a 2-D column array
# (one row per test sample) — presumably because the model was fitted on a
# one-column DataFrame target rather than a 1-D vector.
y_pred = model.predict(X=X_test)
y_pred

array([[2.81645359],
       [2.2860135 ],
       [1.63707276],
       ...,
       [2.39232926],
       [3.03199074],
       [2.70376808]])

In [11]:
# Report the fitted parameters, one per line.
print(
    f'Intercept:{model.intercept_}',
    f'Coefficients:{model.coef_}',
    sep='\n',
)

Intercept:[-35.72749939]
Coefficients:[[ 4.44232070e-01  9.78573150e-03 -1.21606120e-01  6.40895333e-01
  -4.66625735e-06 -3.80061533e-03 -4.04650839e-01 -4.19742999e-01]]


In [12]:
# Evaluate on the held-out split: mean squared error and R-squared, one per line.
print(
    f'Mean squared error:{mean_squared_error(y_test,y_pred)}',
    f'R-squared:{r2_score(y_test,y_pred)}',
    sep='\n',
)

Mean squared error:0.5282392649244279
R-squared:0.6062353546443202
