In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.model_selection import train_test_split
import pandas as pd

In [None]:
# Read the house-data CSV (path is relative to the notebook's working
# directory) and show the first five rows as this cell's output.
data = pd.read_csv(filepath_or_buffer='../house_data.csv')
data.head(n=5)

In [None]:
# Candidate predictors: every column of `data` except 'id', 'price' and 'date'.
# Index.drop raises if any of those labels is absent, so a schema change fails loudly.
all_features = data.columns.drop(['id', 'price', 'date']).tolist()
print(len(all_features))
# Feature matrix (all candidate columns) and regression target ('price').
X_full = data.loc[:, all_features]
y_full = data.loc[:, 'price']

In [None]:
# Score features on the training rows only. Fitting the selector on the full
# X_full / y_full lets the rows that later become the test set influence which
# columns are kept (data leakage), which makes the reported test metrics
# optimistic.
#
# test_size and random_state deliberately mirror the train/test split done
# later in this notebook: with the same number of rows and the same seed,
# train_test_split produces the same row partition, so X_fit holds exactly the
# rows that end up in X_train. Keep the two calls in sync if either changes.
X_fit, _X_holdout, y_fit, _y_holdout = train_test_split(
    X_full, y_full, test_size=0.20, random_state=42
)
# Univariate F-test ranking; keep the 5 highest-scoring features.
selector = SelectKBest(score_func=f_regression, k=5)
selector.fit(X_fit, y_fit)

In [None]:
# Boolean array with one entry per column of X_full: True where the fitted
# selector kept that column.
mask = selector.get_support()
# Translate the mask back into column names, preserving X_full's column order.
top_features = [column for column, kept in zip(X_full.columns, mask) if kept]
print('Selected Top 5 features: ', top_features)
# Restrict the feature matrix to the selected columns.
X_selected = X_full.loc[:, top_features]

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_selected, y_full, test_size=0.20, random_state=42
)
print('Train Shape: ', X_train.shape)
# Print the test set's (rows, columns), matching the train line above.
# Passing X_test itself would dump the whole DataFrame under a "Shape" label.
print('Test Shape: ', X_test.shape)

In [None]:
# Ordinary least-squares fit on the training split. fit() returns the
# estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)
# Bare expression so the cell still displays the fitted estimator.
model

In [None]:
# Predict the target for the held-out rows; compared against y_test in the
# evaluation cell.
y_predictions = model.predict(X=X_test)

In [None]:
# Fitted parameters followed by held-out evaluation metrics, gathered as
# (label, value) pairs and printed one per line in that order.
report = [
    ("Slope: ", model.coef_),  # one coefficient per selected feature
    ("Intercept: ", model.intercept_),
    ('R2 Score: ', r2_score(y_test, y_predictions)),
    ('Mean square error: ', mean_squared_error(y_test, y_predictions)),
    ('Mean absolute error: ', mean_absolute_error(y_test, y_predictions)),
]
for label, value in report:
    print(label, value)