In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# Columns used by this analysis: four predictors plus the target.
target_column = 'SalePrice'
selected_features = ['LotArea', 'OverallQual', 'YearBuilt', 'TotRmsAbvGrd', target_column]

# Load only the needed columns instead of parsing the whole CSV and then
# discarding most of it. `usecols` does not honour list order, so the
# result is re-indexed to keep the columns in the order listed above.
data = pd.read_csv('train.csv', usecols=selected_features)[selected_features]

# Report the number of missing values per column
print(data.isnull().sum())

# Separate features (X) and target variable (y)
X = data.drop(columns=target_column)
y = data[target_column]

# Columns treated as numeric inputs; any missing entry is replaced by the
# mean of its column before the data reaches the regressor.
numeric_features = ['LotArea', 'OverallQual', 'YearBuilt', 'TotRmsAbvGrd']
numeric_transformer = Pipeline([('imputer', SimpleImputer(strategy='mean'))])

# Apply the numeric transformer to the listed columns
preprocessor = ColumnTransformer([('num', numeric_transformer, numeric_features)])

# Chain preprocessing and linear regression into a single estimator
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit on the training portion, then predict the held-out rows
y_pred = model.fit(X_train, y_train).predict(X_test)

# Score the predictions against the true test-set targets
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Describe one hypothetical house as a single-row frame, with one value
# for each column the model was trained on.
new_house = pd.DataFrame(
    [[1000, 6, 2004, 7]],
    columns=['LotArea', 'OverallQual', 'YearBuilt', 'TotRmsAbvGrd'],
)

# Run the fitted pipeline on that row
predicted_price = model.predict(new_house)

print(f'Predicted Price: {predicted_price[0]}')


LotArea         0
OverallQual     0
YearBuilt       0
TotRmsAbvGrd    0
SalePrice       0
dtype: int64
Mean Squared Error: 2103725412.1823704
R-squared: 0.7257319499382997
Predicted Price: 186501.79576318327
