In [64]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer


In [66]:
# Load the dataset
# The CSV location can be overridden with the SAMPLE_SUBMISSION_CSV environment
# variable so the notebook is not tied to a single machine; when the variable
# is unset, the original local path is used.
DATA_PATH = os.environ.get(
    'SAMPLE_SUBMISSION_CSV',
    'C:/Users/SHYAM/Downloads/sample_submission.csv',
)
data = pd.read_csv(DATA_PATH)


In [58]:
# Handling Missing Values
# Drop every row that contains at least one missing value. The name is rebound
# to the filtered frame instead of using inplace=True, so the frame object that
# was originally loaded is not mutated behind the scenes.
data = data.dropna()


In [71]:
# Split the frame into the model inputs (X) and the value to predict (y).
# NOTE(review): 'Id' is the only feature used; it looks like a row identifier
# rather than a predictive signal — confirm this is the intended feature set.
feature_columns = ['Id']
target_column = 'SalePrice'
X = data[feature_columns]
y = data[target_column]


In [72]:
# Identify categorical features and apply OneHotEncoding
# Columns with dtype `object` are treated as categorical; all remaining columns
# are passed through unchanged.
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
print("Categorical columns:", categorical_cols)

if categorical_cols:
    column_transformer = ColumnTransformer(
        transformers=[
            ('encoder', OneHotEncoder(), categorical_cols)
        ],
        remainder='passthrough',
        # OneHotEncoder produces a sparse matrix by default, and
        # ColumnTransformer keeps the stacked result sparse when its density
        # falls below the threshold. The StandardScaler applied afterwards
        # centers the data, which is not supported on sparse input, so force a
        # dense array here.
        sparse_threshold=0
    )
    X_encoded = column_transformer.fit_transform(X)
else:
    # No categorical columns: nothing to encode, use the features as they are.
    X_encoded = X


Categorical columns: []


In [73]:
# Report the dimensions of the encoded feature matrix and stop early if it
# contains no rows.
encoded_shape = X_encoded.shape
print("Shape of X_encoded after encoding:", encoded_shape)

n_encoded_rows = encoded_shape[0]
if n_encoded_rows == 0:
    raise ValueError("After encoding, X_encoded has 0 samples.")


Shape of X_encoded after encoding: (1459, 1)


In [74]:
# Standardize the encoded features with a StandardScaler.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test rows contribute to the scaling statistics — confirm this is intended.
scaler = StandardScaler()
scaler.fit(X_encoded)
X_scaled = scaler.transform(X_encoded)


In [75]:
# Report the dimensions of the scaled feature matrix and stop early if it
# contains no rows.
scaled_shape = X_scaled.shape
print("Shape of X_scaled after scaling:", scaled_shape)

n_scaled_rows = scaled_shape[0]
if n_scaled_rows == 0:
    raise ValueError("After scaling, X_scaled has 0 samples.")


Shape of X_scaled after scaling: (1459, 1)


In [76]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    random_state=42,
    test_size=0.2,
)


In [77]:
# Fit a LinearRegression estimator on the training split.
model = LinearRegression()
model.fit(X=X_train, y=y_train)


In [78]:
# Predict the target for the held-out rows with the fitted model.
y_pred = model.predict(X=X_test)


In [81]:
# Score the predictions against the true targets of the test split.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error:", mse)


Mean Squared Error: 239521036.513574
