<a href="https://colab.research.google.com/github/rohitarer/PRODIGY_ML_01/blob/main/PRODIGY_ML_01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [4]:
# Read the training CSV (path is relative to the notebook's working directory)
# into a DataFrame that the following cells slice from.
data = pd.read_csv(filepath_or_buffer='./train.csv')

In [5]:
# Column the model is trained to predict.
target = 'SalePrice'

# Predictor columns, one per line; this order is reused later as the row
# labels of the coefficient table.
features = [
    'GrLivArea',
    'BedroomAbvGr',
    'FullBath',
    'OverallQual',
    'YearBuilt',
    'TotalBsmtSF',
    '1stFlrSF',
    '2ndFlrSF',
    'GarageCars',
    'GarageArea',
]

In [6]:
# Slice the loaded frame into the predictor table (X) and the response column (y).
X, y = data[features], data[target]

In [7]:
# Preprocessing definition (nothing is fitted in this cell).
# Every selected feature is routed through the same numeric pipeline.
numeric_features = features  # same list object as `features`, not a copy

# Step 1 fills missing entries with the column median; step 2 applies StandardScaler.
numeric_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ]
)

# Single 'num' branch covering all listed columns.
preprocessor = ColumnTransformer(
    transformers=[('num', numeric_transformer, numeric_features)],
)

In [8]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# partition the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Fit the preprocessor on the training rows only, then apply the already-fitted
# preprocessor to the held-out rows (the test split is never used for fitting).
# NOTE(review): X_train / X_test are rebound to the transformed output here, so
# re-running this cell without re-running the split cell would feed transformed
# data back into the preprocessor -- presumably that fails or double-scales; confirm.
X_train, X_test = (
    preprocessor.fit_transform(X_train),  # evaluated first: fits, then transforms
    preprocessor.transform(X_test),       # evaluated second: transform only
)

In [10]:
# Instantiate scikit-learn's LinearRegression with its default settings;
# fitting happens in a later cell.
model = LinearRegression()

In [11]:
# Fit the regressor on the preprocessed training features and their sale prices.
# Left as the cell's last expression so the notebook still shows the estimator.
model.fit(X=X_train, y=y_train)

In [12]:
# Predict the target for the held-out (already preprocessed) rows.
y_pred = model.predict(X=X_test)

In [13]:
# Compare held-out predictions with the true values.
# Keyword arguments make the (truth, prediction) order explicit.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)  # in squared units of the target
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

Mean Squared Error: 1507720211.0447428
R-squared: 0.8034346688369


In [14]:
# Tabulate the fitted coefficients, one row per feature name.
# The model was fitted on inputs passed through StandardScaler, so these are
# coefficients for the scaled features, not for the raw column units.
coefficients = pd.DataFrame({'Coefficient': model.coef_}, index=features)
print(coefficients)

               Coefficient
GrLivArea     18637.940774
BedroomAbvGr  -4983.463774
FullBath      -2058.840206
OverallQual   27273.265040
YearBuilt     10566.715445
TotalBsmtSF    6479.505537
1stFlrSF      12491.790563
2ndFlrSF       9054.687706
GarageCars     8690.232344
GarageArea     2229.860744


In [16]:
# The estimator's built-in score on the held-out split; the recorded output
# equals the R-squared value printed by the evaluation cell.
score = model.score(X=X_test, y=y_test)
score

0.8034346688369