In [17]:
# IMPORT NECESSARY LIBRARIES
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder

# Load the housing dataset from the CSV file in the working directory
df = pd.read_csv("Housing.csv")

# Show the first rows as a quick sanity check of the raw data
print("Initial DataFrame:", df.head(), sep="\n")

# Text-valued columns that must be turned into integer codes before they can
# be used as regression features
col_encode = [
    "mainroad",
    "guestroom",
    "basement",
    "hotwaterheating",
    "airconditioning",
    "prefarea",
    "furnishingstatus",
]

# A single encoder is re-fitted on each column in turn, so after this step it
# only retains the class mapping of the last column in the list
le = LabelEncoder()
df = df.assign(**{name: le.fit_transform(df[name]) for name in col_encode})

# Preview the frame now that every listed column holds integer codes
print("DataFrame after encoding:")
print(df.head())

# Separate the frame positionally: the first column is the target (y) and
# every remaining column is a feature (X)
y = df.iloc[:, 0]
x = df.iloc[:, 1:]

# Preview both pieces, features first
print("Features (X):", x.head(), sep="\n")
print("Target (y):", y.head(), sep="\n")

# Hold out 25% of the rows for testing; the fixed random_state makes the
# split reproducible between runs
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=40,
)

# fit() returns the fitted estimator, so creation and training are chained
model = LinearRegression().fit(x_train, y_train)

# Predict the target for the held-out rows
y_pred = model.predict(x_test)
print("Predicted values:")
print(y_pred)

# Report the fitted parameters and the error on the held-out rows
print('Intercept =', model.intercept_)
print('Coefficients =', model.coef_)
print('Mean Squared Error (MSE) =', mean_squared_error(y_test, y_pred))

# Predict the target for one hand-specified sample using the trained model.
# The values must follow the column order of `x`; the categorical entries are
# given as integer codes (presumably matching the LabelEncoder output for the
# corresponding columns -- verify against the encoded DataFrame).
sample = [7420, 4, 2, 3, 1, 0, 0, 0, 1, 2, 1, 0]

# The model was fitted on a DataFrame, so it remembers the feature names.
# Passing a bare nested list makes scikit-learn emit an
# "X does not have valid feature names" UserWarning and hides any column-order
# mistake; wrapping the sample in a DataFrame with the training columns avoids
# both (and raises immediately if the number of values is wrong).
sample_df = pd.DataFrame([sample], columns=x.columns)
y_pred1 = model.predict(sample_df)

# Build the message from the same list so the text cannot drift from the input
print(f"Prediction for input {sample}:")
print(y_pred1)


Initial DataFrame:
      price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0  13300000  7420         4          2        3      yes        no       no   
1  12250000  8960         4          4        4      yes        no       no   
2  12250000  9960         3          2        2      yes        no      yes   
3  12215000  7500         4          2        2      yes        no      yes   
4  11410000  7420         4          1        2      yes       yes      yes   

  hotwaterheating airconditioning  parking prefarea furnishingstatus  
0              no             yes        2      yes        furnished  
1              no             yes        3       no        furnished  
2              no              no        2      yes   semi-furnished  
3              no             yes        3      yes        furnished  
4              no             yes        2       no        furnished  
DataFrame after encoding:
      price  area  bedrooms  bathrooms  stories  mainr

