## Multi-Variable Linear Regression

In [27]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the dataset from a CSV file
df = pd.read_csv('Housing.csv')

# Categorical columns that must be converted to integer codes before fitting
col_encode = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus']

# Fit a separate LabelEncoder for every column and keep each one, keyed by
# column name. Reusing one shared encoder refits it on every iteration, so only
# the last column's category-to-code mapping would survive the loop. Keeping
# one encoder per column means each mapping can still be inspected
# (encoders[col].classes_), applied to new raw values (encoders[col].transform)
# or reversed (encoders[col].inverse_transform) later on.
encoders = {}
for col in col_encode:
    # `le` stays a module-level name; after the loop it refers to the encoder
    # fitted on the last column in col_encode.
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

# Separate the target (first column) from the features (every remaining column)
y = df.iloc[:, 0]
x = df.iloc[:, 1:]

# Hold out 25% of the rows for testing; the fixed random_state makes the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=40,
)

# Fit the linear model on the training rows (fit() returns the estimator itself)
model = LinearRegression().fit(x_train, y_train)

# Predictions for the held-out rows
y_pred = model.predict(x_test)

# Report the fitted intercept and per-feature coefficients
summary_lines = [
    "\nModel Summary:",
    f"Intercept: {model.intercept_}",
    f"Coefficients: {model.coef_}",
]
print("\n".join(summary_lines))

# Report the Mean Squared Error measured on the test set
mse = mean_squared_error(y_test, y_pred)
print(f"\nMean Squared Error (MSE): {mse}")

# Build a one-row frame that carries the same column names the model was trained on
feature_names = x_train.columns
sample_values = [7420, 4, 2, 3, 1, 0, 0, 0, 1, 2, 1, 0]  # one value per feature, in column order
new_data = pd.DataFrame([sample_values], columns=feature_names)

# Run the fitted model on the single new observation and print the result
y_pred1 = model.predict(new_data)
predicted_value = y_pred1[0]
print(f"\nPrediction for new data point {new_data.values} is : {predicted_value:.2f}")



Model Summary:
Intercept: 486676.47286567464
Coefficients: [ 2.37160541e+02  1.93254511e+04  1.06465203e+06  4.01395392e+05
  3.68759182e+05  3.80783834e+05  3.02838229e+05  8.84538857e+05
  8.51495025e+05  2.59745376e+05  5.28750146e+05 -2.05653727e+05]

Mean Squared Error (MSE): 1699319415288.2285

Prediction for new data point [[7420    4    2    3    1    0    0    0    1    2    1    0]] is : 7925694.83
