# Multiple Regression

In [9]:
# Step 1: Import necessary libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
from sklearn.model_selection import train_test_split  # for splitting the data
import statsmodels.api as sm  # For OLS Regression

In [3]:
# Step 2: Toy housing dataset - eight houses, one list per column.
# 'Price' is the value the regression below learns to predict from the others.
data = {
    'Size':     [1500, 1600, 1700, 1800, 1900, 2000, 2100, 2200],
    'Bedrooms': [3, 3, 3, 4, 4, 4, 5, 5],
    'Age':      [10, 15, 10, 20, 30, 15, 20, 25],
    'Price':    [300000, 320000, 340000, 360000, 400000, 370000, 420000, 430000],
}


In [6]:
# Build a DataFrame from the dict: each key becomes a column, each list its values.
df = pd.DataFrame(data=data)

In [7]:
# Predictor matrix X: the three explanatory columns, in this order.
X = df.loc[:, ['Size', 'Bedrooms', 'Age']]
# Response vector Y: the column the model is trained to predict.
Y = df.loc[:, 'Price']

In [8]:
# Step 3: Hold out a quarter of the rows for testing and train on the rest.
# random_state pins the shuffle so the same rows land in each set on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=42,
)

In [10]:
# Step 4: Instantiate an ordinary least-squares regressor and fit it
# on the training rows only (the test rows stay unseen until evaluation).
model = LinearRegression()
model.fit(X=X_train, y=Y_train)

In [11]:
# Step 5: Predict prices for the held-out test rows.
Y_pred = model.predict(X=X_test)

In [13]:
# Show the fitted parameters: the intercept, then the coefficient array
# (one entry per feature column used in training).
print(f"Intercept: {model.intercept_}")
print(f"Coefficients: {model.coef_}")

Intercept: 33576.01713061816
Coefficients: [  182.01284797 -6252.67665953  1391.86295503]


In [12]:
# Step 6: Score the test-set predictions against the true prices.
mse = mean_squared_error(y_true=Y_test, y_pred=Y_pred)  # mean squared error
r2 = r2_score(y_true=Y_test, y_pred=Y_pred)  # coefficient of determination (R²)

In [14]:
# Report the two evaluation metrics computed above.
print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

Mean Squared Error: 299314729.30776143
R-squared: 0.5210964331075817


In [16]:
# Predict the price of a single unseen house.
# The model was fitted on a DataFrame, so scikit-learn remembers the column
# names; passing a bare NumPy array makes it warn that "X does not have valid
# feature names" and skips the column check. Building a one-row DataFrame with
# the same columns, in the same order as in training, avoids both problems.
new_house = pd.DataFrame([[1500, 3, 10]], columns=['Size', 'Bedrooms', 'Age'])
predicted_price = model.predict(new_house)
# predict returns an array with one entry per input row; show the single value.
print(f"Predicted Price for new house: {predicted_price[0]}")

Predicted Price for new house: 301755.888650963




In [25]:
# Load the wine dataset from "wine.csv" (path is relative to the working
# directory) into df_info, then show the frame as the cell's output.
df_info=pd.read_csv("wine.csv")
df_info

Unnamed: 0,Wine,Alcohol,Malic.acid,Ash,Acl,Mg,Phenols,Flavanoids,Nonflavanoid.phenols,Proanth,Color.int,Hue,OD,Proline
0,1,14.23,1.71,2.43,15.6,127,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.20,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050
2,1,13.16,2.36,2.67,18.6,101,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.50,16.8,113,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,3,13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740
174,3,13.40,3.91,2.48,23.0,102,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750
175,3,13.27,4.28,2.26,20.0,120,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835
176,3,13.17,2.59,2.37,20.0,120,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840


In [19]:
# NOTE: this cell previously rebuilt df_info from the housing `data` dict.
# When the notebook is run top to bottom, that replaced the wine data loaded
# from wine.csv just above, and the later df_info['Wine'] lookup then failed
# because the housing dict has no 'Wine' column. Keep the wine DataFrame and
# only verify that the target column is present.
assert 'Wine' in df_info.columns, "df_info should hold the wine dataset loaded from wine.csv"

In [26]:
# List df_info's column labels to confirm which columns are available
# before choosing the features and the target.
print("Columns in DataFrame:", df_info.columns)

Columns in DataFrame: Index(['Wine', 'Alcohol', 'Malic.acid', 'Ash', 'Acl', 'Mg', 'Phenols',
       'Flavanoids', 'Nonflavanoid.phenols', 'Proanth', 'Color.int', 'Hue',
       'OD', 'Proline'],
      dtype='object')


In [28]:
# Step 1: Separate predictors from the response.
# X keeps every column except 'Wine'; Y is the 'Wine' column itself.
X = df_info.drop(columns='Wine')
Y = df_info.loc[:, 'Wine']

In [29]:
# Step 2: Reserve 20% of the rows for testing; random_state fixes the shuffle
# so the split is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [34]:
# Step 3: Create a new, unfitted Linear Regression model for the wine data.
# This rebinds `model`, so the housing-price model fitted earlier is no longer
# reachable under that name.
model = LinearRegression()

In [35]:
# Step 4: Fit the regressor on the training portion of the wine data.
model.fit(X=X_train, y=Y_train)

In [36]:
# Step 5: Generate predictions for the held-out wine rows.
Y_pred = model.predict(X=X_test)

In [37]:
# Step 6: Mean squared error between true and predicted 'Wine' values on the
# test set, printed rounded to two decimals.
mse = mean_squared_error(y_true=Y_test, y_pred=Y_pred)
print(f"Mean Squared Error (MSE): {mse:.2f}")

Mean Squared Error (MSE): 0.07


In [40]:
# Step 7: R-squared (coefficient of determination) on the test set.
# Note: this is R², not a root-mean-square error.
r2 = r2_score(y_true=Y_test, y_pred=Y_pred)
print("R-squared:", r2)

R-squared: 0.8825140263270392
