# Multiple Linear Regression
* Models a continuous dependent variable as a linear function of **two or more independent variables**.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

In [2]:
# Load the "penguins" example dataset through seaborn; the following cells
# treat the result as a pandas DataFrame (column selection, dropna, head).
penguins = sns.load_dataset("penguins")

In [3]:
# Keep only the modelling columns, expose "sex" under the name "gender", and
# drop rows with any missing value in those columns.
#
# Written as one non-mutating chain so the cell can be re-run safely:
#   * rename() goes first and ignores missing labels by default, so a second
#     run (where "sex" has already become "gender") is a no-op, not a KeyError;
#   * the rename is by label rather than by position, so it cannot silently
#     mislabel columns if the selection list changes;
#   * dropna() returns a new frame instead of mutating a derived one in place.
# The original row index is intentionally preserved (no reset_index).
penguins = (
    penguins
    .rename(columns={"sex": "gender"})
    [["body_mass_g", "bill_length_mm", "gender", "species"]]
    .dropna()
)

In [4]:
# Preview the first rows to confirm the column selection and the "gender" rename.
penguins.head()

Unnamed: 0,body_mass_g,bill_length_mm,gender,species
0,3750.0,39.1,Male,Adelie
1,3800.0,39.5,Female,Adelie
2,3250.0,40.3,Female,Adelie
4,3450.0,36.7,Female,Adelie
5,3650.0,39.3,Male,Adelie


In [5]:
# Separate the predictors (X) from the response (y).
# The response is selected with a list so it stays a one-column DataFrame.
predictor_columns = ["bill_length_mm", "gender", "species"]
response_columns = ["body_mass_g"]

penguins_X = penguins[predictor_columns]
penguins_y = penguins[response_columns]

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    penguins_X,
    penguins_y,
    test_size=0.3,
    random_state=42,
)

In [7]:
from statsmodels.formula.api import ols

# The formula interface looks columns up by name in a single frame, so the
# training predictors and response are put back side by side.
ols_data = pd.concat(objs=[X_train, y_train], axis="columns")

# body_mass_g is the response; C(...) marks gender and species as categorical
# terms, which show up in the summary as C(gender)[T.Male], C(species)[T.Gentoo], etc.
ols_formula = "body_mass_g ~ bill_length_mm + C(gender) + C(species)"

# Build the (not yet fitted) model specification.
OLS = ols(formula=ols_formula, data=ols_data)

In [8]:
# Estimate the coefficients from the training data attached to the OLS specification.
model = OLS.fit()

In [9]:
# Display the regression table: fit statistics, per-term coefficients with
# standard errors and confidence intervals, and residual diagnostics.
model.summary()

0,1,2,3
Dep. Variable:,body_mass_g,R-squared:,0.85
Model:,OLS,Adj. R-squared:,0.847
Method:,Least Squares,F-statistic:,322.6
Date:,"Wed, 11 Oct 2023",Prob (F-statistic):,1.31e-92
Time:,22:32:35,Log-Likelihood:,-1671.7
No. Observations:,233,AIC:,3353.0
Df Residuals:,228,BIC:,3371.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,2032.2111,354.087,5.739,0.000,1334.510,2729.913
C(gender)[T.Male],528.9508,55.105,9.599,0.000,420.371,637.531
C(species)[T.Chinstrap],-285.3865,106.339,-2.684,0.008,-494.920,-75.853
C(species)[T.Gentoo],1081.6246,94.953,11.391,0.000,894.526,1268.723
bill_length_mm,35.5505,9.493,3.745,0.000,16.845,54.256

0,1,2,3
Omnibus:,0.339,Durbin-Watson:,1.948
Prob(Omnibus):,0.844,Jarque-Bera (JB):,0.436
Skew:,0.084,Prob(JB):,0.804
Kurtosis:,2.871,Cond. No.,798.0
