In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's global generator so every run produces the same synthetic data
np.random.seed(0)

# Two integer predictors, 12 draws each from [1, 100); X1 is drawn before X2
data = {column: np.random.randint(1, 100, 12) for column in ('X1', 'X2')}
df = pd.DataFrame(data)

# Response follows Y = 2*X1 + 3*X2 + Gaussian noise (mean 0, sd 10),
# rounded to the nearest whole number
noise = np.random.normal(0, 10, 12)
df['Y'] = (2*df['X1'] + 3*df['X2'] + noise).round()

# Show the resulting frame as the cell output
df


Unnamed: 0,X1,X2,Y
0,45,89,369.0
1,48,13,122.0
2,65,59,311.0
3,68,66,333.0
4,68,40,270.0
5,10,88,283.0
6,84,47,329.0
7,22,89,313.0
8,37,82,326.0
9,88,38,306.0


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# `df` is the DataFrame built in the previous cell

# Hold out 20% of the rows for evaluation; random_state pins the shuffle
features = df[['X1', 'X2']]
target = df['Y']
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=0,
)

# Build the ordinary least-squares regressor and fit it on the training rows
# (fit() returns the estimator itself, so construction and fitting chain)
model = LinearRegression().fit(X_train, y_train)

# Predict the held-out rows
y_pred = model.predict(X_test)

# Report the learned parameters
print(f"Coefficients: {model.coef_}")
print(f"Intercept: {model.intercept_}")

# Report held-out error and goodness of fit
print(f"Mean Squared Error (MSE) on Test Data: {mean_squared_error(y_test, y_pred)}")
print(f"R^2 Score on Test Data: {r2_score(y_test, y_pred)}")


Coefficients: [2.31339084 3.19612503]
Intercept: -23.23869013599989
Mean Squared Error (MSE) on Test Data: 148.93258943139617
R^2 Score on Test Data: 0.9573693370370026


In [3]:
# Pull the fitted parameters off the sklearn estimator
intercept, coefficients = model.intercept_, model.coef_

# Render the fitted plane as a readable equation (two decimals per term)
print(f"Y = {intercept:.2f} + {coefficients[0]:.2f} * X1 + {coefficients[1]:.2f} * X2")

Y = -23.24 + 2.31 * X1 + 3.20 * X2


In [4]:
import statsmodels.api as sm

# statsmodels does not add an intercept on its own, so prepend a constant column
X_train_sm = sm.add_constant(X_train)

# Specify the OLS (Ordinary Least Squares) regression, then estimate it.
# NOTE(review): this rebinds `model`, which previously held the sklearn
# LinearRegression estimator; re-running earlier cells that read
# `model.coef_` / `model.intercept_` after this one will fail.
ols_spec = sm.OLS(y_train, X_train_sm)
model = ols_spec.fit()

# Print the full regression table (includes the t-tests and the F-test)
print(model.summary())


                            OLS Regression Results                            
Dep. Variable:                      Y   R-squared:                       0.990
Model:                            OLS   Adj. R-squared:                  0.987
Method:                 Least Squares   F-statistic:                     310.6
Date:                Tue, 31 Oct 2023   Prob (F-statistic):           8.76e-07
Time:                        08:34:34   Log-Likelihood:                -29.842
No. Observations:                   9   AIC:                             65.68
Df Residuals:                       6   BIC:                             66.28
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -23.2387     14.189     -1.638      0.1

