In [1]:
# Data for each group
placebo = [38, 47, 39, 25, 42]
low_dose = [22, 19, 8, 23, 31]
moderate_dose = [14, 26, 11, 18, 5]

# Calculate the mean for each group
mean_placebo = sum(placebo) / len(placebo)
mean_low_dose = sum(low_dose) / len(low_dose)
mean_moderate_dose = sum(moderate_dose) / len(moderate_dose)

# Calculate the overall mean (Grand Mean)
grand_mean = (sum(placebo) + sum(low_dose) + sum(moderate_dose)) / (len(placebo) + len(low_dose) + len(moderate_dose))

# Calculate the Sum of Squares Between Groups (SSB)
ssb = (len(placebo) * (mean_placebo - grand_mean)**2) + (len(low_dose) * (mean_low_dose - grand_mean)**2) + (len(moderate_dose) * (mean_moderate_dose - grand_mean)**2)

# Calculate the Sum of Squares Within Groups (SSW)
ssw = sum((x - mean_placebo)**2 for x in placebo) + sum((x - mean_low_dose)**2 for x in low_dose) + sum((x - mean_moderate_dose)**2 for x in moderate_dose)

# Calculate the degrees of freedom (df) for both Between Groups (dfB) and Within Groups (dfW)
dfB = 3 - 1  # Number of groups - 1
dfW = len(placebo) + len(low_dose) + len(moderate_dose) - 3  # Total sample size - Number of groups

# Calculate the Mean Square for both Between Groups (MSB) and Within Groups (MSW)
msb = ssb / dfB
msw = ssw / dfW

# Calculate the F-statistic
f_statistic = msb / msw


# Calculate the p-value using the F-distribution
import scipy.stats as stats

p_value = 1 - stats.f.cdf(f_statistic, dfB, dfW)

# Print the results
print("F-statistic:", f_statistic)
print("P-value:", p_value)

F-statistic: 11.266565503287813
P-value: 0.0017606447622791066


In [2]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Example data
data = {
    'Hours': [5, 10, 15, 20, 25, 30, 35, 40],
    'Monthly_Expenses': [220, 340, 420, 530, 610, 700, 750, 800]
}

# Convert data to a DataFrame
df = pd.DataFrame(data)

# Define the dependent variable (Y) and independent variable (X)
Y = df['Monthly_Expenses']
X = sm.add_constant(df['Hours'])  # Adding a constant for the intercept term

# Fit the OLS model
model = sm.OLS(Y, X).fit()

# Generate the OLS table
ols_table = model.summary()

# Print the OLS table
print(ols_table)

                            OLS Regression Results                            
Dep. Variable:       Monthly_Expenses   R-squared:                       0.985
Model:                            OLS   Adj. R-squared:                  0.982
Method:                 Least Squares   F-statistic:                     382.4
Date:                Thu, 20 Jul 2023   Prob (F-statistic):           1.16e-06
Time:                        21:08:20   Log-Likelihood:                -36.782
No. Observations:                   8   AIC:                             77.56
Df Residuals:                       6   BIC:                             77.72
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        169.6429     21.611      7.850      0.0



In [3]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Set a random seed for reproducibility
np.random.seed(42)

# Number of data points
n = 100

# Generate independent variables X1, X2, and X3
X1 = np.random.rand(n) * 10
X2 = np.random.rand(n) * 5
X3 = np.random.rand(n) * 2

# Generate dependent variable Y with a linear relationship and some random noise
Y = 2 * X1 + 3 * X2 + 5 * X3 + np.random.normal(0, 2, n)

# Create a DataFrame to store the data
data = {
    'Y': Y,
    'X1': X1,
    'X2': X2,
    'X3': X3,
}
df = pd.DataFrame(data)

# Display the first few rows of the DataFrame
print(df.head())

# Split the data into independent variables (X) and the dependent variable (y)
X = df[['X1', 'X2', 'X3']]
y = df['Y']

# Create a LinearRegression model
model = LinearRegression()

# Fit the model to the data
model.fit(X, y)

# Get the coefficients (slope) and the intercept of the linear regression model
coefficients = model.coef_
intercept = model.intercept_

# Display the coefficients and the intercept
print("Coefficients:", coefficients)
print("Intercept:", intercept)

# Predict the values using the trained model
y_pred = model.predict(X)

# Calculate the Mean Squared Error (MSE) and Mean Absolute Error (MAE) to evaluate the model's performance
mse = mean_squared_error(y, y_pred)
mae = mean_absolute_error(y, y_pred)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)


           Y        X1        X2        X3
0  14.473700  3.745401  0.157146  1.284063
1  28.098641  9.507143  3.182052  0.168280
2  25.259394  7.319939  1.571780  0.323257
3  29.855110  5.986585  2.542853  1.797108
4  18.747875  1.560186  4.537832  1.212858
Coefficients: [2.08318585 3.08341989 5.32934818]
Intercept: -0.8000557539444237
Mean Squared Error: 3.713162128999969
Mean Absolute Error: 1.5037565432194189


In [5]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.stats.anova import anova_lm

# Set a random seed for reproducibility
np.random.seed(42)

# Number of data points
n = 100

# Generate independent variables X1, X2, and X3
X1 = np.random.rand(n) * 10
X2 = np.random.rand(n) * 5
X3 = np.random.rand(n) * 2

# Generate dependent variable Y with a linear relationship and some random noise
Y = 2 * X1 + 3 * X2 + 5 * X3 + np.random.normal(0, 2, n)

# Create a DataFrame to store the data
data = {
    'Y': Y,
    'X1': X1,
    'X2': X2,
    'X3': X3,
}
df = pd.DataFrame(data)

# Split the data into independent variables (X) and the dependent variable (y)
X = df[['X1', 'X2', 'X3']]
y = df['Y']

# Add a constant column for the intercept term
X = sm.add_constant(X)

# Fit the linear regression model
model = sm.OLS(y, X).fit()

# Generate the ANOVA table
anova_table = sm.stats.anova_lm(model)

# Display the ANOVA table
print(anova_table)


AttributeError: 'PandasData' object has no attribute 'design_info'