<a href="https://colab.research.google.com/github/primalbioinformatics/drug-design-2024/blob/main/free_wilson_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# Create the dataset: one row per compound, one 0/1 indicator column per
# substituent (1 = substituent present), plus the measured activity.
data = {
    'Compound': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'Substituent A': [1, 0, 0, 0, 1, 1, 0, 0, 1, 1],
    'Substituent B': [0, 1, 0, 0, 1, 0, 1, 0, 1, 1],
    'Substituent C': [0, 0, 1, 0, 0, 1, 1, 1, 1, 0],
    'Substituent D': [0, 0, 0, 1, 0, 0, 0, 1, 0, 1],
    'Biological Activity': [6.5, 6.8, 7.0, 6.0, 7.1, 7.3, 7.5, 6.7, 7.8, 6.9]
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Define the features (X) and target variable (y).
# The column list is named once so the report loop below cannot drift out of
# sync with the columns that were actually fitted.
feature_columns = ['Substituent A', 'Substituent B', 'Substituent C', 'Substituent D']
X = df[feature_columns]
y = df['Biological Activity']

# Prepend a column of ones so the first fitted parameter is the intercept.
# From here on X is a plain (n_compounds, 1 + n_substituents) NumPy array.
X = np.c_[np.ones(X.shape[0]), X]

# Fit by ordinary least squares.
# np.linalg.lstsq solves min ||X @ beta - y|| directly rather than forming
# inv(X^T X): explicitly inverting the Gram matrix squares the condition number
# and fails outright when indicator columns are linearly dependent, whereas
# lstsq then returns the minimum-norm solution instead of raising.
beta = np.linalg.lstsq(X, y.to_numpy(), rcond=None)[0]

# Sanity check: any least-squares solution satisfies the normal equations
# X^T X beta = X^T y.
X_transpose = np.transpose(X)
np.testing.assert_allclose(X_transpose @ (X @ beta), X_transpose @ y.to_numpy())

# Extract coefficients
intercept = beta[0]
coefficients = beta[1:]

# Print the results
print(f'Intercept: {intercept:.2f}')
for column_name, contribution in zip(feature_columns, coefficients):
    print(f'{column_name} coefficient: {contribution:.2f}')

# Cross-check the hand-computed fit against scikit-learn.
# LinearRegression fits its own intercept, so the leading column of ones is
# sliced off (X[:, 1:]) before fitting.
model = LinearRegression().fit(X[:, 1:], y)

print("\nResults using sklearn:")
print(f'Intercept: {model.intercept_:.2f}')
# model.coef_ follows the column order of the fitted matrix: A, B, C, D.
for label, weight in zip("ABCD", model.coef_):
    print(f'Substituent {label} coefficient: {weight:.2f}')


Intercept: 6.23
Substituent A coefficient: 0.31
Substituent B coefficient: 0.56
Substituent C coefficient: 0.73
Substituent D coefficient: -0.23

Results using sklearn:
Intercept: 6.23
Substituent A coefficient: 0.31
Substituent B coefficient: 0.56
Substituent C coefficient: 0.73
Substituent D coefficient: -0.23
