**Logistic Regression**

In [None]:
!pip install numpy pandas matplotlib scikit-learn




In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.datasets import make_classification


In [None]:
data=pd.read_csv('Final_db.csv')


In [None]:
data.drop('Name', axis=1)

Unnamed: 0,ALogP,nHBAcc_Lipinski,nHBDon_Lipinski,MW
0,0.5864,6,0,295.141973
1,1.0872,6,0,323.173273
2,0.8368,6,0,309.157623
3,1.3376,6,0,337.188923
4,0.0844,6,2,267.110673
...,...,...,...,...
59,-0.1673,4,1,237.136494
60,1.5099,4,1,251.152143
61,-0.4489,5,3,238.131742
62,-0.5991,4,1,221.105193


In [None]:
X =data.drop('ALogP', axis=1)
y = data['ALogP'].astype(int)


# training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# logistic regression model
model = LogisticRegression()

# Training the model
model.fit(X_train, y_train)

# predictions on the test set
y_pred = model.predict(X_test)

# Calculate accuracy and F1 score
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, pos_label=0)

print(f'Accuracy: {accuracy:.4f}')
print(f'F1 Score: {f1:.4f}')

Accuracy: 0.9231
F1 Score: 0.9600


**Naive Bayes**

In [119]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import matplotlib.pyplot as plt

df = pd.read_csv('Final_db.csv')

# training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df[['MW', 'nHBAcc_Lipinski', 'nHBDon_Lipinski']], df['ALogP'], test_size=0.2, random_state=42)

# Linear Regression model
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# predictions on the test set
y_pred = regressor.predict(X_test)

# Evaluating the regression model
mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)
r2 = metrics.r2_score(y_test, y_pred)

print(f'Mean Absolute Error: {mae}')
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# threshold for correctness
threshold = 0.5

# Converting predictions to binary
binary_predictions = np.abs(y_pred - y_test) <= threshold

# Accuracy as the percentage of correct predictions
accuracy = np.sum(binary_predictions) / len(y_test)

print(f'Accuracy: {accuracy * 100:.2f}%')

# Convert true labels to binary (1 if actual is within threshold, 0 otherwise)
binary_true_labels = np.abs(y_test) <= threshold

# Calculate F1 score
precision = np.sum(binary_predictions & binary_true_labels) / np.sum(binary_predictions)
recall = np.sum(binary_predictions & binary_true_labels) / np.sum(binary_true_labels)

f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0

print(f'F1 Score: {f1_score:.2f}')


Mean Absolute Error: 0.42238221181232427
Mean Squared Error: 0.2539971828684636
R-squared: 0.11726374883344548
Accuracy: 61.54%
F1 Score: 0.59


**K-Nearest neighbour

In [118]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score


data = pd.read_csv('Final_db.csv')

data.drop('Name', axis=1)
features = data.drop('ALogP', axis=1)
target = data['ALogP'].astype(str)

# training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# number of neighbors (k) - Highest accuracy
k = 5

# KNN classifier
knn_classifier = KNeighborsClassifier(n_neighbors=k)

# Train the model
knn_classifier.fit(X_train, y_train)

# predictions on the test set
y_pred = knn_classifier.predict(X_test)

# accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')

# F1 score
f1 = f1_score(y_test, y_pred, average='weighted')  # 'weighted' for multiclass classification
print(f'F1 Score: {f1:.4f}')


Accuracy: 0.0769
F1 Score: 0.0385


**Random Forest**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score

data.drop('Name', axis=1)
features = data.drop('ALogP', axis=1)
target = data['ALogP']

X = data.drop('ALogP', axis=1)
y = data['']

# training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
rf_classifier.fit(X_train, y_train)

# predictions on the test set
y_pred = rf_classifier.predict(X_test)

# accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# F1 score
f1 = f1_score(y_test, y_pred)
print(f'F1 Score: {f1:.2f}')


Accuracy: 0.77
F1 Score (Micro): 0.77
F1 Score (Macro): 0.50


**SVM**

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import numpy as np
import pandas as pd

data = pd.read_csv('Final_db.csv')

data.drop('Name', axis=1)
features = data.drop('ALogP', axis=1)
target = data['ALogP'].astype(str)

# training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# SVM model
svm_model = make_pipeline(StandardScaler(), SVC())

# Train the model
svm_model.fit(X_train, y_train)

# predictions on the test set
y_pred = svm_model.predict(X_test)

# Calculate and print accuracy and F1 score
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')

print(f'Accuracy: {accuracy:.4f}')
print(f'F1 Score: {f1:.4f}')


Accuracy: 0.6923
F1 Score: 0.6506
