In [None]:
!pip install imbalanced-learn

In [1]:
#Importing Libraries
import pandas as pd
import numpy as np
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix, roc_curve, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# --- Load the dataset ---
# Read the cleaned CSV (path is relative to the current working directory)
# into a DataFrame and echo it for a quick visual check.
data = pd.read_csv(filepath_or_buffer='Data/Clean_Dataset.csv')
print(data)


# --- Data distribution ---
# Tabulate how many rows fall into each 'Suppression Status' class.
status_counts = data['Suppression Status'].value_counts()
print(status_counts)

# Show the same class counts as a bar chart, one bar per class.
class_ax = sns.countplot(data=data, x='Suppression Status')
class_ax.set_title('Class Distribution')
class_ax.set_xlabel('Suppression Status')
class_ax.set_ylabel('Count')
plt.show()

def plot_feature_vs_status(frame, feature, target='Suppression Status'):
    """Plot, for every distinct value of `feature`, the row count per `target` class.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table containing both the `feature` and `target` columns.
    feature : str
        Column placed on the x-axis; one group of bars per distinct value.
    target : str, default 'Suppression Status'
        Column whose classes become the individual bars within each group.

    The chart is rendered immediately via ``plt.show()``; nothing is returned.
    """
    # crosstab yields a (feature value x target class) table of counts.
    pd.crosstab(frame[feature], frame[target]).plot(kind='bar', figsize=(10, 6))
    plt.title(f'{feature} vs {target}')
    plt.xlabel(feature)
    plt.ylabel('Count')
    plt.show()


# Relationship between age and suppression status
plot_feature_vs_status(data, 'Age')

# Relationship between years on ART and suppression status
plot_feature_vs_status(data, 'Years on ART')

# Choosing Age and Years on ART as the features
X = data[['Age', 'Years on ART']]
y = data['Suppression Status']

# Split data into training and testing sets (80% / 20%).
# stratify=y keeps the class proportions of the target the same in both
# partitions, so a rare class cannot end up under-represented in the test set
# purely by chance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Normalize the features. The scaler is fitted on the training partition only
# and then reused unchanged on the test partition.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Oversample with SMOTE. Only the training partition is resampled; the test
# partition keeps its original rows.
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

# Train the SVM classifier with a linear kernel.
# NOTE(review): class_weight='balanced' is kept as-is; after SMOTE the training
# classes should already be equal-sized, so it is presumably redundant - confirm.
svm_model = SVC(kernel='linear', probability=True, class_weight='balanced', random_state=42)
svm_model.fit(X_train, y_train)

# Create a meshgrid to plot the decision boundary. The grid spans the scaled,
# resampled training features with a margin of 1 unit on every side.
h = .02  # Step size in the mesh
x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1

xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

# Predict the class of every grid point, then fold the flat prediction vector
# back into the grid's 2-D shape so it can be drawn as filled contours.
# NOTE(review): contourf and the `c=` colouring below need numeric class
# labels - confirm 'Suppression Status' is numerically encoded.
Z = svm_model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

fig, ax = plt.subplots()

# Plot the decision regions
ax.contourf(xx, yy, Z, alpha=0.8)

# Plot the training points
ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, edgecolors='k', marker='o', s=100,
           cmap=plt.cm.Paired, label='Train')

# Plot the test points. 'x' is an unfilled marker, so no edge colour is passed:
# matplotlib ignores it for unfilled markers and emits a warning.
test_points = ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, marker='x', s=100,
                         cmap=plt.cm.Paired, label='Test')

ax.set_title("SVM Decision Boundary with 'Age' and 'Years on ART'")
# Both axes are in scaled units because the features went through the scaler.
ax.set_xlabel('Age (standardized)')
ax.set_ylabel('Years on ART (standardized)')
ax.legend()
# Attach the colorbar explicitly to the test scatter, the last mappable drawn.
fig.colorbar(test_points, ax=ax)
plt.show()

ModuleNotFoundError: No module named 'imblearn'

In [None]:
# Display the scaled test features that are about to be scored.
print(X_test)

# Classify every test row with the trained SVM and show the predicted labels.
y_pred = svm_model.predict(X_test)
print("Predictions made", y_pred)

In [None]:
# Example: Age = 27, Years on ART = 3
new_data = [[27, 3]]

# The scaler was fitted on a DataFrame, so the sample is wrapped in a DataFrame
# with the same column names and order. Passing the bare list would make
# scikit-learn warn that X does not have valid feature names.
new_data_frame = pd.DataFrame(new_data, columns=['Age', 'Years on ART'])

# Scale the new data using the same scaler used for training
new_data_scaled = scaler.transform(new_data_frame)

# Make the prediction
new_prediction = svm_model.predict(new_data_scaled)


print("Prediction for the new data:", new_prediction)

In [None]:
# --- Model evaluation ---
# Score the fitted classifier on the held-out test features.
y_pred = svm_model.predict(X_test)

# Accuracy is the fraction of test labels predicted correctly; it is reported
# as a percentage with two decimals.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")