In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px

import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from scipy.stats import uniform

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.metrics import classification_report

import warnings
# NOTE(review): this suppresses every warning for the rest of the notebook,
# including ones raised by the pandas and scikit-learn calls in later cells.
# Consider narrowing the filter to specific warning categories.
warnings.filterwarnings('ignore')

In [None]:
# Load the raw student records from the CSV file in the working directory.
csv_path = 'dataset.csv'
student_data = pd.read_csv(csv_path)

In [None]:
# Check the shape of the data set (number of rows, number of columns).
student_data.shape

In [None]:
# List the column names available in the data set.
student_data.columns

In [None]:
# Preview the first rows of the data (head() returns five rows by default).
student_data.head()

In [None]:
# Show descriptive statistics for the data set via DataFrame.describe().
student_data.describe()

In [None]:
# Count the missing values in each column (isna is the same check as isnull).
missing_per_column = student_data.isna().sum()
print(missing_per_column)

In [None]:
# Restrict the frame to the columns used for modelling: five predictors plus
# the Target label.
selected_columns = [
    "Mother's occupation",
    "Father's occupation",
    'Debtor',
    'Tuition fees up to date',
    'Scholarship holder',
    'Target',
]
# .copy() makes the selection an independent frame, so the later assignment to
# the Target column modifies this frame directly instead of writing to a slice
# of the loaded data (which raises pandas' SettingWithCopyWarning).
student_data = student_data[selected_columns].copy()


In [None]:
# Display the Target column (the label the model will predict).
student_data['Target']

In [None]:
# List the distinct values present in the Target column.
student_data['Target'].unique()

In [None]:
# Change the Target values into numerical codes.
target_codes = {
    'Dropout': 0,
    'Enrolled': 1,
    'Graduate': 2,
}

# Only encode while the column still holds the original string labels.
# Mapping an already-encoded column would turn every value into NaN, which made
# this cell unsafe to run twice.
if not student_data['Target'].isin(list(target_codes.values())).all():
    encoded_target = student_data['Target'].map(target_codes)

    # Series.map yields NaN for any label that is not a key of the mapping;
    # fail loudly here instead of passing NaN labels on to the classifier.
    unmapped_labels = student_data.loc[encoded_target.isna(), 'Target'].unique()
    if len(unmapped_labels):
        raise ValueError(f"Unexpected Target labels: {list(unmapped_labels)}")

    # assign() builds a new frame, so this never writes into a slice of
    # another DataFrame.
    student_data = student_data.assign(Target=encoded_target.astype(int))

In [None]:
# Check that the Target column is filled with 0, 1 and 2 only.
# value_counts lists every distinct value with its frequency (NaN included via
# dropna=False); displaying the whole frame only shows a truncated sample of
# rows and cannot confirm this.
student_data['Target'].value_counts(dropna=False)

In [None]:
# Extract input & output columns by name: every column except Target is a
# feature, and Target is the label. With the six columns selected earlier this
# is the same selection as the positional slices 0:5 and -1, but it does not
# break if the column order changes.
x = student_data.drop(columns='Target')
y = student_data['Target']


# Split the data into training & testing data (80% / 20%).
# random_state fixes the shuffle so every run of the notebook evaluates on the
# same rows; stratify=y keeps the class proportions equal in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Print the shape of each split, in the order:
# train features, test features, train labels, test labels.
for split_part in (X_train, X_test, y_train, y_test):
    print(split_part.shape)

In [None]:
from sklearn.svm import SVC

# Scale the data using StandardScaler. The scaler is fitted on the training
# split only and then applied unchanged to the test split, so no information
# from the test rows leaks into the scaling.
# NOTE: X_train / X_test are overwritten with their scaled versions here; later
# cells rely on these names holding the scaled data.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the baseline SVM (linear kernel, C=100) using the selected variables as
# input features and the "Target" column as the target variable.
svm = SVC(kernel='linear', C=100)
svm.fit(X_train, y_train)

In [None]:
# Score the fitted model on the held-out test split.
y_pred = svm.predict(X_test)
print('Accuracy:', accuracy_score(y_test, y_pred))

# The remaining metrics all take the same weighted average over the classes.
weighted_metrics = (
    ('Precision:', precision_score),
    ('Recall:', recall_score),
    ('F1 score:', f1_score),
)
for metric_label, metric_fn in weighted_metrics:
    print(metric_label, metric_fn(y_test, y_pred, average='weighted'))

In [None]:
# Display the accuracy score and a report with precision, recall and f1-scores.
print('Accuracy score: ',accuracy_score(y_test,y_pred))

# classification_report expects (y_true, y_pred) in that order. Passing the
# predictions first swaps precision with recall and reports the support of the
# predicted classes instead of the true ones.
print(classification_report(y_test, y_pred))


# Display the results in a confusion matrix to identify where more data may be needed.
# confusion_matrix puts true labels on rows and predictions on columns; the
# transpose puts true labels on the x-axis, matching the axis labels below.
mat = confusion_matrix(y_test, y_pred)
sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False)
plt.xlabel('true label')
plt.ylabel('predicted label')

In [None]:
# Define the parameter distributions to search over.
param_distributions = {
    # scipy's uniform(loc, scale) samples from [loc, loc + scale], so C is
    # drawn from [0.1, 10.1] here.
    'C': uniform(loc=0.1, scale=10),
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'gamma': ['scale', 'auto'] + list(np.arange(0.01, 1, 0.01)),
}

# Create the RandomizedSearchCV object with 5-fold cross-validation and 100 iterations.
# random_state fixes which parameter combinations are sampled, so the reported
# best hyperparameters are reproducible from one run to the next.
random_search = RandomizedSearchCV(
    svm,
    param_distributions,
    n_iter=100,
    cv=5,
    random_state=42,
)

# Fit the RandomizedSearchCV object to the training data
random_search.fit(X_train, y_train)

# Print the best hyperparameters found by RandomizedSearchCV
print('Best hyperparameters:', random_search.best_params_)

# Keep the winning values under short names for the next cell.
C = random_search.best_params_['C']
G = random_search.best_params_['gamma']
K = random_search.best_params_['kernel']

In [None]:
# Re-train an SVM with the hyperparameters chosen by RandomizedSearchCV and
# check the test accuracy again.
tuned_params = {'C': C, 'gamma': G, 'kernel': K}
svm = SVC(**tuned_params)
svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)
tuned_accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', tuned_accuracy)