<a href="https://colab.research.google.com/github/mehdiabbasidev/darsman-machine-learning/blob/main/SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Dataset download link:
https://drive.google.com/file/d/13kKragWNy0U1al-U0alNmgBuTtXbN0KB/view?usp=sharing

In [None]:
# Mount Google Drive into the Colab filesystem; the dataset is later read
# from a path underneath this mount point.
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
%matplotlib inline

In [None]:
import warnings
# Install a blanket "ignore" filter: no category or module is given, so every
# warning raised after this cell is suppressed.
# NOTE(review): this also hides any warnings emitted by the model fits below —
# confirm that is intended.
warnings.filterwarnings('ignore')

### Pulsar Dataset:
* Mean of the integrated profile: میانگین پروفایل یکپارچه (شدت پالس‌ها در یک دوره زمانی)
* Standard deviation of the integrated profile: انحراف معیار پروفایل یکپارچه (نوسانات شدت پالس‌ها)
* Excess kurtosis of the integrated profile: کشیدگی اضافی پروفایل یکپارچه (میزان تیز بودن توزیع شدت پالس‌ها)
* Skewness of the integrated profile: چولگی پروفایل یکپارچه (تقارن یا عدم تقارن توزیع شدت پالس‌ها)
* Mean of the DM-SNR curve: میانگین منحنی DM-SNR (تاخیر پراکندگی)
* Standard deviation of the DM-SNR curve: انحراف معیار منحنی DM-SNR (نوسانات شدت سیگنال به نویز)
* Excess kurtosis of the DM-SNR curve: کشیدگی اضافی منحنی DM-SNR (میزان تیز بودن توزیع شدت سیگنال به نویز)
* Skewness of the DM-SNR curve: چولگی منحنی DM-SNR (تقارن یا عدم تقارن توزیع شدت سیگنال به نویز)
* target_class: اینکه ستاره pulsar هست یا خیر

In [None]:
# Load the pulsar dataset from the mounted Drive folder.
df = pd.read_csv('/content/drive/MyDrive/datasets/pulsar_stars.csv')

# A notebook cell only renders its *last* expression, so a bare `df.head()`
# in the middle of the cell would be computed and thrown away. Display the
# preview explicitly and leave the shape as the cell's final value.
display(df.head())
df.shape

In [None]:
# Replace the original headers with short names: four statistics for the
# integrated profile (IP), the same four for the DM-SNR curve, then the label.
df.columns = [
    f'{curve} {statistic}'
    for curve in ('IP', 'DM-SNR')
    for statistic in ('Mean', 'Sd', 'Kurtosis', 'Skewness')
] + ['target_class']
df.head()

In [None]:
# Absolute number of rows per value of the label column.
df['target_class'].value_counts()

In [None]:
# Same counts as above, expressed as a percentage of all rows in df.
df['target_class'].value_counts().div(len(df)).mul(100)

In [None]:
# Column dtypes and non-null counts (printed directly by `info`).
df.info()

# Missing values per column. This must be displayed explicitly: only the last
# expression of a cell is rendered, so a bare expression here would be lost.
display(df.isnull().sum())

# Descriptive statistics rounded to two decimals (the cell's rendered value).
round(df.describe(), 2)

In [None]:
import scipy.stats as stats

# Store the z-score of 'IP Mean' as a temporary helper column on df.
df['zscore_IP Mean'] = stats.zscore(df['IP Mean'])

# Keep the rows whose z-score lies more than 3 away from zero on either side.
outliers = df[df['zscore_IP Mean'].abs() > 3]
outliers.shape

In [None]:
# Remove the temporary z-score helper column again.
# errors='ignore' makes the cell idempotent: without it, running the cell a
# second time raises KeyError because the column has already been dropped.
df = df.drop(columns=['zscore_IP Mean'], errors='ignore')
df.head()

In [None]:
# One box plot per feature (the first eight columns) on a 4x2 grid.
fig, axes = plt.subplots(4, 2, figsize=(24, 20))

for ax, col in zip(axes.ravel(), df.columns[:8]):
    df.boxplot(column=col, ax=ax)
    # Blank the title and use the feature name as the y-axis label instead.
    ax.set_title('')
    ax.set_ylabel(col)

In [None]:
# Label vector, and the feature matrix made of every other column.
y = df['target_class']
X = df.drop(columns=['target_class'])

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Standardise the features and rebuild DataFrames with the original labels.
cols = X.columns

scaler = StandardScaler()
# Fit on the training split only, then apply the same transform to the test
# split, so no test-set statistics influence the scaling.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Pass the Index itself as `columns`: wrapping it in a list (`[cols]`) makes
# pandas build MultiIndex columns, so `X_train['IP Mean']` would return a
# DataFrame rather than a Series. Re-using the original row index keeps the
# frames aligned with y_train / y_test for any later pandas operations.
X_train = pd.DataFrame(X_train_scaled, columns=cols, index=X_train.index)
X_test = pd.DataFrame(X_test_scaled, columns=cols, index=X_test.index)

# Show a short preview of each split instead of dumping both full frames.
display(X_train.head(), X_test.head())

In [None]:
# Baseline: SVC with all default hyper-parameters.
svc = SVC().fit(X_train, y_train)  # fit() returns the fitted estimator
y_pred = svc.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"accuracy score for c=1 : {test_accuracy:0.4f}")

In [None]:
# Default kernel with a larger regularisation parameter, C=100.
svc = SVC(C=100).fit(X_train, y_train)
y_pred = svc.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"accuracy score for C=100 : {test_accuracy:0.4f}")

In [None]:
# Default kernel with C=1000. `svc` keeps pointing at this model until it is
# rebound by a later cell.
svc = SVC(C=1000).fit(X_train, y_train)
y_pred = svc.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"accuracy score for C=1000 : {test_accuracy:0.4f}")

In [None]:
# Linear kernel with the default C.
linear_svc1 = SVC(kernel='linear').fit(X_train, y_train)
y_pred = linear_svc1.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"accuracy score for kernel='linear' and C=1 : {test_accuracy:0.4f}")

In [None]:
# Linear kernel, C=100. Kept under its own name because the over-fitting
# check further down scores this model again.
linear_svc100 = SVC(kernel='linear', C=100).fit(X_train, y_train)
y_pred = linear_svc100.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"accuracy score for kernel='linear' and C=100 : {test_accuracy:0.4f}")

In [None]:
# Linear kernel, C=1000. Also re-used by the over-fitting check below.
linear_svc1000 = SVC(kernel='linear', C=1000).fit(X_train, y_train)
y_pred = linear_svc1000.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"accuracy score for kernel='linear' and C=1000 : {test_accuracy:0.4f}")

In [None]:
# Polynomial kernel, C=1.
poly_svc = SVC(C=1, kernel='poly').fit(X_train, y_train)
y_pred = poly_svc.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"accuracy score for kernel='poly' and C=1 : {test_accuracy:0.4f}")

In [None]:
# Polynomial kernel, C=100.
poly_svc100 = SVC(C=100, kernel='poly').fit(X_train, y_train)
y_pred = poly_svc100.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"accuracy score for kernel='poly' and C=100 : {test_accuracy:0.4f}")

In [None]:
# Sigmoid kernel, C=1.
sigmoid_svc = SVC(C=1, kernel='sigmoid').fit(X_train, y_train)
y_pred = sigmoid_svc.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"accuracy score for kernel='sigmoid' and C=1 : {test_accuracy:0.4f}")

In [None]:
# Sigmoid kernel, C=100.
sigmoid_svc100 = SVC(C=100, kernel='sigmoid').fit(X_train, y_train)
y_pred = sigmoid_svc100.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"accuracy score for kernel='sigmoid' and C=100 : {test_accuracy:0.4f}")

### Check overfitting

In [None]:
# Compare train vs. test accuracy for three fitted models, in this order:
# `svc` (whatever the name is bound to at this point), linear_svc100 and
# linear_svc1000. A blank line separates consecutive reports.
for position, model in enumerate((svc, linear_svc100, linear_svc1000)):
    if position > 0:
        print()
    print(f'Training set score: {model.score(X_train, y_train):.4f}')
    print(f'Test set score: {model.score(X_test, y_test):.4f}')


### Null Accuracy

In [None]:
# Number of test-set rows per class label.
vc=y_test.value_counts()
vc

In [None]:
# Null accuracy: the score of a baseline that always predicts the most
# frequent class in the test split. It is derived from y_test rather than
# from hand-copied counts, so it stays correct if the data or split changes.
null_accuracy = y_test.value_counts().max() / len(y_test)
print(f'Null accuracy score: {null_accuracy:0.4f}')

### Hyperparameter Optimization using GridSearchCV

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Exhaustive 5-fold cross-validated search over C, kernel and gamma.
svc = SVC()

c_values = [1, 10, 100, 1000]
# `gamma` is only used by the rbf/poly kernels. Crossing it with the linear
# kernel would refit the identical linear model once per gamma value, so the
# grid is split per kernel family to avoid those duplicate fits.
param_grid = [
    {'kernel': ['linear'], 'C': c_values},
    {'kernel': ['rbf', 'poly'], 'C': c_values, 'gamma': [1, 0.1, 0.01, 0.001]},
]

grid_search = GridSearchCV(
    estimator=svc,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    verbose=0,
)
grid_search.fit(X_train, y_train)

In [None]:
# Report the best mean cross-validated score and the parameters behind it.
print(
    f"best score : {grid_search.best_score_:.4f}\n\n",
    f"Best Parameters :\n{grid_search.best_params_}",
    sep="\n",
)

In [None]:
# Refit an RBF-kernel SVC with C=100 and score it on the held-out test set.
svc = SVC(C=100, kernel='rbf', gamma='scale')
svc.fit(X_train, y_train)
# The predictions must be stored: otherwise the accuracy below is computed
# from a stale `y_pred` left over from an earlier model's cell.
y_pred = svc.predict(X_test)
# The label reports the C value actually used by this model.
print(f"accuracy score for kernel='rbf' and C=100 : {accuracy_score(y_test, y_pred):0.4f}")