In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import svm

# accuration predict
from sklearn.metrics import  confusion_matrix
from sklearn.metrics import fbeta_score
from sklearn.metrics import classification_report

In [3]:
# import data and convert to dataframe
df = pd.read_csv('Datasets/stara_class.csv')

df.head()

Unnamed: 0,Temperature (K),Luminosity(L/Lo),Radius(R/Ro),Absolute magnitude(Mv),Star type,Star color,Spectral Class
0,3068,0.0024,0.17,16.12,0,Red,M
1,3042,0.0005,0.1542,16.6,0,Red,M
2,2600,0.0003,0.102,18.7,0,Red,M
3,2800,0.0002,0.16,16.65,0,Red,M
4,1939,0.000138,0.103,20.06,0,Red,M


In [4]:
X = df.iloc[:, :3]
X

Unnamed: 0,Temperature (K),Luminosity(L/Lo),Radius(R/Ro)
0,3068,0.002400,0.1700
1,3042,0.000500,0.1542
2,2600,0.000300,0.1020
3,2800,0.000200,0.1600
4,1939,0.000138,0.1030
...,...,...,...
235,38940,374830.000000,1356.0000
236,30839,834042.000000,1194.0000
237,8829,537493.000000,1423.0000
238,9235,404940.000000,1112.0000


In [5]:
y = df.iloc[:, -1]
y

0      M
1      M
2      M
3      M
4      M
      ..
235    O
236    O
237    A
238    A
239    O
Name: Spectral Class, Length: 240, dtype: object

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

## SVM

---
SVM merupakan suatu teknik untuk menemukan hyperplane yang bisa memisahkan dua set data dari dua kelas yang berbeda (Vapnik, 1999). SVM memiliki kelebihan diantaranya adalah dalam menentukan jarak menggunakan support vector sehingga proses komputasi menjadi cepat (Vapnik, 1995). <br> <br>
Hyperplane adalah sebuah fungsi yang dapat digunakan untuk pemisah antar kelas. Dalam SVM objek data terluar yang paling dekat dengan hyperplane disebut support vector. Objek yang disebut support vector paling sulit diklasifikasikan dikarenakan posisi yang hampir tumpang tindih (overlap) dengan kelas lain

#### Implementasi Code :

In [7]:
model_svm = svm.SVC(kernel='linear')
model_svm.fit(X_train, y_train)

model_svm

SVC(kernel='linear')

In [8]:
predict_svm = model_svm.predict(X_test)
predict_svm

array(['B', 'A', 'M', 'O', 'A', 'F', 'B', 'K', 'B', 'B', 'M', 'K', 'M',
       'B', 'M', 'O', 'M', 'B', 'F', 'B', 'M', 'B', 'O', 'O', 'O', 'M',
       'M', 'M', 'O', 'B', 'M', 'F', 'M', 'M', 'M', 'B', 'A', 'B', 'B',
       'M', 'F', 'F', 'O', 'M', 'M', 'M', 'O', 'O'], dtype=object)

In [9]:
print(classification_report(y_test, predict_svm))

              precision    recall  f1-score   support

           A       1.00      0.43      0.60         7
           B       0.67      0.89      0.76         9
           F       0.60      0.50      0.55         6
           K       0.50      1.00      0.67         1
           M       1.00      1.00      1.00        17
           O       0.89      1.00      0.94         8

    accuracy                           0.83        48
   macro avg       0.78      0.80      0.75        48
weighted avg       0.86      0.83      0.82        48



In [10]:
print(fbeta_score(y_test, predict_svm, beta=1.3, average='micro'))

0.8333333333333335


## SVR

---
Support Vector Regression (SVR) merupakan suatu metode SVM yang diterapkan pada kasus regresi. Menurut (Scholkopt dan Smola, 2012), SVR bertujuan untuk menemukan sebuah fungsi f(x) sebagai suatu hyperplane (garis pemisah) berupa fungsi regresi yang mana sesuai dengan semua input data dengan membuat error (ε) sekecil mungkin. Menurut Santoso (2007), misalkan dipunyai l set data training, ( Xi, Yi) , i = 1,2,…,l dimana Xi merupakan vektor input

#### Implementasi Code :



In [11]:
from sklearn.preprocessing import LabelEncoder

In [12]:
y_train

57     M
122    M
51     M
161    O
233    B
      ..
79     M
228    O
200    B
22     F
158    A
Name: Spectral Class, Length: 192, dtype: object

In [13]:
le = LabelEncoder()
y_train_encode = le.fit_transform(y_train)

In [14]:
model_svr = svm.SVR(kernel='rbf')
model_svr.fit(X_train, y_train_encode.astype('int'))

model_svr

SVR()

In [15]:
predict_svr = model_svr.predict(X_test)
predict_svr.round()

array([5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
       5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 4., 5., 5., 5., 5.,
       5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.])

In [16]:
print(fbeta_score(y_test, predict_svm, beta=1.3, average='micro'))


0.8333333333333335
