# Import

In [20]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

# Load Dataset

In [7]:
data = pd.read_csv('Indeks Standar Pencemar Udara.csv')

data.head()

Unnamed: 0,tanggal,pm10,pm25,so2,co,1,no2,max,critical,categori,location
0,1/1/2021,43,,58,29,35,65,65,1,1,DKI2
1,1/2/2021,58,,86,38,64,80,86,2,1,DKI3
2,1/3/2021,64,,93,25,62,86,93,2,1,DKI3
3,1/4/2021,50,,67,24,31,77,77,1,1,DKI2
4,1/5/2021,59,,89,24,35,77,89,2,1,DKI3


# Selection

In [8]:
# artibut yang dihilangkan adalah tanggal, stasiun dan critical

data_selection = data.drop(columns=['tanggal','critical', 'location'])
data_selection.head()

Unnamed: 0,pm10,pm25,so2,co,1,no2,max,categori
0,43,,58,29,35,65,65,1
1,58,,86,38,64,80,86,1
2,64,,93,25,62,86,93,1
3,50,,67,24,31,77,77,1
4,59,,89,24,35,77,89,1


# Cleaning

In [9]:
# pembersihan data missing value

data_cleaning = data_selection.dropna()
data_cleaning.head()

Unnamed: 0,pm10,pm25,so2,co,1,no2,max,categori
31,73,126.0,38,26,46,34,126,0
32,53,70.0,40,14,55,25,70,1
33,32,53.0,40,11,42,19,53,1
34,36,59.0,40,14,47,24,59,1
35,29,51.0,40,14,45,35,51,1


In [10]:
data_cleaning.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 334 entries, 31 to 364
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   pm10      334 non-null    int64  
 1   pm25      334 non-null    float64
 2   so2       334 non-null    int64  
 3   co        334 non-null    int64  
 4   1         334 non-null    int64  
 5   no2       334 non-null    int64  
 6   max       334 non-null    int64  
 7   categori  334 non-null    int64  
dtypes: float64(1), int64(7)
memory usage: 23.5 KB


# Ganti Type Data

In [12]:
data_type = data_cleaning.astype({"pm25": 'int'})
data_type.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 334 entries, 31 to 364
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   pm10      334 non-null    int64
 1   pm25      334 non-null    int32
 2   so2       334 non-null    int64
 3   co        334 non-null    int64
 4   1         334 non-null    int64
 5   no2       334 non-null    int64
 6   max       334 non-null    int64
 7   categori  334 non-null    int64
dtypes: int32(1), int64(7)
memory usage: 22.2 KB


In [13]:
data_type.head()

Unnamed: 0,pm10,pm25,so2,co,1,no2,max,categori
31,73,126,38,26,46,34,126,0
32,53,70,40,14,55,25,70,1
33,32,53,40,11,42,19,53,1
34,36,59,40,14,47,24,59,1
35,29,51,40,14,45,35,51,1


In [14]:
data_type['categori'].value_counts()

1    195
0    136
2      3
Name: categori, dtype: int64

In [15]:
X = data_type.drop (columns='categori', axis=1)
Y = data_type['categori']
print(X)

     pm10  pm25  so2  co   1  no2  max
31     73   126   38  26  46   34  126
32     53    70   40  14  55   25   70
33     32    53   40  11  42   19   53
34     36    59   40  14  47   24   59
35     29    51   40  14  45   35   51
..    ...   ...  ...  ..  ..  ...  ...
360    75   121   61  23  40   47  121
361    59    89   53  16  34   33   89
362    61    98   54  15  37   29   98
363    60   102   53  17  38   44  102
364    64    90   52  44  37   53   90

[334 rows x 7 columns]


In [16]:
print(Y)

31     0
32     1
33     1
34     1
35     1
      ..
360    0
361    1
362    1
363    0
364    1
Name: categori, Length: 334, dtype: int64


# DATA TRAINING DAN DATA TESTING

In [17]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

In [18]:
print(X.shape, X_train.shape, X_test.shape)

(334, 7) (267, 7) (67, 7)


# MODEL TRAINING

In [21]:
classifier = svm.SVC(kernel='linear')


In [22]:
classifier.fit(X_train, Y_train)

SVC(kernel='linear')

# EVALUASI TINGKAT AKURASI

In [24]:
X_train_prediction = classifier.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, Y_train)

In [25]:
print('Akurasi data training =', training_data_accuracy)

Akurasi data training = 1.0


In [27]:
X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score(X_test_prediction, Y_test)

In [28]:
print('Akurasi data testing =', training_data_accuracy)


Akurasi data testing = 1.0


# MODEL

In [35]:
input_data = (73, 126, 38, 26, 46, 34, 126)

input_data_as_numpy_array = np.array(input_data)

input_data_reshape = input_data_as_numpy_array.reshape(1,-1)

prediction = classifier.predict(input_data_reshape)
print(prediction)

if (prediction[0] == 0):
    print('Tidak Baik')

elif (prediction[0] == 1):
    print('Sedang')

else :
    print('Baik')

[0]
Tidak Baik




SIMPAN MODEL

In [38]:
import pickle

In [39]:
filename = 'resto_model.sav'
pickle.dump(classifier, open(filename,'wb'))