In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC



In [2]:
# Seed NumPy's global random generator so NumPy-based randomness in later cells is repeatable.
np.random.seed(5)

In [5]:
# Pre-split data set: 80 rows for training, 20 rows held out for testing.
train = pd.read_csv("basketball_train.csv")  # 80 rows
test = pd.read_csv("basketball_test.csv")  # 20 rows

In [None]:
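# C: the parameter that controls the margin of the decision boundary
# gamma: the parameter that controls how far the influence of each data point reaches
# Larger C -> the gap (margin) between the decision boundary and the support vectors gets narrower
# Larger gamma -> the decision boundary hugs the data points more closely (it bends more sharply)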

In [6]:
train.head() 
# Features (independent variables): 3P (three-pointers) and BLK (blocks) -> target (dependent variable): Pos

Unnamed: 0,Player,Pos,3P,TRB,BLK
0,Will Barton,SG,1.5,4.3,0.5
1,Joe Harris,SG,1.6,2.8,0.2
2,Marco Belinelli,SG,1.4,2.4,0.1
3,Tony Snell,SG,1.8,3.1,0.2
4,Manu Ginobili,SG,1.3,2.3,0.2


In [10]:
# Feature matrix: three-pointers and blocks per player.
xTrain = train.loc[:, ['3P', 'BLK']]
# Target labels: the player's position.
yTrain = train.loc[:, 'Pos']

In [11]:
# SVM modelling: search for the best hyperparameters

def svcParam(x, y, cv=10):
    """Grid-search an RBF-kernel SVC over C and gamma and return the fitted search.

    Parameters
    ----------
    x : feature matrix to train on (anything ``GridSearchCV.fit`` accepts as X).
    y : target labels aligned with ``x``.
    cv : cross-validation setting forwarded to ``GridSearchCV`` (default 10,
        the value that was previously hard-coded).

    Returns
    -------
    The fitted ``GridSearchCV`` object. With scikit-learn's default
    ``refit=True`` it exposes ``best_params_`` / ``best_score_`` and predicts
    with the best parameter combination found.
    """
    svcParams = [
        {
            'kernel': ['rbf'],
            'gamma': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1],
            'C': [0.01, 0.1, 1, 10, 100, 1000]
        }
    ]

    model = GridSearchCV(SVC(), svcParams, cv=cv)
    # Fit on the arguments that were passed in. The previous version fitted on
    # the notebook globals xTrain/yTrain and silently ignored x and y.
    model.fit(x, y)
    return model  # search object fitted with the best parameter combination


In [12]:
# Run the grid search on the training split and print the search configuration.
model = svcParam(xTrain, yTrain)
print(model)

GridSearchCV(cv=10, estimator=SVC(),
             param_grid=[{'C': [0.01, 0.1, 1, 10, 100, 1000],
                          'gamma': [1e-05, 0.0001, 0.001, 0.01, 0.1, 1],
                          'kernel': ['rbf']}])


In [15]:
# Best hyperparameter combination, then its mean cross-validated accuracy (about 96%).
print(model.best_params_, model.best_score_, sep="\n")

# How the 10-fold cross-validation works on the 80 training rows
# (the per-fold figures below are illustrative):
#  1) fit on 9 folds, validate on the remaining fold : 95%
#  2) fit on 9 folds, validate on the remaining fold : 90%
#              ...
# 10) fit on 9 folds, validate on the remaining fold : 100%
# Mean of the accuracies from 1) to 10) : 96%
            

{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}
0.9625
11


In [17]:
# Same two feature columns as the training matrix, taken from the held-out set.
xTest = test.loc[:, ['3P', 'BLK']]

In [18]:
# The list selector keeps this a one-column DataFrame (not a Series);
# the comparison table at the end of the notebook indexes it as yTest['Pos'].
yTest = test.loc[:, ['Pos']]

In [19]:
# Predict positions for the held-out rows with the fitted grid-search model.
yPred = model.predict(xTest)

In [24]:
# NOTE(review): this import belongs in the first (imports) cell with the other sklearn imports.
from sklearn.metrics import classification_report
# The report lists precision, recall, F1 score and accuracy.

In [25]:
# Share of held-out rows whose predicted position matches the true position.
print(f"정확도:{accuracy_score(yTest, yPred)}")

정확도:0.95


In [27]:
# Per-class precision / recall / F1 plus overall accuracy on the held-out set.
print(classification_report(y_true=yTest, y_pred=yPred))

              precision    recall  f1-score   support

           C       1.00      0.91      0.95        11
          SG       0.90      1.00      0.95         9

    accuracy                           0.95        20
   macro avg       0.95      0.95      0.95        20
weighted avg       0.96      0.95      0.95        20



In [29]:
# Display the held-out labels (a one-column DataFrame) for inspection.
yTest

Unnamed: 0,Pos
0,C
1,SG
2,SG
3,SG
4,SG
5,C
6,C
7,SG
8,SG
9,C


In [30]:
# Side-by-side table of predicted ("예측") and actual ("실제") positions for the held-out rows.
pd.DataFrame(
    {
        "예측": yPred,
        "실제": yTest['Pos'],
    }
)

Unnamed: 0,예측,실제
0,C,C
1,SG,SG
2,SG,SG
3,SG,SG
4,SG,SG
5,C,C
6,C,C
7,SG,SG
8,SG,SG
9,C,C
