### 数据加载

In [67]:
from sklearn.datasets import make_blobs
data = make_blobs()

### 数据观察

In [68]:
import pandas as pd 
dataset = pd.DataFrame(data[0], columns=['feature1', 'feature2'])
dataset['label'] = data[1].T

In [69]:
dataset.describe()

Unnamed: 0,feature1,feature2,label
count,100.0,100.0,100.0
mean,0.140747,-2.707816,0.99
std,2.610902,5.253265,0.822598
min,-4.972564,-11.119013,0.0
25%,-2.671927,-9.032948,0.0
50%,1.088377,-1.380279,1.0
75%,2.089604,2.19161,2.0
max,4.258083,4.886955,2.0


In [70]:
dataset.head()

Unnamed: 0,feature1,feature2,label
0,1.656319,3.217968,2
1,2.031102,2.473734,2
2,-1.987267,-10.95555,0
3,1.922875,4.886955,2
4,2.734843,0.016298,1


In [71]:
dataset.shape

(100, 3)

In [72]:
X = data[0]
print('前几条数据：', '\n',X[:5])
print('数据条数：',len(X))

前几条数据： 
 [[  1.65631936   3.21796784]
 [  2.03110162   2.47373369]
 [ -1.98726663 -10.95554974]
 [  1.92287494   4.88695493]
 [  2.73484315   0.01629762]]
数据条数： 100


In [73]:
y_label = data[1]
print('前几条数据：', '\n',y_label[:5])
print('数据条数：',len(y_label))

前几条数据： 
 [2 2 0 2 1]
数据条数： 100


### 训练数据准备

In [74]:
from sklearn.model_selection import train_test_split
train_X, test_X, train_y, test_y = train_test_split(X, y_label, test_size=0.3)

### 模型训练与评估

#### OneVsRestClassifier + LinearSVC组合的多分类模型

In [85]:
### 模型训练和评估
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
multi_line_svm_model = OneVsRestClassifier(LinearSVC())
multi_line_svm_model.fit(train_X, train_y)



OneVsRestClassifier(estimator=LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0),
          n_jobs=None)

In [92]:
print('训练集准确率：', multi_line_svm_model.score(train_X, train_y))
print('测试集集准确率：', multi_line_svm_model.score(test_X, test_y))

训练集准确率： 1.0
测试集集准确率： 1.0


#### OneVsRestClassifier + LR组合的多分类模型

In [99]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
multi_lr_model = OneVsRestClassifier(LogisticRegression(solver='lbfgs'))
multi_lr_model.fit(train_X, train_y)

OneVsRestClassifier(estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False),
          n_jobs=None)

In [100]:
print('训练集准确率：', multi_lr_model.score(train_X, train_y))
print('测试集集准确率：', multi_lr_model.score(test_X, test_y))

训练集准确率： 1.0
测试集集准确率： 1.0


#### SVM模型

In [94]:
from sklearn.svm import SVC
svm_model = SVC(gamma='auto')
svm_model.fit(train_X, train_y)
print('训练集准确率：', svm_model.score(train_X, train_y))
print('测试集集准确率：', svm_model.score(test_X, test_y))

训练集准确率： 1.0
测试集集准确率： 1.0


#### KNN模型

In [97]:
from sklearn.neighbors import KNeighborsClassifier
for n in range(1,5):
    knn_model = KNeighborsClassifier(n_neighbors=3)
    knn_model.fit(train_X, train_y)
    print('n_neighbors:', n)
    print('训练集准确率：', knn_model.score(train_X, train_y))
    print('测试集集准确率：', knn_model.score(test_X, test_y))
    print('')

n_neighbors: 1
训练集准确率： 1.0
测试集集准确率： 1.0

n_neighbors: 2
训练集准确率： 1.0
测试集集准确率： 1.0

n_neighbors: 3
训练集准确率： 1.0
测试集集准确率： 1.0

n_neighbors: 4
训练集准确率： 1.0
测试集集准确率： 1.0

