In [None]:
# 导入模块
import IPython
import sklearn as sk
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# Print the installed version of each imported library
print ('IPython version:', IPython.__version__)
print ('numpy version:', np.__version__)
print ('scikit-learn version:', sk.__version__)
print ('matplotlib version:', matplotlib.__version__)

### （1）获取X 和 y
 - 定义了get_X_and_y函数，可快速获得X和y

In [None]:
import pandas 

# Load the training table from train.csv (path is relative to the working directory);
# later cells read its "ClassId" and "EncodedPixels" columns.
train_df = pandas.read_csv('train.csv')

In [None]:
def rle2mask(rle, input_shape=(256,1600)):
    """Decode a run-length-encoded string into a flat binary mask.

    Parameters
    ----------
    rle : str
        Whitespace-separated integers laid out as
        ``start length start length ...``; each ``start`` is a 1-based
        position in the flattened mask. ``None``, a float NaN (how pandas
        typically represents a missing cell) and an empty string all mean
        "no marked positions".
    input_shape : tuple of int, optional
        ``(height, width)``. Only the product is used, as the mask length.

    Returns
    -------
    numpy.ndarray
        1-D ``uint8`` array of length ``height * width`` holding 1 inside
        every run and 0 elsewhere. The mask is NOT reshaped to 2-D.

    Raises
    ------
    ValueError
        If the string holds an odd number of integers, i.e. a start that
        has no matching length.
    """
    height, width = input_shape
    # Allocate directly as uint8 rather than building a float64 array and casting.
    mask = np.zeros(height * width, dtype=np.uint8)

    # A missing value has no .split(); treat it as an empty encoding.
    if rle is None or (isinstance(rle, float) and np.isnan(rle)):
        return mask

    values = [int(token) for token in rle.split()]
    if len(values) % 2:
        raise ValueError(
            "RLE must contain start/length pairs, got {} integers".format(len(values))
        )

    for start, length in zip(values[0::2], values[1::2]):
        begin = start - 1  # encoded positions are 1-based, array indices 0-based
        mask[begin:begin + length] = 1
    return mask

In [None]:
import pandas 
# NOTE(review): train.csv is already read into train_df by an earlier cell, so this second read looks redundant.
train_df = pandas.read_csv('train.csv')

# One sub-table per ClassId value (1-4); get_mask_from_single_Class looks these up by name.
df_class_1 = train_df[train_df["ClassId"]==1]
df_class_2 = train_df[train_df["ClassId"]==2]
df_class_3 = train_df[train_df["ClassId"]==3]
df_class_4 = train_df[train_df["ClassId"]==4]

def get_mask_from_single_Class(ClassId, num, input_shape=(256, 1600)):
    """Randomly draw ``num`` rows of one class and decode their masks.

    Parameters
    ----------
    ClassId : int
        Class to sample from. The rows come from the module-level table
        named ``df_class_<ClassId>``.
    num : int
        Number of rows to draw, without replacement.
    input_shape : tuple of int, optional
        ``(height, width)`` forwarded to ``rle2mask``; the product is the
        number of columns in the result. Defaults to ``(256, 1600)``, i.e.
        409600 columns.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(num, height * width)``; row ``i`` is the
        decoded "EncodedPixels" value of the ``i``-th sampled row.

    Raises
    ------
    NameError
        If no table named ``df_class_<ClassId>`` exists.
    ValueError
        If ``num`` is larger than the number of rows of that class (or is
        negative). Earlier versions returned the message as a string, which
        callers then tried to concatenate as if it were an array.
    """
    import random

    # Resolve the per-class table by name without eval(), so the argument can
    # never be executed as code.
    frame_name = "df_class_" + str(ClassId)
    try:
        df = globals()[frame_name]
    except KeyError:
        raise NameError("name '{}' is not defined".format(frame_name)) from None

    # A private generator yields the same draw as random.seed(33) followed by
    # random.sample, but leaves the global RNG state untouched.
    rng = random.Random(33)   # change the seed to draw a different sample
    try:
        Index_List = rng.sample(range(0, df.shape[0]), num)
    except ValueError as err:
        raise ValueError("num超过训练集提供的{}类裂纹数量".format(ClassId)) from err

    height, width = input_shape
    mask = np.zeros((num, height * width))
    for i, index in enumerate(Index_List):
        mask[i] = rle2mask(df.iloc[index]["EncodedPixels"], input_shape)

    return mask

def get_X_and_y(NUM=()):
    """Assemble the feature matrix ``X`` and label vector ``y`` for classes 1-4.

    Parameters
    ----------
    NUM : sequence of int
        How many samples to draw from each class, in class order; for
        example ``(200, 230, 250, 300)`` takes 200 of class 1, 230 of
        class 2, and so on. Entries beyond the fourth are ignored.

    Returns
    -------
    X : numpy.ndarray
        The per-class arrays returned by ``get_mask_from_single_Class``,
        stacked row-wise in class order.
    y : numpy.ndarray
        Integer labels (1-4), one per row of ``X``.

    Raises
    ------
    ValueError
        If fewer than four counts are supplied.
    """
    if len(NUM) < 4:
        raise ValueError(
            "NUM must give a sample count for each of the 4 classes, got {}".format(len(NUM))
        )

    blocks = []
    labels = []
    for class_id in range(1, 5):
        count = NUM[class_id - 1]
        blocks.append(get_mask_from_single_Class(class_id, count))
        labels.append(np.full(count, class_id, dtype=int))

    # Join once at the end so the accumulated rows are not re-copied for every class.
    return np.concatenate(blocks), np.concatenate(labels)

In [None]:
# Show the (rows, columns) shape of each per-class table, classes 1 to 4 in order.
for class_df in (df_class_1, df_class_2, df_class_3, df_class_4):
    print(class_df.shape)

In [None]:
# Number of samples to draw from classes 1, 2, 3 and 4, in that order
NUM = [250,240,260,250]
X,y = get_X_and_y(NUM)

### （2）获取训练集和测试集

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the samples as the test set; random_state=0 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0)

### （3）采用SVM进行训练

In [None]:
from sklearn.svm import SVC
# Support-vector classifier with a linear kernel (all other settings left at their defaults)
svc_1 = SVC(kernel='linear')

In [None]:
# Fit on the training split only. Fitting on the full X, y would let the model
# see X_test, which later cells use to compute the test-set report and
# confusion matrix, making those metrics optimistic.
svc_1.fit(X_train, y_train)

### （4）训练结果展示

 - K折交叉验证分析

In [None]:
from sklearn.model_selection import cross_val_score, KFold
from scipy.stats import sem # sem 标准误差平均

def evaluate_cross_validation(clf, X, y, K):
    """Score ``clf`` with shuffled K-fold cross-validation and print the outcome.

    Prints the per-fold scores first, then their mean together with the
    standard error of the mean. Nothing is returned.

    Parameters
    ----------
    clf : estimator passed to ``cross_val_score``
    X, y : samples and targets to cross-validate on
    K : int
        Number of folds.
    """
    # Shuffled folds with a fixed seed, so repeated runs use the same partition.
    folds = KFold(n_splits=K, shuffle=True, random_state=0)
    fold_scores = cross_val_score(clf, X, y, cv=folds)
    print(fold_scores)

    summary = "\n平均值: {0:.3f} (均标准差:+/-{1:.3f})"
    mean_score = np.mean(fold_scores)
    standard_error = sem(fold_scores)
    print(summary.format(mean_score, standard_error))

In [None]:
# 5-fold cross-validation of svc_1 on the training split
evaluate_cross_validation(svc_1, X_train, y_train, 5)

 - 分类识别效果

In [None]:
from sklearn import metrics

def train_and_evaluate(clf, X_train, X_test, y_train, y_test):
    """Fit ``clf`` on the training split and print its evaluation.

    Printed, in order: the score on the training split, the score on the
    test split, the classification report and the confusion matrix for the
    test-split predictions. ``clf`` is fitted in place; nothing is returned.
    """
    clf.fit(X_train, y_train)

    # Score sections: heading first, then the score for that split.
    score_sections = (
        ("训练集精度:", (X_train, y_train)),
        ("\n测试集精度:", (X_test, y_test)),
    )
    for heading, (features, targets) in score_sections:
        print(heading)
        print(clf.score(features, targets))

    predictions = clf.predict(X_test)

    # Report sections, both computed from the same test-split predictions.
    report_sections = (
        ("\n分类识别报告:", metrics.classification_report),
        ("\n混淆矩阵:", metrics.confusion_matrix),
    )
    for heading, summarize in report_sections:
        print(heading)
        print(summarize(y_test, predictions))

In [None]:
# Predict labels for the held-out test set.
# NOTE(review): the metrics below are an honest test estimate only if svc_1 was fitted without X_test — check the fit cell.
y_pred = svc_1.predict(X_test)

In [None]:
# Print the classification report for the test-set predictions
print ("分类识别报告:")
report_str = metrics.classification_report(y_test, y_pred)
print (report_str)

In [None]:
# Build the confusion matrix for the test-set predictions, then print its
# shape followed by the matrix itself.
conf_mat = metrics.confusion_matrix(y_test, y_pred)

print("混淆矩阵维度")
print(conf_mat.shape)
print("\n\n")
print("混淆矩阵:")
print(conf_mat)

In [None]:
# Print the overall training and evaluation results (train_and_evaluate refits svc_1 on the training split first)
train_and_evaluate(svc_1, X_train, X_test, y_train, y_test)