# import

In [21]:
import pickle
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from skimage.feature import local_binary_pattern
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn import preprocessing

from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import KernelPCA
from sklearn.model_selection import KFold, cross_val_score
from xgboost import XGBClassifier
from sklearn import svm

import warnings
warnings.filterwarnings('ignore')

In [22]:
# Image size shared by the pipeline: it is passed as the output size to
# cv2.warpAffine during augmentation, its halves give the rotation centre, and
# it is reused as the HOG window size.
# NOTE(review): presumably the pickled images already have this shape — confirm.
plantimg_shape = (200, 200)

In [23]:
# Load the pickled training / test sets and report how many images each holds.
# Context managers close the file handles deterministically; the bare
# open(...) calls used before left them to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
with open('E:/py/MachineLearing/MachineLearning-CourseExercise/my_train.pkl', 'rb') as train_file:
    train_plantimage = pickle.load(train_file)
with open('E:/py/MachineLearing/MachineLearning-CourseExercise/my_test.pkl', 'rb') as test_file:
    test_plantimage = pickle.load(test_file)
print('训练集长度:', len(train_plantimage['data']), '测试集长度:', len(test_plantimage['data']))

训练集长度: 4750 测试集长度: 794


数据处理

In [24]:
# Offline data augmentation: every original training image gets five extra
# copies (two flips, three rotations) appended with the same label.
# The 'augmented' flag makes the cell idempotent: without it, re-running the
# cell on the already-extended lists would multiply the data set by 6 again.
# NOTE(review): assumes train_plantimage is a plain dict — confirm.
if not train_plantimage.get('augmented', False):
    rotation_center = (int(plantimg_shape[0] * 0.5), int(plantimg_shape[1] * 0.5))
    # getRotationMatrix2D(center, angle, scale). The matrices do not depend on
    # the image, so they are built once instead of once per image.
    rotation_matrices = [
        cv2.getRotationMatrix2D(rotation_center, angle, 1)
        for angle in (45, 90, 135)
    ]

    # The length is captured before anything is appended, so only the
    # original images are visited.
    original_count = len(train_plantimage['data'])
    for i in range(original_count):
        img = train_plantimage['data'][i]
        target = train_plantimage['target'][i]

        # Flips: code -1 flips around both axes, code 1 around the vertical axis.
        augmented_images = [cv2.flip(img, -1), cv2.flip(img, 1)]
        # Rotations by 45, 90 and 135 degrees around the image centre.
        for matrix in rotation_matrices:
            augmented_images.append(cv2.warpAffine(img, matrix, plantimg_shape))

        for augmented_img in augmented_images:
            train_plantimage['data'].append(augmented_img)
            train_plantimage['target'].append(target)

    train_plantimage['augmented'] = True

print('数据拓展')

数据拓展


In [25]:
def plantimg_mask(image):
    """Return a binary mask selecting the green regions of ``image``.

    The image is converted from BGR to HSV and thresholded to hues within 35
    of 60 (saturation 100..255, value 50..255). The mask is then cleaned with
    a morphological closing using an 11x11 elliptical kernel.
    """
    hue_center, hue_tolerance = 60, 35
    green_low = np.array([hue_center - hue_tolerance, 100, 50])
    green_high = np.array([hue_center + hue_tolerance, 255, 255])

    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # inRange produces a two-valued (black / white) single-channel image.
    raw_mask = cv2.inRange(hsv, green_low, green_high)

    closing_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))
    return cv2.morphologyEx(raw_mask, cv2.MORPH_CLOSE, closing_kernel)


def plantimg_GreenPlant(image):
    """Keep only the pixels of ``image`` covered by the green mask; the rest become zero."""
    green_mask = plantimg_mask(image)
    return cv2.bitwise_and(image, image, mask=green_mask)

In [26]:
def plantimg_GuassProcess(image, sigma=3, amount=0.5):
    """Sharpen ``image`` with an unsharp mask.

    A Gaussian-blurred copy is subtracted from a boosted original:
    ``(1 + amount) * image - amount * blurred``. With the default arguments
    this is ``1.5 * image - 0.5 * blurred``. Note that this enhances edges;
    it does not reduce noise.

    Args:
        image: image accepted by cv2.GaussianBlur / cv2.addWeighted.
        sigma: Gaussian sigmaX. The kernel size is passed as (0, 0), so
            OpenCV derives it from sigma.
        amount: weight of the subtracted blurred image (sharpening strength).

    Returns:
        The weighted combination produced by cv2.addWeighted.
    """
    # cv2.GaussianBlur(src, ksize, sigmaX[, dst[, sigmaY[, borderType]]])
    image_blurred = cv2.GaussianBlur(image, (0, 0), sigma)
    return cv2.addWeighted(image, 1 + amount, image_blurred, -amount, 0)

In [27]:
# cv2.MORPH_OPEN	开运算(open) ,先腐蚀后膨胀的过程。开运算可以用来消除小黑点，在纤细点处分离物体、平滑较大物体的边界的 同时并不明显改变其面积。
# cv2.MORPH_CLOSE	闭运算(close)，先膨胀后腐蚀的过程。闭运算可以用来排除小黑洞。
# cv2.MORPH_GRADIENT	形态学梯度(morph-grad)，可以突出团块(blob)的边缘，保留物体的边缘轮廓。
# cv2.MORPH_TOPHAT	顶帽(top-hat)，将突出比原轮廓亮的部分。
# cv2.MORPH_BLACKHAT	黑帽(black-hat)，将突出比原轮廓暗的部分。

特征提取

In [28]:
# HOG geometry derived from the image size: blocks span 20% of each side,
# cells 10%, and the block stride equals the block size (no overlap).
winSize = plantimg_shape
blockSize = tuple(int(side * 0.2) for side in plantimg_shape)
blockStride = tuple(blockSize)
cellSize = tuple(int(side * 0.1) for side in plantimg_shape)
nbins = 4

# cv2.HOGDescriptor(winSize, blockSize, blockStride, cellSize, nbins,
#                   derivAperture, winSigma, histogramNormType,
#                   L2HysThreshold, gammaCorrection, nlevels)
hog = cv2.HOGDescriptor(winSize, blockSize, blockStride, cellSize, nbins)

# cv2.ORB_create(nfeatures=500, scaleFactor=1.2, nlevels=8, edgeThreshold=31,
#                firstLevel=0, WTA_K=2, scoreType=HARRIS_SCORE, patchSize=31,
#                fastThreshold=20)
# Only nfeatures is overridden; the extraction cells pad ORB output to 50 rows.
orb = cv2.ORB_create(nfeatures=50)

In [29]:
# nfeatures ：最多提取的特征点的数量；
# scaleFactor ： 金字塔图像之间的尺度参数，类似于SIFT中的k；
# nlevels： 高斯金字塔的层数；
# edgeThreshold ：边缘阈值，这个值主要是根据后面的patchSize来定的，靠近边缘edgeThreshold以内的像素是不检测特征点的。
# firstLevel：看过SIFT都知道，我们可以指定第一层的索引值，这里默认为0。
# WTA_K ： 用于产生BRIEF描述子的点对的个数，一般为2个，也可以设置为3个或4个，那么这时候描述子之间的距离计算就不能用汉明距离了，而是应该用一个变种。OpenCV中，如果设置WTA_K = 2，则选用点对就只有2个点，匹配的时候距离参数选择NORM_HAMMING，如果WTA_K设置为3或4，则BRIEF描述子会选择3个或4个点，那么后面匹配的时候应该选择的距离参数为NORM_HAMMING2。
# scoreType ：用于对特征点进行排序的算法，你可以选择HARRIS_SCORE，也可以选择FAST_SCORE，但是它也只是比前者快一点点而已。
# patchSize ：用于计算BRIEF描述子的特征点邻域大小。

In [35]:
# Feature extraction for the training set.
# Each image is masked to its green parts, sharpened and converted to gray;
# four descriptors are then computed from the gray image:
#   - a 20x20 down-sampled copy flattened to 400 values,
#   - a 256-bin LBP histogram,
#   - up to 50 ORB descriptors (32 bytes each), zero-padded to 50x32,
#   - a HOG descriptor.
# winStride / padding are also read by the test-set extraction cell.
winStride = (8, 8)
padding = (8, 8)

plantimg_train_HOG=[]
plantimg_train_ORB=[]
plantimg_train_LBP=[]
plantimg_train_Origin_GRAY=[]

print("训练集特征提取")

for img_data in tqdm(train_plantimage['data']):
    image_GreenPlant = plantimg_GreenPlant(img_data)
    image_Guass = plantimg_GuassProcess(image_GreenPlant)
    gray = cv2.cvtColor(image_Guass, cv2.COLOR_BGR2GRAY)

    # Down-sampled gray pixels
    gray_resized = cv2.resize(gray, (20, 20))
    plantimg_train_Origin_GRAY.append(gray_resized.reshape((-1,)))

    # LBP histogram
    lbp = local_binary_pattern(gray, P=8, R=3)
    max_bins = lbp.max()
    # The removed `normed` keyword is no longer passed (it raises TypeError on
    # current NumPy); density=True is the supported replacement.
    lbp_hist, _ = np.histogram(lbp.reshape((-1,)), density=True, bins=256, range=(0, max_bins))
    plantimg_train_LBP.append(lbp_hist)

    # ORB: detectAndCompute returns None for the descriptors when no keypoint
    # is found. Handle that case explicitly instead of hiding every error
    # behind a bare `except:`. Extra rows beyond 50 are dropped.
    kpsA, descsA = orb.detectAndCompute(gray, None)
    ORB = np.zeros((50, 32), dtype=np.uint8)
    if descsA is not None:
        kept_rows = min(descsA.shape[0], 50)
        ORB[:kept_rows] = descsA[:kept_rows]
    assert ORB.shape == (50, 32)
    plantimg_train_ORB.append(ORB.reshape((-1,)))

    # HOG on the gray image
    #hog_result = hog.compute(image_Guass, winStride, padding).reshape((-1,))
    hog_result = hog.compute(gray, winStride, padding).reshape((-1,))
    plantimg_train_HOG.append(hog_result)

print('训练集HOG维度:{}'.format(plantimg_train_HOG[0].shape))
print('训练集LBP维度:{}'.format(plantimg_train_LBP[0].shape))
print('训练集ORB维度:{}'.format(plantimg_train_ORB[0].shape))
print('训练集GRAY维度:{}'.format(plantimg_train_Origin_GRAY[0].shape))



训练集特征提取


100%|██████████| 28500/28500 [05:04<00:00, 93.63it/s]

训练集HOG维度:(3600,)
训练集LBP维度:(256,)
训练集ORB维度:(1600,)
训练集GRAY维度:(400,)





In [34]:
# Feature extraction for the test set; mirrors the training-set extraction
# (down-sampled gray pixels, LBP histogram, zero-padded ORB, HOG).
# winStride and padding are defined in the training-set extraction cell.
plantimg_test_HOG=[]
plantimg_test_ORB=[]
plantimg_test_LBP=[]
plantimg_test_Origin_GRAY=[]

print("测试集特征提取")

for img_data in tqdm(test_plantimage['data']):
    image_GreenPlant = plantimg_GreenPlant(img_data)
    image_Guass = plantimg_GuassProcess(image_GreenPlant)
    gray = cv2.cvtColor(image_Guass, cv2.COLOR_BGR2GRAY)

    # Down-sampled gray pixels
    gray_resized = cv2.resize(gray, (20, 20))
    plantimg_test_Origin_GRAY.append(gray_resized.reshape((-1,)))

    # LBP histogram
    lbp = local_binary_pattern(gray, P=8, R=3)
    max_bins = lbp.max()
    # The removed `normed` keyword is no longer passed (it raises TypeError on
    # current NumPy); density=True is the supported replacement.
    lbp_hist, _ = np.histogram(lbp.reshape((-1,)), density=True, bins=256, range=(0, max_bins))
    plantimg_test_LBP.append(lbp_hist)

    # ORB: detectAndCompute returns None for the descriptors when no keypoint
    # is found. Handle that case explicitly instead of hiding every error
    # behind a bare `except:`. Extra rows beyond 50 are dropped.
    kpsA, descsA = orb.detectAndCompute(gray, None)
    ORB = np.zeros((50, 32), dtype=np.uint8)
    if descsA is not None:
        kept_rows = min(descsA.shape[0], 50)
        ORB[:kept_rows] = descsA[:kept_rows]
    assert ORB.shape == (50, 32)
    plantimg_test_ORB.append(ORB.reshape((-1,)))

    # HOG on the gray image
    #hog_result = hog.compute(image_Guass, winStride, padding).reshape((-1,))
    hog_result = hog.compute(gray, winStride, padding).reshape((-1,))
    plantimg_test_HOG.append(hog_result)

print('测试集HOG维度:{}'.format(plantimg_test_HOG[0].shape))
print('测试集LBP维度:{}'.format(plantimg_test_LBP[0].shape))
print('测试集ORB维度:{}'.format(plantimg_test_ORB[0].shape))
print('测试集GRAY维度:{}'.format(plantimg_test_Origin_GRAY[0].shape))

测试集特征提取


100%|██████████| 794/794 [00:09<00:00, 80.52it/s]

测试集HOG维度:(3600,)
测试集LBP维度:(256,)
测试集ORB维度:(1600,)
测试集GRAY维度:(400,)





In [36]:
# 特征归一化
# 使用MinMaxScaler()
# sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1), copy=True)
# 最小-最大规范化对原始数据进行线性变换，变换到[0,1]区间（也可以是其他固定最小最大值的区间）每个特征中的最小值变成了0，最大值变成了1.
print('训练集特征归一化开始')
plantimg_train__MinMax_HOG = preprocessing.MinMaxScaler()
plantimg_train__MinMax_HOG_data = plantimg_train__MinMax_HOG.fit_transform(plantimg_train_HOG)
plantimg_train_HOG = plantimg_train__MinMax_HOG.inverse_transform(plantimg_train__MinMax_HOG_data)

plantimg_train__MinMax_LBP = preprocessing.MinMaxScaler()
plantimg_train__MinMax_LBP_data = plantimg_train__MinMax_LBP.fit_transform(plantimg_train_LBP)
plantimg_train_LBP = plantimg_train__MinMax_LBP.inverse_transform(plantimg_train__MinMax_LBP_data)

plantimg_train__MinMax_ORB = preprocessing.MinMaxScaler()
plantimg_train__MinMax_ORB_data = plantimg_train__MinMax_ORB.fit_transform(plantimg_train_ORB)
plantimg_train_ORB = plantimg_train__MinMax_ORB.inverse_transform(plantimg_train__MinMax_ORB_data)

plantimg_train__MinMax_Origin_GRAY = preprocessing.MinMaxScaler()
plantimg_train__MinMax_Origin_GRAY_data = plantimg_train__MinMax_Origin_GRAY.fit_transform(plantimg_train_Origin_GRAY)
plantimg_train_Origin_GRAY = plantimg_train__MinMax_Origin_GRAY.inverse_transform(plantimg_train__MinMax_Origin_GRAY_data)


print('训练集特征归一化结束')

训练集特征归一化开始
训练集特征归一化结束


In [37]:
print('测试集特征归一化开始')
plantimg_test__MinMax_HOG = preprocessing.MinMaxScaler()
plantimg_test__MinMax_HOG_data = plantimg_test__MinMax_HOG.fit_transform(plantimg_test_HOG)
plantimg_test_HOG = plantimg_test__MinMax_HOG.inverse_transform(plantimg_test__MinMax_HOG_data)

plantimg_test__MinMax_LBP = preprocessing.MinMaxScaler()
plantimg_test__MinMax_LBP_data = plantimg_test__MinMax_LBP.fit_transform(plantimg_test_LBP)
plantimg_test_LBP = plantimg_test__MinMax_LBP.inverse_transform(plantimg_test__MinMax_LBP_data)

plantimg_test__MinMax_ORB = preprocessing.MinMaxScaler()
plantimg_test__MinMax_ORB_data = plantimg_test__MinMax_ORB.fit_transform(plantimg_test_ORB)
plantimg_test_ORB = plantimg_test__MinMax_ORB.inverse_transform(plantimg_test__MinMax_ORB_data)

plantimg_test__MinMax_Origin_GRAY = preprocessing.MinMaxScaler()
plantimg_test__MinMax_Origin_GRAY_data = plantimg_test__MinMax_Origin_GRAY.fit_transform(plantimg_test_Origin_GRAY)
plantimg_test_Origin_GRAY = plantimg_test__MinMax_Origin_GRAY.inverse_transform(plantimg_test__MinMax_Origin_GRAY_data)

print('测试集特征归一化结束')

测试集特征归一化开始
测试集特征归一化结束


In [38]:
# Feature fusion: build three column-wise feature sets per split —
# HOG only, HOG+LBP+ORB ("_3"), and HOG+LBP+ORB+gray (the full set).
# Each wider set is built by extending the narrower one.
print('特征融合开始')

plantimg_train_feature_HOG = np.array(plantimg_train_HOG)
plantimg_test_feature_HOG = np.array(plantimg_test_HOG)

plantimg_train_feature_3 = np.hstack([
    plantimg_train_feature_HOG,
    np.array(plantimg_train_LBP),
    np.array(plantimg_train_ORB),
])
plantimg_test_feature_3 = np.hstack([
    plantimg_test_feature_HOG,
    np.array(plantimg_test_LBP),
    np.array(plantimg_test_ORB),
])

plantimg_train_feature = np.hstack([plantimg_train_feature_3, np.array(plantimg_train_Origin_GRAY)])
plantimg_test_feature = np.hstack([plantimg_test_feature_3, np.array(plantimg_test_Origin_GRAY)])

# Report the shapes, narrowest set first.
print('train HOG特征维度', plantimg_train_feature_HOG.shape)
print('test HOG特征维度', plantimg_test_feature_HOG.shape)

print('train 融合3特征维度', plantimg_train_feature_3.shape)
print('test 融合3特征维度', plantimg_test_feature_3.shape)

print('train融合特征维度', plantimg_train_feature.shape)
print('test融合特征维度', plantimg_test_feature.shape)

print('特征提取结束')

特征融合开始
train HOG特征维度 (28500, 3600)
test HOG特征维度 (794, 3600)
train 融合3特征维度 (28500, 5456)
test 融合3特征维度 (794, 5456)
train融合特征维度 (28500, 5856)
test融合特征维度 (794, 5856)
特征提取结束


In [38]:
# #数据降维
# print('数据降维开始')
# n_components=2000
# train_len=len(plantimg_train_feature)
# data=np.vstack([plantimg_train_feature,plantimg_test_feature])

# pca_tsne = TSNE(n_components=3)
# LOWERData_linear = pca_tsne.fit_transform(data)

# # sklearn_kpca = KernelPCA(n_components=n_components, kernel="rbf", gamma=15)
# # LOWERData_nonlinear = sklearn_kpca.fit_transform(data)

# LOWERData = LOWERData_linear
# print(LOWERData.shape)
# print('数据降维结束')

数据降维开始
(29294, 3)


主成分分析（PCA）是使用线性映射将数据进行降维，但是通常情况下高维到低维是非线性的，往往达不到预期的结果。核主成分分析（KPCA）将原始数据通过选择适当的核函数（Kernel）映射到高维空间，再利用高维度空间进行线性降维，是一种用于非线性分类的降维工具。因此 KPCA的核心就是核函数。同时，KPCA采用了比较复杂的非线性映射，提高了非线性数据的处理效率。

In [39]:
# # 降维后的数据
# plantimg_train_feature_low=LOWERData[0:train_len]
# assert train_len==len(plantimg_train_feature_low)
# plantimg_test_feature_low=LOWERData[train_len:]

In [40]:
# #特征数据存储
# print('特征数据存储开始')
# plantimg_train_feature_dist=train_plantimage.copy()
# plantimg_train_feature_dist['data']=plantimg_train_feature
# pickle.dump(plantimg_train_feature_dist,open('E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/plantimg_train_feature_HOG.pkl','wb'))
# plantimg_test_feature_dist=test_plantimage.copy()
# plantimg_test_feature_dist['data']=plantimg_test_feature
# pickle.dump(plantimg_test_feature_dist,open('E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/plantimg_test_feature_HOG.pkl','wb'))
# print('特征数据存储结束')


In [43]:
# # SVM分类(降维)

# print('SVM分类开始')

# modelSVM = svm.SVC()
# modelSVM.fit(plantimg_train_feature_low, train_plantimage['target'])
# predictedSVM = modelSVM.predict(plantimg_test_feature_low)

# print('114514\n')

SVM分类开始
114514



In [41]:
# SVM classification on the fully fused feature set.

print('SVM分类开始')
# Kernel choices: 'linear', 'poly' (polynomial), 'rbf' (radial basis function /
# Gaussian), 'sigmoid', 'precomputed' (caller-supplied kernel matrix).

modelSVM = svm.SVC(
    C=10.0,
    kernel='poly',
    degree=4,
    coef0=0.0,
    verbose=False,
)
modelSVM.fit(plantimg_train_feature, train_plantimage['target'])
predictedSVM = modelSVM.predict(plantimg_test_feature)

# modelSVM_rbf = svm.SVC(kernel='rbf',verbose = False,coef0 = 0.0)
# modelSVM_rbf.fit(plantimg_train_feature, train_plantimage['target'])
# predictedSVM_rbf = modelSVM_rbf.predict(plantimg_test_feature)


print('114514\n')

SVM分类开始
114514



In [45]:
# # 随机森林分类(降维)
# print('RF分类开始')

# modelRF = RandomForestClassifier()
# modelRF.fit(plantimg_train_feature_low, train_plantimage['target'])
# predictedRF = modelRF.predict(plantimg_test_feature_low)

# print('114514\n')

RF分类开始
114514



In [50]:
# Random-forest classification on the fully fused feature set.
print('RF分类开始')

# A fixed random_state makes the forest (bootstrap samples and feature
# sub-sampling) reproducible across kernel restarts.
modelRF = RandomForestClassifier(random_state=0)
modelRF.fit(plantimg_train_feature, train_plantimage['target'])
predictedRF = modelRF.predict(plantimg_test_feature)

print('114514\n')

RF分类开始
114514



In [40]:
# #XGBoost分类(降维)
# print('Xgboost分类开始')

# model = XGBClassifier(max_depth=5)
# model.fit(plantimg_train_feature_low, train_plantimage['target'])
# predictedXG = model.predict(plantimg_test_feature_low)

# print('114514\n')



Xgboost分类开始
114514



In [47]:
# XGBoost classification on the fully fused feature set.
print('Xgboost分类开始')

modelXG = XGBClassifier(max_depth=5)
# fit() returns the estimator itself, so training and prediction are chained.
predictedXG = modelXG.fit(
    plantimg_train_feature,
    train_plantimage['target'],
).predict(plantimg_test_feature)

print('114514\n')


Xgboost分类开始
114514



In [13]:
# def model_kf(my_model):
#     kf = KFold(5, shuffle=True, random_state=50).get_n_splits(plantimg_train_feature)
#     result_list= np.sqrt(-cross_val_score(my_model, plantimg_train_feature, train_plantimage['target'], scoring="f1", cv = kf))
#     return(result_list)
# model_kf(model)

# cross_val_score(estimator, X, y=None, *, groups=None, scoring=None, cv=None, n_jobs=None, verbose=0, fit_params=None, pre_dispatch="2*n_jobs", error_score=np.nan)
# estimator：估计器，也就是模型
# X, y：数据，标签值
# scoring：调用的方法
# cv：交叉验证生成器或可迭代的次数
# n_jobs：同时工作的cpu个数（-1代表全部）
# verbose：日志冗长度，int：冗长度，0：不输出训练过程，1：偶尔输出，>1：对每个子模型都输出
# fit_params：传递给估计器的拟合方法的参数
# pre_dispatch：控制并行执行期间调度的作业数量。减少这个数量对于避免在CPU发送更多作业时CPU内存消耗的扩大是有用的。

# scoring参数 accuracy average_precision f1 f1_micro f1_macro f1_weighted f1_samples neg_log_loss precision

In [42]:

# pred = np.exp(predicted)
# print(predicted)

# Write the SVM submission: start from the sample CSV, fill in the test file
# names and the predicted labels looked up in train_plantimage['dict'], then save.
subSVM = pd.read_csv('E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/sample_submission.csv')
subSVM['file'] = test_plantimage['file_name']
subSVM['species'] = [train_plantimage['dict'][label] for label in predictedSVM]
subSVM.to_csv(
    'E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/submission_SVM.csv',
    index=False,
)
print("结果写入到csv文件")

结果写入到csv文件


In [52]:
# Write the random-forest submission: start from the sample CSV, fill in the
# test file names and the predicted labels looked up in
# train_plantimage['dict'], then save.
subRF = pd.read_csv('E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/sample_submission.csv')
subRF['file'] = test_plantimage['file_name']
subRF['species'] = [train_plantimage['dict'][label] for label in predictedRF]
subRF.to_csv(
    'E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/submission_RF.csv',
    index=False,
)
print("结果写入到csv文件")

结果写入到csv文件


In [49]:
# Write the XGBoost submission: start from the sample CSV, fill in the test
# file names and the predicted labels looked up in train_plantimage['dict'],
# then save.
subXG = pd.read_csv('E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/sample_submission.csv')
subXG['file'] = test_plantimage['file_name']
subXG['species'] = [train_plantimage['dict'][label] for label in predictedXG]
subXG.to_csv(
    'E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/submission_XG.csv',
    index=False,
)
print("结果写入到csv文件")

结果写入到csv文件


In [51]:
# Save the trained models.
# Each pickle is written inside a `with` block so the file is flushed and
# closed as soon as the dump finishes (the bare open(...) calls used before
# never closed their handles explicitly).
with open("E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/modelSVM.pth", "wb") as model_file:
    pickle.dump(modelSVM, model_file)
with open("E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/modelRF.pth", "wb") as model_file:
    pickle.dump(modelRF, model_file)
with open("E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/modelXG.pth", "wb") as model_file:
    pickle.dump(modelXG, model_file)
# The XGBoost model is additionally stored in XGBoost's own format.
modelXG.save_model('E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/XGBoost.model')

In [None]:
# 读取模型

# loaded_modelXG = pickle.load(open("E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/modelXG.pth","rb"))
# loaded_modelRF = pickle.load(open("E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/modelRF.pth","rb"))
# loaded_modelSVM = pickle.load(open("E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/modelSVM.pth","rb"))

In [54]:
# Model comparison (SVM): train the same degree-3 polynomial SVM on two
# smaller feature sets — HOG+LBP+ORB ("_3") and HOG only — and write one
# submission file for each.
print('SVM分类开始')

# --- HOG + LBP + ORB ---
modelSVM_poly_3 = svm.SVC(
    C=10.0,
    kernel='poly',
    degree=3,
    coef0=0.0,
    verbose=False,
)
modelSVM_poly_3.fit(plantimg_train_feature_3, train_plantimage['target'])
predictedSVM_poly_3 = modelSVM_poly_3.predict(plantimg_test_feature_3)

subSVM_poly_3 = pd.read_csv('E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/sample_submission.csv')
subSVM_poly_3['file'] = test_plantimage['file_name']
subSVM_poly_3['species'] = [train_plantimage['dict'][label] for label in predictedSVM_poly_3]
subSVM_poly_3.to_csv(
    'E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/submission_SVM_poly_3.csv',
    index=False,
)

print("结果写入到csv文件")

# --- HOG only ---
modelSVM_poly_HOG = svm.SVC(
    C=10.0,
    kernel='poly',
    degree=3,
    coef0=0.0,
    verbose=False,
)
modelSVM_poly_HOG.fit(plantimg_train_feature_HOG, train_plantimage['target'])
predictedSVM_poly_HOG = modelSVM_poly_HOG.predict(plantimg_test_feature_HOG)

subSVM_poly_HOG = pd.read_csv('E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/sample_submission.csv')
subSVM_poly_HOG['file'] = test_plantimage['file_name']
subSVM_poly_HOG['species'] = [train_plantimage['dict'][label] for label in predictedSVM_poly_HOG]
subSVM_poly_HOG.to_csv(
    'E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/submission_SVM_poly_HOG.csv',
    index=False,
)

print("结果写入到csv文件")

SVM分类开始
结果写入到csv文件
结果写入到csv文件


In [None]:
# Model comparison (XGBoost): train the same classifier on two smaller
# feature sets — HOG+LBP+ORB ("_3") and HOG only — and write one submission
# file for each.
print('XGBoost分类开始')

# --- HOG + LBP + ORB ---
modelXG_3 = XGBClassifier(max_depth=5)
modelXG_3.fit(plantimg_train_feature_3, train_plantimage['target'])
predictedXG_3 = modelXG_3.predict(plantimg_test_feature_3)

subXG_3 = pd.read_csv('E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/sample_submission.csv')
subXG_3['file'] = test_plantimage['file_name']
subXG_3['species'] = [train_plantimage['dict'][label] for label in predictedXG_3]
subXG_3.to_csv(
    'E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/submission_XG_3.csv',
    index=False,
)
print("结果写入到csv文件")

# --- HOG only ---
modelXG_HOG = XGBClassifier(max_depth=5)
modelXG_HOG.fit(plantimg_train_feature_HOG, train_plantimage['target'])
predictedXG_HOG = modelXG_HOG.predict(plantimg_test_feature_HOG)

subXG_HOG = pd.read_csv('E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/sample_submission.csv')
subXG_HOG['file'] = test_plantimage['file_name']
subXG_HOG['species'] = [train_plantimage['dict'][label] for label in predictedXG_HOG]
subXG_HOG.to_csv(
    'E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/submission_XG_HOG.csv',
    index=False,
)
print("结果写入到csv文件")


In [53]:
# Model comparison (random forest): train the same classifier on two smaller
# feature sets — HOG+LBP+ORB ("_3") and HOG only — and write one submission
# file for each.
# A fixed random_state makes both forests reproducible, so differences between
# the two submissions come from the feature sets rather than from random seeds.
print('RF分类开始')

# --- HOG + LBP + ORB ---
modelRF_3 = RandomForestClassifier(random_state=0)
modelRF_3.fit(plantimg_train_feature_3, train_plantimage['target'])
predictedRF_3 = modelRF_3.predict(plantimg_test_feature_3)

subRF_3 = pd.read_csv('E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/sample_submission.csv')
subRF_3['file'] = test_plantimage['file_name']
subRF_3['species'] = [train_plantimage['dict'][label] for label in predictedRF_3]
subRF_3.to_csv('E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/submission_RF_3.csv', index=False)
print("结果写入到csv文件")

# --- HOG only ---
modelRF_HOG = RandomForestClassifier(random_state=0)
modelRF_HOG.fit(plantimg_train_feature_HOG, train_plantimage['target'])
predictedRF_HOG = modelRF_HOG.predict(plantimg_test_feature_HOG)

subRF_HOG = pd.read_csv('E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/sample_submission.csv')
subRF_HOG['file'] = test_plantimage['file_name']
subRF_HOG['species'] = [train_plantimage['dict'][label] for label in predictedRF_HOG]
subRF_HOG.to_csv('E:/py/MachineLearing/MachineLearning-CourseExercise/PlantSeedlingsClassification/submission_RF_HOG.csv', index=False)
print("结果写入到csv文件")


RF分类开始
结果写入到csv文件
结果写入到csv文件
