-
Notifications
You must be signed in to change notification settings - Fork 0
/
groupXY.py
executable file
·125 lines (82 loc) · 3.51 KB
/
groupXY.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
"""
Main script for the FYP project imaging
"""
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from skimage import morphology #for measuring things in the masks
from sklearn.model_selection import StratifiedShuffleSplit #for cross-validation
from sklearn.metrics import accuracy_score #for measuring performance
import groupXY_functions as util #custom-made functions with e.g. kNN classifier, you can also use sklearn
# Input locations: ground-truth table, image folder, segmentation-mask folder
file_data = 'data/example_ground_truth.csv'
path_image = 'data/example_image'
path_mask = 'data/example_segmentation'

# Output location for the measured features
file_features = 'features/features.csv'

# Read the ground-truth table
df = pd.read_csv(file_data)
num_images = len(df)

# Pull the image IDs and the two label columns out of the table
image_id = list(df['image_id'])
is_melanoma = np.array(df['melanoma'])
is_keratosis = np.array(df['seborrheic_keratosis'])

# One (num_images, 1) column per feature, pre-filled with NaN so that any
# image that never gets measured stays visibly missing
features_area = np.full([num_images, 1], np.nan)
features_perimeter = np.full([num_images, 1], np.nan)
# Loop through all images and measure the features of each one
for i, current_id in enumerate(image_id):

    # Define filenames related to this image
    file_image = os.path.join(path_image, current_id + '.jpg')
    file_mask = os.path.join(path_mask, current_id + '_segmentation.png')

    # Read the images with these filenames.
    # `im` is not used by the area/perimeter measurement below; it is loaded
    # so it is available for the additional features of the TODO.
    im = plt.imread(file_image)
    mask = plt.imread(file_mask)

    # Measure features (custom made function)
    a, p = util.measure_area_perimeter(mask)

    # Store in the variables we created before (row i belongs to image i)
    features_area[i, 0] = a
    features_perimeter[i, 0] = p

    ###### TODO - Here you should measure and store some other features
# Store these features so you can reuse them later
feature_data = {
    "id": image_id,
    "area": features_area.flatten(),
    "perimeter": features_perimeter.flatten(),
}
df_features = pd.DataFrame(feature_data)

# to_csv does not create missing folders, so make sure the output folder
# exists before writing (skipped when the path has no folder component)
features_dir = os.path.dirname(file_features)
if features_dir:
    os.makedirs(features_dir, exist_ok=True)
df_features.to_csv(file_features, index=False)

# Load the data you saved, then do some analysis
df_features = pd.read_csv(file_features)
image_id = list(df_features['id'])
features_area = np.array(df_features['area'])
features_perimeter = np.array(df_features['perimeter'])

# Display the features measured in a scatterplot (custom made function),
# passing the melanoma labels along with the two features
axs = util.scatter_data(features_area, features_perimeter, is_melanoma)
axs.set_xlabel('X1 = Area')
axs.set_ylabel('X2 = Perimeter')
axs.legend()
# Load features and labels for the melanoma task
x = df_features.iloc[:, 1:].to_numpy()  # every column except the first ("id")
y = is_melanoma

# Prepare cross-validation: each split holds out 40% of the samples
n_splits = 5
kf = StratifiedShuffleSplit(n_splits=n_splits, test_size=0.4, random_state=1)

# One accuracy value per split, for the validation and the test part
acc_val = np.empty([n_splits, 1])
acc_test = np.empty([n_splits, 1])

# Parameter for nearest neighbor classifier
k = 5

# Predict labels for each fold using the KNN algorithm
for index_fold, (train_index, test_val_index) in enumerate(kf.split(x, y)):

    # Split the held-out indices into a test half and a validation half.
    # array_split (unlike split) also accepts an odd number of held-out
    # samples; the first half then gets the extra sample.
    test_index, val_index = np.array_split(test_val_index, 2)

    x_train, x_val, x_test = x[train_index], x[val_index], x[test_index]
    y_train, y_val, y_test = y[train_index], y[val_index], y[test_index]

    # Train and test custom-made kNN classifier. In sklearn you would do this
    # with three steps: fit(x_train, y_train), predict(x_val), predict(x_test)
    y_pred_val, y_pred_test = util.knn_classifier(x_train, y_train, x_val, x_test, k)

    # Calculate accuracy
    acc_val[index_fold] = accuracy_score(y_val, y_pred_val)
    acc_test[index_fold] = accuracy_score(y_test, y_pred_test)

# Report the per-split accuracies once all splits are done
print(acc_val)
print(acc_test)