-
Notifications
You must be signed in to change notification settings - Fork 0
/
predict_results.py
90 lines (70 loc) · 2.71 KB
/
predict_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# -*- coding: utf-8 -*-
import pandas as pd
import csv
from sklearn.pipeline import make_pipeline
from joblib import load
def predict_using_trained_model(model, prediction_data_file, feature_columns, output_columns):
    '''
    Predict outputs for the rows of a csv file using the given trained model.

    Parameters
    ----------
    model : Regression model
        a trained model; only its ``predict`` method is used
    prediction_data_file : csv file
        a csv file with input data
    feature_columns : list
        a list of features (column names handed to the model, in this order)
    output_columns : list
        names of the columns that receive the predicted values

    Returns
    -------
    df_predict : dataframe
        the input data; after a successful prediction every output column
        holds the predicted values
    y_predict : list
        predicted values exactly as returned by ``model.predict``; an empty
        list when the input data contain NaN values and nothing was predicted
    '''
    ### Predict
    print("\nPredicting...")
    df_predict = pd.read_csv(prediction_data_file)

    # Guard clause: incomplete data is reported, not predicted on. The frame
    # is returned without output columns together with an empty result.
    if df_predict.isnull().values.any():
        print('The data for predictions have NaN values. Please check the data for predictions.')
        return df_predict, []

    # Build the model input from the feature columns, in the given order.
    X_predict = pd.DataFrame(df_predict, columns=feature_columns)
    y_predict = model.predict(X_predict)
    print(y_predict)

    # Every requested output column receives the same prediction vector.
    for column in output_columns:
        df_predict[column] = y_predict

    return df_predict, y_predict
def main():
    '''
    Predict every output column with its saved model and save the results.

    Reads the feature names from the first row of "feature columns.csv",
    loads one joblib model per output column ("model for output <idx>.joblib"),
    predicts on the prediction dataset and writes the dataset together with
    the predicted columns to the result csv file.

    Raises
    ------
    ValueError
        if "feature columns.csv" contains no rows
    '''
    ### Predict
    prediction_data_file = "./prediction_dataset_87descriptors.csv"
    result_file = "./predicted results.csv"
    output_columns = ['Active/Inactive 1', 'Active/Inactive 2']

    # Read feature columns: only the first row of the file is needed, so the
    # rest of the file is not read.
    with open("feature columns.csv", newline='') as feature_file:
        feature_columns = next(csv.reader(feature_file), None)
    if feature_columns is None:
        raise ValueError('"feature columns.csv" is empty; expected a row of feature names.')

    predictions = {}
    for idx, output_column in enumerate(output_columns):
        ### Load saved model
        # NOTE: joblib.load unpickles the file, so only model files from a
        # trusted source should be loaded here.
        model_filename = "model for output " + str(idx) + ".joblib"
        mlp_pipeline = load(model_filename)

        # Only the raw predictions are kept; the returned frame is not needed
        # because the dataset is re-read below before saving.
        _, y_predicted = predict_using_trained_model(
            mlp_pipeline, prediction_data_file, feature_columns, [output_column])
        predictions[output_column] = [int(value) for value in y_predicted]

    # save results in a file
    print('Saving predicted results...')
    df_predict = pd.read_csv(prediction_data_file)
    if df_predict.isnull().values.any():
        print('The data for predictions have NaN values. Please check the data for predictions.')
        return

    # Predicted columns are placed after the first two dataset columns, in
    # the order of output_columns. The position is clamped to the current
    # column count so a narrower dataset does not make insert() raise.
    for offset, (column, values) in enumerate(predictions.items()):
        position = min(2 + offset, len(df_predict.columns))
        df_predict.insert(position, column, values)

    df_predict.to_csv(result_file, sep=",", index=False)
    # Reported only when the file has actually been written.
    print("Results saved to file: ", result_file)
# Run the prediction workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()