-
Notifications
You must be signed in to change notification settings - Fork 55
/
load.py
126 lines (105 loc) · 3.82 KB
/
load.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# Copyright (c) 2016. Mount Sinai School of Medicine
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
Load predictors
'''
from __future__ import (
print_function,
division,
absolute_import,
)
import pickle
from os.path import join
import pandas
from ..downloads import get_path
from ..common import normalize_allele_name
# Module-level cache for the default loader. It starts empty and is filled in
# (and replaced when the downloads path changes) by
# get_loader_for_downloaded_models().
CACHED_LOADER = None
def from_allele_name(allele_name):
    """
    Look up the predictor for a single allele through the default loader.

    Parameters
    ----------
    allele_name : class I allele name

    Returns
    -------
    Class1BindingPredictor
    """
    default_loader = get_loader_for_downloaded_models()
    return default_loader.from_allele_name(allele_name)
def supported_alleles():
    """
    List the names of the alleles that have trained predictors in the
    default loader.
    """
    default_loader = get_loader_for_downloaded_models()
    return default_loader.supported_alleles
def get_loader_for_downloaded_models():
    """
    Return the Class1AlleleSpecificPredictorLoader backed by downloaded
    models.

    The loader is kept in the module-level CACHED_LOADER and is rebuilt
    whenever the downloads path no longer matches the cached loader's path.
    """
    global CACHED_LOADER

    # The path is looked up on every call: some unit tests change the
    # downloads directory configuration, so get_path may give different
    # answers within one Python process. A loader built for a different
    # path is therefore treated as stale and replaced.
    current_path = get_path("models_class1_allele_specific_single")

    if CACHED_LOADER is not None and CACHED_LOADER.path == current_path:
        return CACHED_LOADER

    CACHED_LOADER = Class1AlleleSpecificPredictorLoader(current_path)
    return CACHED_LOADER
class Class1AlleleSpecificPredictorLoader(object):
    """
    Factory for Class1BindingPredictor instances that are stored on disk
    using this directory structure:

        production.csv - Manifest file giving information on all models
        models/ - directory of models with names given in the manifest file
            MODEL-BAR.pickle
            MODEL-FOO.pickle
            ...

    Predictors are unpickled lazily on first request and then kept in
    ``predictors_cache`` for the lifetime of the loader.
    """

    def __init__(self, path):
        """
        Read the manifest and prepare an empty predictor cache.

        Parameters
        ----------
        path : string
            Path to directory containing manifest and models
        """
        self.path = path
        self.path_to_models_csv = join(path, "production.csv")
        self.df = pandas.read_csv(self.path_to_models_csv)
        # Index the manifest by allele so rows can be looked up by label.
        self.df.index = self.df["allele"]
        self.supported_alleles = sorted(self.df.allele)
        # Maps normalized allele name -> already-loaded predictor.
        self.predictors_cache = {}

    def from_allele_name(self, allele_name):
        """
        Load a predictor for an allele.

        The name is passed through normalize_allele_name before lookup, and
        the loaded predictor is cached under the normalized name.

        Parameters
        ----------
        allele_name : class I allele name

        Returns
        -------
        Class1BindingPredictor

        Raises
        ------
        ValueError
            If the manifest has no entry for the (normalized) allele name.
        """
        allele_name = normalize_allele_name(allele_name)
        if allele_name not in self.predictors_cache:
            try:
                # Label-based lookup; `.loc` replaces the `.ix` indexer,
                # which no longer exists in pandas >= 1.0.
                predictor_name = self.df.loc[allele_name].predictor_name
            except KeyError:
                raise ValueError(
                    "No models for allele '%s'. Alleles with models: %s"
                    " in models file: %s" % (
                        allele_name,
                        ' '.join(self.supported_alleles),
                        self.path_to_models_csv))

            model_path = join(self.path, "models", predictor_name + ".pickle")
            # NOTE: pickle.load can execute arbitrary code, so the models
            # directory must only ever contain files from a trusted source.
            with open(model_path, 'rb') as fd:
                self.predictors_cache[allele_name] = pickle.load(fd)
        return self.predictors_cache[allele_name]