Skip to content

Commit

Permalink
merge from master
Browse files (browse the repository at this point in the history)
  • Loading branch information
maxibor committed Oct 29, 2019
2 parents dd4b3d2 + 468e331 commit 7c50be8
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 13 deletions.
6 changes: 5 additions & 1 deletion .coverage
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
!coverage.py: This is a private format, don't read it directly!{"lines":{"/Users/borry/Documents/GitHub/sourcepredict/sourcepredictlib/utils.py":[3,4,5,6,260,136,9,137,138,261,264,142,145,18,19,20,21,22,23,26,157,158,159,163,166,38,39,40,41,42,43,46,177,178,179,183,186,66,206,84,93,94,95,96,97,98,99,228,222,102,225,223,240,113,114,115,116,117,246,241,243,121,124],"/Users/borry/Documents/GitHub/sourcepredict/sourcepredictlib/ml.py":[5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,28,29,30,32,33,34,36,51,52,53,54,55,56,57,58,60,63,77,79,81,82,83,84,85,87,88,89,90,91,93,94,95,96,97,99,109,111,113,114,115,116,117,121,122,123,124,125,126,127,131,133,136,137,138,140,151,152,153,154,155,157,158,160,161,162,163,165,171,185,186,187,188,190,191,193,194,195,196,197,198,199,200,202,203,204,205,206,207,208,211,212,229,230,231,232,233,235,237,238,239,240,243,244,245,246,247,249,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,281,283,295,297,301,302,303,304,305,306,307,308,309,310,315,316,317,318,319,320,322,331,332,333,334,336,337,338,340,341,343,344,345,346,347,348,349,350,351,353,368,369,370,371,373,375,394,395,396,397,399,400,401,402,404,405,406,408,409,410,411,412,413],"/Users/borry/Documents/GitHub/sourcepredict/sourcepredictlib/normalize.py":[3,4,5,6,9,21,22,23,24,25,26,27,28,29,30,33,43,65,66,67,68,69,70,73,75,76,78,79,80,81,84,94,95,96,97,98,101,115,117,118,119,121]}}
<<<<<<< HEAD
!coverage.py: This is a private format, don't read it directly!{"lines":{"/Users/borry/Documents/GitHub/sourcepredict/sourcepredictlib/utils.py":[3,4,5,6,260,136,9,137,138,261,264,142,145,18,19,20,21,22,23,26,157,158,159,163,166,38,39,40,41,42,43,46,177,178,179,183,186,66,206,84,93,94,95,96,97,98,99,228,222,102,225,223,240,113,114,115,116,117,246,241,243,121,124],"/Users/borry/Documents/GitHub/sourcepredict/sourcepredictlib/ml.py":[5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,28,29,30,32,33,34,36,51,52,53,54,55,56,57,58,60,63,77,79,81,82,83,84,85,87,88,89,90,91,93,94,95,96,97,99,109,111,113,114,115,116,117,121,122,123,124,125,126,127,131,133,136,137,138,140,151,152,153,154,155,157,158,160,161,162,163,165,171,185,186,187,188,190,191,193,194,195,196,197,198,199,200,202,203,204,205,206,207,208,211,212,229,230,231,232,233,235,237,238,239,240,243,244,245,246,247,249,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,281,283,295,297,301,302,303,304,305,306,307,308,309,310,315,316,317,318,319,320,322,331,332,333,334,336,337,338,340,341,343,344,345,346,347,348,349,350,351,353,368,369,370,371,373,375,394,395,396,397,399,400,401,402,404,405,406,408,409,410,411,412,413],"/Users/borry/Documents/GitHub/sourcepredict/sourcepredictlib/normalize.py":[3,4,5,6,9,21,22,23,24,25,26,27,28,29,30,33,43,65,66,67,68,69,70,73,75,76,78,79,80,81,84,94,95,96,97,98,101,115,117,118,119,121]}}
=======
!coverage.py: This is a private format, don't read it directly!{"lines":{"/projects1/users/borry/18_sourcepredict/sourcepredictlib/ml.py":[3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,25,26,27,29,31,34,36,60,63,99,133,140,171,210,211,247,281,334,228,229,230,231,232,234,236,237,238,239,242,243,244,245,51,52,53,54,55,56,57,58,77,79,81,82,83,84,85,87,88,89,90,91,93,94,95,96,97,109,111,113,114,115,116,117,121,122,123,124,125,126,127,131,136,137,138,151,152,153,154,155,157,158,160,161,162,163,165,184,185,186,187,189,190,192,193,194,195,196,197,198,199,201,202,203,204,205,206,207,256,257,258,259,260,262,263,261,264,265,266,267,268,269,270,279,293,295,299,300,301,302,313,314,315,320,329,330,331,332,303,304,305,306,307,308,316,317,318,349,350,351,352,354,356,375,376,377,378,380,381,382,383,385,386,387,389,390,391,392,393,394],"/projects1/users/borry/18_sourcepredict/sourcepredictlib/normalize.py":[3,4,5,6,9,33,84,101,115,117,118,119,121,21,22,23,24,25,26,27,28,29,30,43,75,76,78,79,65,66,67,68,69,70,73,80,81,94,95,96,97,98],"/projects1/users/borry/18_sourcepredict/sourcepredictlib/utils.py":[3,4,5,6,9,26,46,66,84,106,127,148,168,190,208,75,76,77,78,79,80,81,38,39,40,41,42,43,18,19,20,21,22,23,95,96,98,99,103,97,118,119,120,124,139,140,141,145,159,160,161,165,184,187,185,202,203,205,222,223,226]}}
>>>>>>> 468e331da275343b1fd57fc870e4414ed88ec7b5
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ __ homepage_
add_new_sources
run_example
mixed_prop
CDI_analysis


Indices and tables
Expand Down
16 changes: 11 additions & 5 deletions sourcepredict
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ Homepage & Documentation: github.com/maxibor/sourcepredict
default='weighted_unifrac',
help="Distance method. (unweighted_unifrac | weighted_unifrac) Default = weighted_unifrac"
)
parser.add_argument(
'-r',
dest="tax_rank",
default='species',
help="Taxonomic rank to use for Unifrac distances. Default = species"
)
parser.add_argument(
'-me',
dest="method",
Expand Down Expand Up @@ -120,6 +126,7 @@ Homepage & Documentation: github.com/maxibor/sourcepredict
labels = args.labels
seed = int(args.seed)
distance = args.distance
rank = args.tax_rank
method = args.method
ml = args.learning
neighbors = int(args.neighbors)
Expand All @@ -130,13 +137,13 @@ Homepage & Documentation: github.com/maxibor/sourcepredict
kfold = int(args.kfold)
threads = int(args.threads)

return(sink, alpha, normalization, sources, labels, seed, distance, method, ml, neighbors, weights, dim, output, embed, kfold, threads)
return(sink, alpha, normalization, sources, labels, seed, distance, rank, ml, method, neighbors, weights, dim, output, embed, kfold, threads)


if __name__ == "__main__":
version = "0.34"
warnings.filterwarnings("ignore")
SINK, ALPHA, NORMALIZATION, SOURCES, LABELS, SEED, DISTANCE, METHOD, ML, NEIGHBORS, WEIGTHS, DIM, OUTPUT, EMBED_CSV, KFOLD, THREADS = _get_args()
SINK, ALPHA, NORMALIZATION, SOURCES, LABELS, SEED, DISTANCE, RANK, ML, METHOD, NEIGHBORS, WEIGTHS, DIM, OUTPUT, EMBED_CSV, KFOLD, THREADS = _get_args()
SEED = utils.check_gen_seed(SEED)
np.random.seed(SEED)
embed_method = utils.check_embed(METHOD)
Expand All @@ -146,7 +153,6 @@ if __name__ == "__main__":
predictions = {}
distance_method = utils.check_distance(DISTANCE)
weigth = utils.check_weigths(WEIGTHS)
tax_rank = "species"
samp_pred = {}
print("Step 1: Checking for unknown proportion")
if ALPHA == 0:
Expand All @@ -173,8 +179,8 @@ if __name__ == "__main__":
print("Step 2: Checking for source proportion")
sm = sourcemap(source=SOURCES, sink=SINK, labels=LABELS,
norm_method=normalization, threads=THREADS)
print(f"\tComputing {distance_method} distance on {tax_rank} rank")
sm.compute_distance(distance_method=distance_method, rank=tax_rank)
print(f"\tComputing {distance_method} distance on {RANK} rank")
sm.compute_distance(distance_method=distance_method, rank=RANK)
print(f"\t{embed_method} embedding in {DIM} dimension{utils.plural(DIM)}")
sm.embed(n_comp=DIM, method=embed_method, seed=SEED, out_csv=EMBED_CSV)
if ml == 'knn':
Expand Down
8 changes: 4 additions & 4 deletions sourcepredictlib/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def normalize(self, method, threads):
['unknown'] * self.ref_u.shape[1], index=self.normalized_ref_u.columns, name='labels')
try:
self.sink = self.normalized.drop(
self.ref.columns, axis=1).T
self.ref.columns, axis=1, errors='ignore').T
except KeyError:
print(f"ERROR: Test sample present in training dataset")
sys.exit(1)
Expand Down Expand Up @@ -157,7 +157,7 @@ def embed(self, seed, n_comp=200, out_csv=None):
self.my_embed = my_embed
self.my_embed.set_index(self.bc.index, inplace=True)

self.ref_u = self.my_embed.drop(self.tmp_sink.columns, axis=0)
self.ref_u = self.my_embed.drop(self.tmp_sink.columns, axis=0, errors = 'ignore')
self.ref_u = self.ref_u.merge(
self.labels.to_frame(), left_index=True, right_index=True)
self.sink = self.my_embed.loc[self.tmp_sink.columns, :]
Expand Down Expand Up @@ -330,8 +330,8 @@ def embed(self, method, out_csv, seed, n_comp=200):

self.ref_t = self.my_embed.drop(self.test_samples, axis=0)
self.ref_t = self.ref_t.merge(
self.labels.to_frame(), left_index=True, right_index=True)
self.sink_t = self.my_embed.drop(self.train_samples, axis=0)
self.labels.to_frame(), left_index=True, right_index=True).dropna(axis=0)
self.sink_t = self.my_embed.drop(self.train_samples, axis=0).dropna(axis=0)

def gmm_classification(self, seed):
train_t_features, test_t_features, train_t_labels, test_t_labels = train_test_split(
Expand Down
6 changes: 3 additions & 3 deletions sourcepredictlib/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def RLE_normalize(pd_dataframe):
step5 = step4.apply(np.median, 0)
step6 = step5.apply(np.exp)
step7 = pd_dataframe.divide(step6, 1).apply(round, 1)
return(step7)
return(step7.dropna(axis=1))


def subsample_normalize_pd(pd_dataframe):
Expand Down Expand Up @@ -78,7 +78,7 @@ def subsample_normalize(serie, omax):
step2 = pd_dataframe.apply(
subsample_normalize, axis=0, args=(themax,))
step3 = step2.apply(np.floor, axis=1)
return(step3)
return(step3.dropna(axis=1))


def gmpr_size_factor(col, ar):
Expand Down Expand Up @@ -118,4 +118,4 @@ def GMPR_normalize(df, process):
with multiprocessing.Pool(process) as p:
sf = p.map(gmpr_sf_partial, list(range(np.shape(ar)[1])))

return(pd.DataFrame(np.divide(ar, sf), index=df.index, columns=df.columns))
return(pd.DataFrame(np.divide(ar, sf), index=df.index, columns=df.columns).dropna(axis=1))

0 comments on commit 7c50be8

Please sign in to comment.