In [6]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('default')


In [9]:
otu_raw = pd.read_csv("../Salt_pond_16S_otu.tax.filtered.txt",sep="\t",index_col=0)

# Filter OTU table to > 99 reads  -- Note if AGG or Grep, Agg, don't used filtered.
otu_F100 = otu_raw[otu_raw.iloc[:,:-1].sum(1) > 99]
otu_F100.head()

## IMPORT Sample mapping, modified from TL
map_iTag = pd.read_csv("../Salt_pond_ALL_Meta_whh.txt", sep="\t")
map_iTag_only = map_iTag[["Sample","Restoration","Depth"]]
map_iTag_only.collumns = ["Sample","Treat","Depth"]


## Drop " (class)", note double esc.
## "Canditatus" genera -> "Cand."
otu_F100["Consensus lineage"] = otu_F100["Consensus lineage"].str.replace(" \(class\)","").str.replace("Candidatus","Cand.")

## Drop chloroplast & mitochondrial reads
otu_F100 = otu_F100[~otu_F100["Consensus lineage"].str.contains("chloroplast")]
otu_F100 = otu_F100[~otu_F100["Consensus lineage"].str.contains("mitochondria")]

## Drop reads with no phylum
otu_F100 = otu_F100[~otu_F100["Consensus lineage"].str.contains("BacteriaKI")]
lineage = otu_F100["Consensus lineage"].str.split(";",expand=True)
lineage = lineage.replace(['k__*','p__*','c__*','o__*','f__*','g__*'],['','','','','',''], regex=True)
lineage.columns = ["Kingdom", "Phylum", "Class", "Order", "Family", "Genus"]

## add split tax into OTU table
otu_F100_LinSp = pd.concat([otu_F100,lineage],1)
otu_F100_LinSp_nophy = otu_F100_LinSp[~otu_F100_LinSp["Phylum"].isnull()]

## fill missing tax using upper level of tax
otu_F100_LinSp_nophy.loc[otu_F100_LinSp_nophy["Class"].isnull(),"Class"] = otu_F100_LinSp_nophy.loc[otu_F100_LinSp_nophy["Class"].isnull(),"Phylum"] + "PH"
otu_F100_LinSp_nophy.loc[otu_F100_LinSp_nophy["Order"].isnull(),"Order"] = otu_F100_LinSp_nophy.loc[otu_F100_LinSp_nophy["Order"].isnull(),"Class"] + "CL"
otu_F100_LinSp_nophy.loc[otu_F100_LinSp_nophy["Family"].isnull(),"Family"] = otu_F100_LinSp_nophy.loc[otu_F100_LinSp_nophy["Family"].isnull(),"Order"] + "OR"
otu_F100_LinSp_nophy.loc[otu_F100_LinSp_nophy["Genus"].isnull(),"Genus"] = otu_F100_LinSp_nophy.loc[otu_F100_LinSp_nophy["Genus"].isnull(),"Family"] + "FA"
otu_F100_LinSp_nophy.iloc[:,-6:] = otu_F100_LinSp_nophy.iloc[:,-6:].replace(['PHCLORFA','PHCLOR','PHCL','CLORFA','CLOR','ORFA'],['PH','PH','PH','CL','CL','OR'],regex=True)

## save OTU table
otu_F100_LinSp_nophy.index.name = "#OTU_ID"
otu_F100_LinSp_nophy.iloc[:,:-7].to_csv("OTU_table.txt",sep="\t")

col = otu_F100_LinSp_nophy.columns[:-7]
otu_F100_LinSp_nophy.to_csv("OTU_tax_table.txt",sep="\t")

## save genus table
otu_F100_LinSp_nophy_genus = otu_F100_LinSp_nophy.groupby("Genus")[col].sum()
otu_F100_LinSp_nophy_genus.index.name = "#" + otu_F100_LinSp_nophy_genus.index.name
otu_F100_LinSp_nophy_genus.to_csv("Genus_table.txt",sep="\t")

## save family table
otu_F100_LinSp_nophy_Family = otu_F100_LinSp_nophy.groupby("Family")[col].sum()
otu_F100_LinSp_nophy_Family.index.name = "#" + otu_F100_LinSp_nophy_Family.index.name
otu_F100_LinSp_nophy_Family.to_csv("Family_table.txt",sep="\t")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [10]:
otu_F100_LinSp_nophy_genus

Unnamed: 0_level_0,R1_A_D1,R1_A_D2,R1_B_D1,R1_B_D2,R1_C_D1,R1_C_D2,R2A_A_D1,R2A_A_D2,R2A_B_D1,R2A_B_D2,...,R2_B_D1,R2_B_D2,R2_C_D1,R2_C_D2,SF2_A_D1,SF2_A_D2,SF2_B_D1,SF2_B_D2,SF2_C_D1,SF2_C_D2
#Genus,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
125ds10FA,0,0,0,0,0,0,0,25,78,22,...,0,0,0,0,0,0,0,0,0,0
4C0d-2CL,0,1,3,0,1,9,1,0,1,47,...,14,73,60,55,0,15,10,1,6,84
5bav_B12CL,0,0,0,0,0,0,3,1,11,937,...,0,0,0,0,27,39,51,52,65,93
A17,0,0,0,0,1,0,83,1087,350,221,...,1,0,0,2,1,2,6,3,6,2
A4bOR,0,0,0,1,0,0,4479,4199,6801,1246,...,1,2,0,0,83,115,125,128,603,76
ABY1_OD1PH,119,104,29,30,13,15,0,4,2,1,...,66,17,36,21,62,17,0,1,42,0
AKIW874FA,1,1,2,0,1,5,37,47,11,1,...,4,10,1,1,0,3,18,10,118,51
AT425_EubG1CL,0,0,0,0,0,0,0,1,1,3,...,0,0,0,0,27,26,147,142,180,44
Acetivibrio,78,77,92,116,45,52,0,1,0,34,...,32,90,15,64,25,164,744,604,533,755
AcetobacteraceaeFA,0,0,0,0,0,0,0,29,8,361,...,0,1,0,0,0,2,0,1,3,3
