# Look at Euphausiid and Copepod Communities

- create table


In [1]:
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt

#For illustrator import:
# fonttype 42 makes matplotlib embed TrueType fonts in PDF/PS output
# (instead of the default Type 3), so text stays editable as text.
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42

In [2]:
#Functions

#Raw Read Numbers
def make_rawread(infile):
    """Return the raw read counts from a tab-separated OTU/taxa table.

    infile is a path or file-like object laid out like
    OTU_table_taxa_all.txt: a 'DUP_ID' column with the OTU ids, one column
    per sample, plus 'taxonomy' and 'repseq' columns.  The two annotation
    columns are removed and the OTU ids become the index (named 'OTU').
    """
    table = pd.read_csv(infile, sep='\t').rename(columns={'DUP_ID': 'OTU'})
    for annotation in ('taxonomy', 'repseq'):
        table = table.drop(annotation, axis=1)
    return table.set_index('OTU')
    
#metadata handling and sample order
def make_metadata(infile):
    """Load a sample metadata CSV and derive site names and a sample order.

    Parameters
    ----------
    infile : path or file-like
        Comma-separated metadata.  After '-' is replaced by '_' in the
        header it must provide 'sample_name', 'Description', 'Treatment',
        'dec_lat' and 'dec_long'.

    Returns
    -------
    (df_full, df)
        df_full : every metadata column plus 'site', indexed by
            'sample_name', with the UC5 position filled in and the
            longitude sign repaired.
        df : 'Description', 'Treatment', 'comb' and the integer
            'site_Order', indexed by 'sample_name' and sorted by
            'site_Order'.
    """
    df = pd.read_csv(infile)
    #make all column names with underscore
    df.columns = [c.replace('-', '_') for c in df.columns]
    #extract site name (leading run of letters/digits of the sample name)
    df['site'] = df['sample_name'].str.extract(r'([a-zA-Z0-9]+)', expand=False)

    # Work on an explicit copy so the edits below never write through to df.
    df_full = df.copy()
    #add in some missing location data
    is_uc5 = df_full['site'] == 'UC5'
    df_full.loc[is_uc5, 'dec_lat'] = 32
    df_full.loc[is_uc5, 'dec_long'] = '.118.17'
    # A leading '.' is rewritten to '-'.  regex=True is spelled out because
    # the pattern is a regular expression and the pandas default changed to
    # literal matching in 2.0.
    # NOTE(review): presumably the sheet stores the minus sign as '.' -- confirm.
    df_full['dec_long'] = df_full['dec_long'].str.replace(r'^\.', '-', regex=True)
    df_full.set_index('sample_name', inplace=True)

    df = df[['Description', 'Treatment', 'sample_name']].copy()
    df['comb'] = df['Description'] + '_' + df['Treatment'] + '_' + df['sample_name']
    # GOC2a/GOC2b/NTC get fixed ranks; every other sample is ordered by the
    # first number found in its name.
    order = (df['sample_name']
             .str.replace('GOC2a', '16', regex=False)
             .str.replace('GOC2b', '17', regex=False)
             .str.replace('NTC', '0', regex=False))
    df['site_Order'] = order.str.extract(r'(\d+)', expand=False)
    df.set_index('sample_name', inplace=True)
    df['site_Order'] = df['site_Order'].astype(int)
    df.sort_values('site_Order', ascending=True, inplace=True)
    return df_full, df

#DESEQ Numbers
def make_deseq(infile):
    """Read a DESeq output CSV and index it by OTU.

    The first column of the file has no header (pandas names it
    'Unnamed: 0'); it is renamed to 'OTU' and used as the index.
    """
    table = pd.read_csv(infile, sep=',')
    table = table.rename(columns={'Unnamed: 0': 'OTU'})
    return table.set_index('OTU')

#Rarefied Read Numbers (From R script); make compositional
def make_rarereads_comp(infile):
    """Read a rarefied OTU table and express it as percent of reads.

    The file is a CSV whose unnamed first column holds the OTU ids and
    whose remaining columns are samples.  The set of per-sample read totals
    is printed, and every count is divided by the total of the first sample
    and multiplied by 100.  The result keeps the OTU x sample orientation.
    """
    counts = (pd.read_csv(infile, sep=',')
              .rename(columns={'Unnamed: 0': 'OTU'})
              .set_index('OTU'))
    by_sample = counts.T
    depths = by_sample.sum(axis=1).tolist()
    print('Rarefied read count:', set(depths))
    # Scale by the first sample's depth, as all samples are expected to
    # share it after rarefaction.
    percent = by_sample / depths[0] * 100
    return percent.T

def make_rarereads(infile):
    """Read a rarefied OTU count CSV and index it by OTU.

    The unnamed first column of the file ('Unnamed: 0' after parsing)
    becomes the index, named 'OTU'; counts are returned unchanged.
    """
    counts = pd.read_csv(infile, sep=',')
    return counts.rename(columns={'Unnamed: 0': 'OTU'}).set_index('OTU')

#Taxa table
def make_taxa(infile):
    """Build a per-OTU taxonomy table from a tab-separated OTU/taxa file.

    infile is a path or file-like object laid out like
    OTU_table_taxa_all.txt, with a 'DUP_ID' column (OTU ids) and a
    'taxonomy' column holding the seven ranks as single-quoted names.

    Returns a frame indexed by 'OTU' with the columns Kingdom, Phylum,
    Class, Order, Family, Genus and Species.  A rank that is absent from
    the taxonomy string is left missing.
    """
    ranks = ['Kingdom', 'Phylum', 'Class', 'Order',
             'Family', 'Genus', 'Species']
    #infile = OTU_table_taxa_all.txt
    table = pd.read_csv(infile, sep='\t').rename(columns={'DUP_ID': 'OTU'})
    # Split once on the single quote: the quoted rank names then sit at the
    # odd positions 1, 3, ..., 13 of each resulting list.
    pieces = table.set_index('OTU')['taxonomy'].str.split("'")
    # Fill a fresh frame rather than adding columns to a slice of `table`.
    taxa = pd.DataFrame(index=pieces.index)
    for position, rank in enumerate(ranks):
        taxa[rank] = pieces.str[2 * position + 1]
    return taxa

#Filtered Taxa table
def make_Ftaxa(infile):
    """Return the taxonomy part of a filtered OTU/taxa CSV.

    The 'OTU_ID' column becomes the index (named 'OTU') and only the last
    seven columns of the file are kept.
    """
    table = (pd.read_csv(infile, sep=',')
             .rename(columns={'OTU_ID': 'OTU'})
             .set_index('OTU'))
    last_seven = table.columns[-7:]
    return table[last_seven]

#extract sequences
def make_seq(infile):
    """Return the representative sequence of every OTU.

    infile is a path or file-like object laid out like
    OTU_table_taxa_all.txt (tab-separated, with 'DUP_ID' and 'repseq'
    columns).  The result is a one-column frame ('repseq') indexed by 'OTU'.
    """
    table = pd.read_csv(infile, sep='\t').rename(columns={'DUP_ID': 'OTU'})
    indexed = table.set_index('OTU')
    return indexed[['repseq']]

# Import Banzai Results

In [3]:
# Load every Banzai table for both markers.  The path variables
# (file_loc1, meta_file, file3, file4) are reused: they are set for COI,
# consumed, then overwritten for 18S, so the statement order matters.
#COI M6
print('COI')
#OTU_table_taxa_all.txt location
file_loc1 = '/Users/kpitz/Projects/Gulf_of_California/Cutadapt_Results/COI/Analysis_20190921_1416/all_lib/Post_Blast_20190925_0914/OTU_table_taxa_all.txt'
#metadata
meta_file = '/Users/kpitz/Projects/Gulf_of_California/Cutadapt_Results/COI/Analysis_20190921_1416/GOC_20190921_1416_COI_analysis_metadata.csv'

#DESEQ data
#file2 = '/Users/kpitz/Projects/Gulf_of_California/Deseq/COI_GOC_DEseq_122117.csv'
#rarefied data
file3 = '/Users/kpitz/Projects/MBON/Rarefied_Data_unmerged/GOC_COI_OTU_Table_092619_M6.csv'
#Filtered OTU table
file4 ='/Users/kpitz/Projects/Gulf_of_California/Cutadapt_Results/COI/Analysis_20190921_1416/all_lib/Filtered_OTU_taxa_table_all.csv'

# raw_*: read counts; meta_*/samp_lim_*: full and reduced metadata;
# rare_*/rare_comp_*: rarefied counts and their percent version;
# taxa_*/Ftaxa_*: taxonomy from the full and the filtered table;
# seq_*: representative sequences.
raw_COI = make_rawread(file_loc1)
meta_COI, samp_lim_COI = make_metadata(meta_file)
#deseq_COI = make_deseq(file2)
rare_COI = make_rarereads(file3)
rare_comp_COI = make_rarereads_comp(file3)
taxa_COI = make_taxa(file_loc1)
Ftaxa_COI = make_Ftaxa(file4)
seq_COI = make_seq(file_loc1)

#18S_M6
print('18S')
#OTU_table_taxa_all.txt location
file_loc1 = '/Users/kpitz/Projects/Gulf_of_California/Cutadapt_Results/18S/Analysis_20190924_1129/all_lib/Post_Blast_20190930_1343/OTU_table_taxa_all.txt'
#metadata
meta_file = '/Users/kpitz/Projects/Gulf_of_California/Cutadapt_Results/18S/Analysis_20190924_1129/GOC_18S_Metadata.csv'
#DESEQ data
#file2 = '/Users/kpitz/Projects/Gulf_of_California/Deseq/18S_GOC_DEseq_122117.csv'
#rarefied data
file3 = '/Users/kpitz/Projects/MBON/Rarefied_Data_unmerged/GOC_18S_OTU_Table_100119_M6.csv'
#Filtered OTU table
file4 = '/Users/kpitz/Projects/Gulf_of_California/Cutadapt_Results/18S/Analysis_20190924_1129/all_lib/Filtered_OTU_taxa_table_all.csv'


# Same set of tables as above, for the 18S marker.
raw_18S = make_rawread(file_loc1)
meta_18S, samp_lim_18S = make_metadata(meta_file)
#deseq_18S = make_deseq(file2)
rare_18S = make_rarereads(file3)
rare_comp_18S = make_rarereads_comp(file3)
taxa_18S = make_taxa(file_loc1)
Ftaxa_18S = make_Ftaxa(file4)
seq_18S = make_seq(file_loc1)


#Directory for saving Figures
plot_dir = '/Users/kpitz/Projects/Gulf_of_California/GOC_18S_COI_Combined_Cutadapt/'
plot_name = 'GOC_18SCOI_'
# Plot_str is the directory joined with the file-name prefix.
Plot_str = plot_dir + plot_name
print(Plot_str)

#Use to change the name of the databases
name1 = 'GOC_18SCOI_'

COI
Rarefied read count: {129363}
18S
Rarefied read count: {28676}
/Users/kpitz/Projects/Gulf_of_California/GOC_18S_COI_Combined_Cutadapt/GOC_18SCOI_


# Import Filtered Banzai Results

Created in GOC_COI_Filter_Data.ipynb

- /Users/kpitz/Projects/Gulf_of_California/Decontaminated_tables/GOC_COI_seq_table_092519.csv
- /Users/kpitz/Projects/Gulf_of_California/Decontaminated_tables/GOC_COI_otu_table_092519.csv
- /Users/kpitz/Projects/Gulf_of_California/Decontaminated_tables/GOC_COI_taxa_table_092519.csv

In [4]:
# Decontaminated COI tables, in the order: sequences, OTU counts, taxonomy.
files = ['/Users/kpitz/Projects/Gulf_of_California/Decontaminated_tables/GOC_COI_seq_table_092519.csv',
         '/Users/kpitz/Projects/Gulf_of_California/Decontaminated_tables/GOC_COI_otu_table_092519.csv',
         '/Users/kpitz/Projects/Gulf_of_California/Decontaminated_tables/GOC_COI_taxa_table_092519.csv']
# Each file stores the OTU ids in its unnamed first column; use it as the
# index and call it 'OTU'.
seq_COI_filt, raw_COI_filt, Ftaxa_COI_filt = [
    pd.read_csv(path).set_index('Unnamed: 0').rename_axis('OTU')
    for path in files
]

#import metadata
meta_file ='/Users/kpitz/Projects/Gulf_of_California/Combined_PCTD_Metadata_043019.csv'
df = pd.read_csv(meta_file).sort_values(['order']).set_index('sample_ID')
print(list(df))

meta_COI_PCTD = df.copy()
meta_COI_PCTD.head()

['order', 'tag_sequence', 'primer_sequence_F', 'primer_sequence_R', 'library_tag_combo', 'library', 'sample_type', 'locus', 'tag_number', 'R1', 'R2', 'Treatment', 'Time_of_Day', 'Description', 'Description_3', 'site', 'SEQ', 'BOTTLE', 'DEPTH', 'CRUISE', 'PLATFORM', 'DEC_LAT', 'DEC_LONG', 'TMP', 'SAL', 'CHL_GFF', 'PRESSURE', 'NO3', 'OXY_ML', 'RDEP', 'TRANSMISS', 'SIG_T', 'FLUOR', 'DATE_TIME', 'cruise', 'SEQAvg_dg', 'AvgOfTMP', 'StDevOfTMP', 'CountOfTMP', 'AvgOfSAL1', 'StDevOfSAL', 'CountOfSAL', 'AvgOfCHLA', 'StDevOfCHLA', 'CountOfCHLA', 'AvgOfOXY_ML1', 'CountOfOXY_ML1', 'CountOfOXY_ML', 'AvgOfTRANSMISS', 'StDevOfTRANSMISS', 'CountOfTRANSMISS', 'AvgOfSIGMA_THETA', 'StDevOfSIGMA_THETA', 'CountOfSIGMA_THETA']


Unnamed: 0_level_0,order,tag_sequence,primer_sequence_F,primer_sequence_R,library_tag_combo,library,sample_type,locus,tag_number,R1,...,CountOfCHLA,AvgOfOXY_ML1,CountOfOXY_ML1,CountOfOXY_ML,AvgOfTRANSMISS,StDevOfTRANSMISS,CountOfTRANSMISS,AvgOfSIGMA_THETA,StDevOfSIGMA_THETA,CountOfSIGMA_THETA
sample_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
UC1,1.0,TTCTAGCT,CACGACGTTGTAAAACGAC,GGATAACAATTTCACACAGG,N7_TTCTAGCT,N7,environmental,COI,1,UC1-COI_S22_L001_R1_001.fastq.gz,...,101,4.649444,101,101,88.565879,1.576289,101,25.500296,0.326118,101
UC2,2.0,CCTAGAGT,CACGACGTTGTAAAACGAC,GGATAACAATTTCACACAGG,N8_CCTAGAGT,N8,environmental,COI,2,UC2-COI_S23_L001_R1_001.fastq.gz,...,100,4.906637,100,100,88.278839,1.77399,100,25.359315,0.297979,100
UC3,3.0,GCGTAAGA,CACGACGTTGTAAAACGAC,GGATAACAATTTCACACAGG,N11_GCGTAAGA,N11,environmental,COI,3,UC3-COI_S24_L001_R1_001.fastq.gz,...,100,3.438635,100,100,89.03849,2.019294,100,25.820711,0.395819,100
UC3_1,3.2,GCGTAAGA,CACGACGTTGTAAAACGAC,GGATAACAATTTCACACAGG,N9_GCGTAAGA,N9,environmental,COI,19,UC3-1-COI_S40_L001_R1_001.fastq.gz,...,100,3.438635,100,100,89.03849,2.019294,100,25.820711,0.395819,100
UC3_2,3.3,TTCTAGCT,CACGACGTTGTAAAACGAC,GGATAACAATTTCACACAGG,N10_TTCTAGCT,N10,environmental,COI,17,UC3-2-COI_S38_L001_R1_001.fastq.gz,...,100,3.438635,100,100,89.03849,2.019294,100,25.820711,0.395819,100


In [5]:
# Decontaminated 18S tables, in the order: sequences, OTU counts, taxonomy.
files = ['/Users/kpitz/Projects/Gulf_of_California/Decontaminated_tables/GOC_18S_seq_table_100119.csv',
         '/Users/kpitz/Projects/Gulf_of_California/Decontaminated_tables/GOC_18S_otu_table_100119.csv',
         '/Users/kpitz/Projects/Gulf_of_California/Decontaminated_tables/GOC_18S_taxa_table_100119.csv']
# Each file stores the OTU ids in its unnamed first column; use it as the
# index and call it 'OTU'.
seq_18S_filt = pd.read_csv(files[0]).set_index('Unnamed: 0').rename_axis('OTU')
raw_18S_filt = pd.read_csv(files[1]).set_index('Unnamed: 0').rename_axis('OTU')

# The taxonomy frame is kept in df so that it is the one displayed below.
df = pd.read_csv(files[2]).set_index('Unnamed: 0').rename_axis('OTU')
Ftaxa_18S_filt = df.copy()

df.head()

Unnamed: 0_level_0,Kingdom,Phylum,Class,Order,Family,Genus,Species
OTU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
OTU_1,Eukaryota,Arthropoda,Hexanauplia,Calanoida,Calanidae,Calanus,s_
OTU_10,Eukaryota,Arthropoda,Hexanauplia,Calanoida,Metridinidae,Metridia,Metridia lucens
OTU_100,Eukaryota,Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,g_,s_
OTU_100012,Eukaryota,Arthropoda,Hexanauplia,Calanoida,Metridinidae,Metridia,Metridia lucens
OTU_100017,Eukaryota,Arthropoda,Hexanauplia,Calanoida,Calanidae,Calanus,s_


# Import USEARCH Data

In [6]:
# OTU tables from the Geller lab data directory (USEARCH results), each
# indexed by its '#OTU ID' column.  The files are tab-separated; the
# separator is written as the escape '\t' rather than a literal tab
# character so that it survives editors that rewrite whitespace.
#rarefied datasets
Geller_18S = pd.read_csv('/Users/kpitz/Projects/Gulf_of_California/Geller_lab_data/18S_Geller_MGmodified_9Jan19.txt', sep='\t')
Geller_18S.set_index('#OTU ID', inplace=True)
#original 18S before mistake fixed (OTU_67 included and taxonomy wrong- sequences also wrong)
#Geller_18So = pd.read_csv('/Users/kpitz/Projects/Gulf_of_California/Geller_lab_data/Incorrect_18S/18S_Geller.txt', sep='\t')
#Geller_18So
Geller_COI = pd.read_csv('/Users/kpitz/Projects/Gulf_of_California/Geller_lab_data/COI_Geller.txt', sep='\t')
Geller_COI.set_index('#OTU ID', inplace=True)

#non-rarefied datasets
file= '/Users/kpitz/Projects/Gulf_of_California/Geller_lab_data/Not_rarefied/MBARI_18S_otu_table_10097_txt.txt'
Geller_18S_nr = pd.read_csv(file, sep='\t')
Geller_18S_nr.set_index('#OTU ID', inplace=True)
file= '/Users/kpitz/Projects/Gulf_of_California/Geller_lab_data/Not_rarefied/MBARI_COI_otu_table_10095_txt.txt'
Geller_COI_nr = pd.read_csv(file, sep='\t')
Geller_COI_nr.set_index('#OTU ID', inplace=True)
Geller_18S_nr.head()




Unnamed: 0_level_0,CP23_1,CP23,CP23_2,GOC2a,GOC2b,UC10,UC1,UC12,UC13,UC14,UC15,UC2,UC3_1,UC3,UC3_2,UC4,UC5,UC6,UC7,UC9
#OTU ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
OTU_6,19231,22170,14287,34957,22575,68966,6533,28481,32333,14665,34219,3970,28208,41001,39997,13079,2023,373,10606,1568
OTU_28,16679,18639,13582,1344,1294,5722,25575,26363,29378,14098,32274,21545,20676,23539,31666,16134,14833,35070,25123,24619
OTU_89,10753,11618,4602,22,0,1691,0,1196,223,1298,725,3,0,0,0,3,0,0,0,1
OTU_34,7817,9214,2665,1670,1346,306,27,2595,1881,2881,2781,1955,189,299,68,19,17,1011,36,40
OTU_168,3404,3930,1326,490,244,0,0,1,0,773,100,0,0,0,0,0,0,0,0,0


In [7]:
#Make compositional rarefied OTU table and taxa table
# Format Geller Data to get OTU table / taxa table
print(list(Ftaxa_COI))
#levels = list(Ftaxa_COI)
levels = ['Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']

df = Geller_COI.copy()
# Species becomes "Genus Species"; it stays missing when either part is missing
df['Species'] = df['Genus'] + ' ' + df['Species']
df = df.fillna('no hits')
df['Class'] = df['Class'].str.replace('Maxillopoda', 'Hexanauplia')
#df.set_index('#OTU ID', inplace=True)

# Taxonomy ranks as strings, with surrounding white space removed
df[levels] = df[levels].astype(str).apply(lambda rank: rank.str.strip())

# Replace every '.' in the column labels with '_'
df.columns = [label.replace('.', '_') for label in df.columns]

Geller_COI_taxa = df[levels]

Geller_COI_otu = df[['CP23_1', 'CP23', 'CP23_2', 'GOC2a', 'GOC2b', 'UC10', 'UC1', 'UC12', 'UC13',
                     'UC14', 'UC15', 'UC2', 'UC3_1', 'UC3', 'UC3_2', 'UC4', 'UC5', 'UC6', 'UC7', 'UC9']]
print(list(df))

#This is rarefied data. Create a percent rarefied data OTU table
# Work sample-by-row so each row can be divided by its own read total.
df = Geller_COI_otu.T
cols = list(df)
df = (df.div(df.sum(axis=1), axis=0) * 100).T
Geller_COI_otu_comp = df.copy()
df

['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']
['CP23_1', 'CP23_2', 'CP23', 'GOC2a', 'GOC2b', 'UC10', 'UC12', 'UC13', 'UC14', 'UC15', 'UC1', 'UC2', 'UC3_1', 'UC3_2', 'UC3', 'UC4', 'UC5', 'UC6', 'UC7', 'UC9', 'Sequence', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']


Unnamed: 0_level_0,CP23_1,CP23,CP23_2,GOC2a,GOC2b,UC10,UC1,UC12,UC13,UC14,UC15,UC2,UC3_1,UC3,UC3_2,UC4,UC5,UC6,UC7,UC9
#OTU ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
OTU_1,0.212719,0.245167,0.060691,9.074794,7.174748,0.189884,0.000000,1.415120,1.799095,1.382671,4.411208,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000601,0.000000
OTU_10,1.819526,1.790682,1.089432,0.116575,0.146620,2.282219,0.000000,6.706046,8.487715,5.893629,14.037027,0.000000,0.000000,0.000000,0.015623,0.001803,0.000000,0.016224,0.000000,0.243965
OTU_100,0.183275,0.233750,0.144817,0.036655,0.000000,0.182674,0.000000,0.007812,0.198297,0.003005,0.044467,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
OTU_1000,0.000000,0.000000,0.000000,0.004807,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
OTU_1001,0.000000,0.000000,0.000000,0.000000,0.002404,0.000000,0.000000,0.000000,0.000000,0.000000,0.000601,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
OTU_1002,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.194692,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
OTU_1003,0.000000,0.000000,0.000000,0.096144,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
OTU_1004,0.000000,0.000000,0.000000,0.009014,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
OTU_1005,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.013821,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
OTU_1006,0.010215,0.015623,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [8]:
#Make compositional rarefied OTU table and taxa table
#18S
# Format Geller Data to get OTU table / taxa table
print(list(Ftaxa_18S))
#levels = list(Ftaxa_18S)
levels = ['Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']

df = Geller_18S.copy()
# Species becomes "Genus Species"; it stays missing when either part is missing
df['Species'] = df['Genus'] + ' ' + df['Species']
df = df.fillna('no hits')
df['Class'] = df['Class'].str.replace('Maxillopoda', 'Hexanauplia')
#df.set_index('#OTU ID', inplace=True)

# Taxonomy ranks as strings, with surrounding white space removed
df[levels] = df[levels].astype(str).apply(lambda rank: rank.str.strip())

# Replace every '.' in the column labels with '_'
df.columns = [label.replace('.', '_') for label in df.columns]

Geller_18S_taxa = df[levels]

Geller_18S_otu = df[['CP23_1', 'CP23', 'CP23_2', 'GOC2a', 'UC10', 'UC1', 'UC12', 'UC13',
                     'UC14', 'UC15', 'UC2', 'UC3_1', 'UC3', 'UC3_2', 'UC4', 'UC5', 'UC6', 'UC7', 'UC9']]
print(list(df))

#This is rarefied data. Create a percent rarefied data OTU table
# Work sample-by-row so each row can be divided by its own read total.
df = Geller_18S_otu.T
cols = list(df)
df = (df.div(df.sum(axis=1), axis=0) * 100).T
Geller_18S_otu_comp = df.copy()
df

['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']
['CP23_1', 'CP23', 'CP23_2', 'GOC2a', 'UC10', 'UC1', 'UC12', 'UC13', 'UC14', 'UC15', 'UC2', 'UC3_1', 'UC3', 'UC3_2', 'UC4', 'UC5', 'UC6', 'UC7', 'UC9', 'Sequence', 'Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species', 'OrganismDescriptionFromGenBank']


Unnamed: 0_level_0,CP23_1,CP23,CP23_2,GOC2a,UC10,UC1,UC12,UC13,UC14,UC15,UC2,UC3_1,UC3,UC3_2,UC4,UC5,UC6,UC7,UC9
#OTU ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
OTU_1,0.001173,0.001173,0.000000,0.000000,0.068012,0.075048,0.000000,0.000000,0.001173,0.000000,0.024625,0.015244,0.025798,0.008208,0.025798,0.900573,0.005863,0.304882,0.001173
OTU_10,0.024625,0.026970,0.041042,0.001173,0.000000,0.000000,0.260322,0.004690,0.000000,0.007036,0.003518,0.792692,0.722335,3.153180,0.024625,0.021107,0.724680,0.725853,0.000000
OTU_100,0.001173,0.000000,0.000000,0.002345,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.002345,0.002345,0.002345,0.290810,0.000000,0.004690,0.001173,0.000000
OTU_101,0.000000,0.000000,0.000000,0.000000,0.000000,0.003518,0.000000,0.000000,0.000000,0.000000,0.001173,0.002345,0.005863,0.001173,0.000000,0.056286,0.000000,0.022280,0.000000
OTU_102,0.000000,0.001173,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.003518,0.000000,0.015244,0.018762,0.005863,0.043387,0.017589,0.003518,0.004690,0.000000
OTU_103,5.182988,6.199651,3.483859,0.376412,0.003518,0.000000,0.341233,0.016417,0.300191,3.218846,0.000000,0.002345,0.002345,0.000000,0.001173,0.000000,0.000000,0.000000,0.001173
OTU_104,0.000000,0.000000,0.000000,0.000000,0.001173,0.003518,0.000000,0.000000,0.000000,0.000000,0.002345,0.000000,0.003518,0.002345,0.000000,0.063322,0.000000,0.016417,0.000000
OTU_105,0.000000,0.000000,0.000000,0.001173,0.472567,0.000000,0.016417,0.007036,0.029316,0.003518,0.000000,0.003518,0.004690,0.002345,0.000000,0.090292,0.011726,0.303709,0.000000
OTU_106,0.001173,0.000000,0.001173,0.000000,0.000000,0.144232,0.004690,0.000000,0.000000,0.002345,0.220453,0.140715,0.168858,0.110226,0.007036,0.093810,0.012899,0.077393,0.014071
OTU_107,0.000000,0.000000,0.000000,0.012899,0.352959,0.000000,0.002345,0.037524,0.066839,0.004690,0.001173,0.007036,0.002345,0.008208,0.000000,0.077393,0.005863,0.294328,0.000000


# Format Metadata

In [9]:
# Build meta_dat: the COI metadata restricted to environmental samples,
# ordered by site, and indexed by a cleaned-up sample name.
meta_dat = meta_COI.reset_index()
print(list(meta_dat))
#meta_dat['site']= meta_dat.sample_name.str.split('.').str[:-1].str.join('.')
# GOC2a/GOC2b/NTC get fixed ranks; every other site is ordered by the first
# number in its name.  All replacements are literal, so regex=False is
# stated explicitly instead of relying on the pandas default.
meta_dat['site_Order'] = (meta_dat['site']
                          .str.replace('GOC2a', '16', regex=False)
                          .str.replace('GOC2b', '17', regex=False)
                          .str.replace('NTC', '0', regex=False))
meta_dat['site_Order'] = meta_dat['site_Order'].str.extract(r'(\d+)', expand=False)
meta_dat['site_Order'] = meta_dat['site_Order'].astype(int)
# .copy() so the in-place sort and the assignments below act on an
# independent frame rather than on a slice of the unfiltered one.
meta_dat = meta_dat.loc[meta_dat['sample_type'] == 'environmental'].copy()
meta_dat.sort_values(['site_Order', 'sample_name'], inplace=True)
# Drop the marker suffix and turn '.' into '_' in the sample names.
meta_dat['sample_name'] = meta_dat['sample_name'].str.replace('_COI', '', regex=False)
meta_dat['sample_name'] = meta_dat['sample_name'].str.replace('.', '_', regex=False)
meta_dat.set_index('sample_name', inplace=True)

meta_dat.head()

['sample_name', 'order', 'tag_sequence', 'tag_sequence_orig', 'primer_sequence_F', 'primer_sequence_R', 'library_tag_combo', 'library', 'sample_type', 'locus', 'tag_number', 'R1', 'R2', 'Treatment', 'depth', 'dec_lat', 'dec_long', 'fluor', 'tmp', 'sal', 'sigma_theta', 'conduct', 'oxy_ml', 'transmiss', 'Description', 'site']


Unnamed: 0_level_0,order,tag_sequence,tag_sequence_orig,primer_sequence_F,primer_sequence_R,library_tag_combo,library,sample_type,locus,tag_number,...,fluor,tmp,sal,sigma_theta,conduct,oxy_ml,transmiss,Description,site,site_Order
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
UC1,1,TTCTAGCT_AA,TTCTAGCT,GGWACWGGWTGAACWGTWTAYCCYCC,TANACYTCNGGRTGNCCRAARAAYCA,N7_TTCTAGCT,N7,environmental,COI,1,...,0.317525,12.57277917,33.31300833,25.17000417,3.879983208,5.731966667,86.42458333,PCNorth,UC1,1
UC2,2,CCTAGAGT_AA,CCTAGAGT,GGWACWGGWTGAACWGTWTAYCCYCC,TANACYTCNGGRTGNCCRAARAAYCA,N8_CCTAGAGT,N8,environmental,COI,2,...,0.327413043,12.72407391,33.28353913,25.11782174,3.890925304,5.685595652,85.7651913,PCNorth,UC2,2
UC3_1,19,GCGTAAGA_AT,GCGTAAGA,GGWACWGGWTGAACWGTWTAYCCYCC,TANACYTCNGGRTGNCCRAARAAYCA,N9_GCGTAAGA,N9,environmental,COI,19,...,0.351265217,12.57266522,33.42426522,25.25619565,3.891625217,5.249904348,85.76833913,PCNorth,UC3,3
UC3_2,17,TTCTAGCT_AT,TTCTAGCT,GGWACWGGWTGAACWGTWTAYCCYCC,TANACYTCNGGRTGNCCRAARAAYCA,N10_TTCTAGCT,N10,environmental,COI,17,...,0.351265217,12.57266522,33.42426522,25.25619565,3.891625217,5.249904348,85.76833913,PCNorth,UC3,3
UC3,3,GCGTAAGA_AA,GCGTAAGA,GGWACWGGWTGAACWGTWTAYCCYCC,TANACYTCNGGRTGNCCRAARAAYCA,N11_GCGTAAGA,N11,environmental,COI,3,...,0.351265217,12.57266522,33.42426522,25.25619565,3.891625217,5.249904348,85.76833913,PCNorth,UC3,3


# Euphausiid

In [69]:
#OTUs, raw reads, samples detected across both markers, both pipelines
# This cell: filtered (decontaminated) Banzai COI table, family Euphausiidae.

# Taxonomy and read counts side by side, one row per OTU.
df= pd.concat([Ftaxa_COI_filt, raw_COI_filt], axis=1)
df=df.loc[df['Family']=='Euphausiidae']
# Group on every taxonomy column except the first one.
levels = list(Ftaxa_COI_filt)[1:]
# A column of ones, so that summing per group yields the number of OTUs.
df['OTUs']=1
df=df.groupby(levels).sum()
df1 =df[['OTUs']]
df.drop('OTUs', axis=1, inplace=True)
#df['tot']=df.sum(axis=1)
# Put samples on the rows to attach their sample_type, keep only the
# environmental ones, then flip back to taxa on the rows.
df=pd.concat([df.T, meta_dat['sample_type']], axis=1 )
df=df.loc[df['sample_type']=='environmental']
df=df.drop('sample_type', axis=1)
df=df.T
cols = list(df)
# Total reads per taxon, and the number of samples with a non-zero count.
df['Raw_Reads']= df.sum(axis=1)
df['Samples_Detected']=df[cols].astype(bool).astype(int).sum(axis=1)
df=df[['Raw_Reads', 'Samples_Detected']]

# Rebuild one column per taxonomy level from element i of each index entry.
# NOTE(review): presumably the index holds plain tuples here because the
# concat with the flat sample_type Series flattened the MultiIndex -- confirm.
for i in range(len(levels)):
    df[levels[i]]=df.index.str[i]
df.set_index(levels, inplace=True)
# OTU counts next to the read and detection summaries.
df=pd.concat([df1, df], axis=1)
Eu_COI_BANZAI_Filt = df.copy()
Eu_COI_BANZAI_Filt

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,OTUs,Raw_Reads,Samples_Detected
Phylum,Class,Order,Family,Genus,Species,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia diomedeae,13,861,7
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia distinguenda,47,10321,9
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia eximia,60,48209,10
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia gibboides,9,780,1
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia lamelligera,19,15270,9
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia pacifica,84,110418,13
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia recurva,6,273,3
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia tenera,97,20784,9
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,s_,81,688,16
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Nematoscelis,Nematoscelis difficilis,142,68288,15


In [70]:
#OTUs, raw reads, samples detected across both markers, both pipelines
# This cell: filtered (decontaminated) Banzai 18S table, family Euphausiidae.

# Taxonomy and read counts side by side, one row per OTU.
df= pd.concat([Ftaxa_18S_filt, raw_18S_filt], axis=1)
df=df.loc[df['Family']=='Euphausiidae']
# Group on every taxonomy column except the first one.  The level names come
# from the 18S taxonomy table, so this cell no longer depends on the COI one.
levels = list(Ftaxa_18S_filt)[1:]
# A column of ones, so that summing per group yields the number of OTUs.
df['OTUs']=1
df=df.groupby(levels).sum()
df1 =df[['OTUs']]
df.drop('OTUs', axis=1, inplace=True)
#df['tot']=df.sum(axis=1)
# Put samples on the rows to attach their sample_type, keep only the
# environmental ones, then flip back to taxa on the rows.
# NOTE(review): meta_dat is built from the COI metadata; this assumes the
# 18S table uses the same sample names -- confirm.
df=pd.concat([df.T, meta_dat['sample_type']], axis=1 )
df=df.loc[df['sample_type']=='environmental']
df=df.drop('sample_type', axis=1)
df=df.T
cols = list(df)
# Total reads per taxon, and the number of samples with a non-zero count.
df['Raw_Reads']= df.sum(axis=1)
df['Samples_Detected']=df[cols].astype(bool).astype(int).sum(axis=1)
df=df[['Raw_Reads', 'Samples_Detected']]

# Rebuild one column per taxonomy level from element i of each index entry.
# NOTE(review): presumably the index holds plain tuples here because the
# concat with the flat sample_type Series flattened the MultiIndex -- confirm.
for i in range(len(levels)):
    df[levels[i]]=df.index.str[i]
df.set_index(levels, inplace=True)
# OTU counts next to the read and detection summaries.
df=pd.concat([df1, df], axis=1)
Eu_18S_BANZAI_Filt = df.copy()
Eu_18S_BANZAI_Filt

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,OTUs,Raw_Reads,Samples_Detected
Phylum,Class,Order,Family,Genus,Species,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia brevis,38,980,8
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,s_,1285,77365,20
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Nyctiphanes,Nyctiphanes simplex,282,4662,6
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Nyctiphanes,s_,37,82,4
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Thysanoessa,s_,223,5358,11
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,g_,s_,4533,66612,20


In [80]:
#unfiltered COI
#OTUs, raw reads, samples detected across both markers, both pipelines
# This cell: unfiltered Banzai COI table, family Euphausiidae.

# Taxonomy and read counts side by side, one row per OTU.
df= pd.concat([taxa_COI, raw_COI], axis=1)
df=df.loc[df['Family']=='Euphausiidae']
# Only the level names are taken from the filtered taxonomy table
# (every column except the first one).
levels = list(Ftaxa_COI_filt)[1:]
# A column of ones, so that summing per group yields the number of OTUs.
df['OTUs']=1
df=df.groupby(levels).sum()
df1 =df[['OTUs']]
df.drop('OTUs', axis=1, inplace=True)
# Strip the '_COI' suffix from the sample columns; meta_dat's index had the
# same suffix removed when it was built.
cols = list(df)
for i in range(len(cols)):
    cols[i] = cols[i].replace('_COI','')
df.columns = cols
#df['tot']=df.sum(axis=1)
# Put samples on the rows to attach their sample_type, keep only the
# environmental ones, then flip back to taxa on the rows.
df=pd.concat([df.T, meta_dat['sample_type']], axis=1 )
df=df.loc[df['sample_type']=='environmental']
df=df.drop('sample_type', axis=1)
df=df.T
cols = list(df)
# Total reads per taxon, and the number of samples with a non-zero count.
df['Raw_Reads']= df.sum(axis=1)
df['Samples_Detected']=df[cols].astype(bool).astype(int).sum(axis=1)
df=df[['Raw_Reads', 'Samples_Detected']]

# Rebuild one column per taxonomy level from element i of each index entry.
# NOTE(review): presumably the index holds plain tuples here because the
# concat with the flat sample_type Series flattened the MultiIndex -- confirm.
for i in range(len(levels)):
    df[levels[i]]=df.index.str[i]
df.set_index(levels, inplace=True)
# OTU counts next to the read and detection summaries.
df=pd.concat([df1, df], axis=1)
Eu_COI_BANZAI = df.copy()
Eu_COI_BANZAI



of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,OTUs,Raw_Reads,Samples_Detected
Phylum,Class,Order,Family,Genus,Species,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia diomedeae,16,871,7
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia distinguenda,76,10561,9
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia eximia,67,48253,13
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia gibboides,14,828,1
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia lamelligera,45,15477,9
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia pacifica,149,110798,15
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia recurva,10,282,4
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia sibogae,72,533,8
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia tenera,154,22098,10
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,s_,7,40,5


In [81]:
#unfiltered 18S
#OTUs, raw reads, samples detected across both markers, both pipelines
# This cell: unfiltered Banzai 18S table, family Euphausiidae.

# Taxonomy and read counts side by side, one row per OTU.
df= pd.concat([taxa_18S, raw_18S], axis=1)
df=df.loc[df['Family']=='Euphausiidae']
# Only the level names are taken from the filtered taxonomy table
# (every column except the first one).
levels = list(Ftaxa_18S_filt)[1:]
# A column of ones, so that summing per group yields the number of OTUs.
df['OTUs']=1
df=df.groupby(levels).sum()
df1 =df[['OTUs']]
df.drop('OTUs', axis=1, inplace=True)
# Strip the '_18S' suffix from the sample columns before joining on the
# sample names used as meta_dat's index.
cols = list(df)
for i in range(len(cols)):
    cols[i] = cols[i].replace('_18S','')
df.columns = cols
#df['tot']=df.sum(axis=1)
# Put samples on the rows to attach their sample_type, keep only the
# environmental ones, then flip back to taxa on the rows.
# NOTE(review): meta_dat is built from the COI metadata; this assumes the
# 18S table uses the same sample names -- confirm.
df=pd.concat([df.T, meta_dat['sample_type']], axis=1 )
df=df.loc[df['sample_type']=='environmental']
df=df.drop('sample_type', axis=1)
df=df.T
cols = list(df)
# Total reads per taxon, and the number of samples with a non-zero count.
df['Raw_Reads']= df.sum(axis=1)
df['Samples_Detected']=df[cols].astype(bool).astype(int).sum(axis=1)
df=df[['Raw_Reads', 'Samples_Detected']]

# Rebuild one column per taxonomy level from element i of each index entry.
# NOTE(review): presumably the index holds plain tuples here because the
# concat with the flat sample_type Series flattened the MultiIndex -- confirm.
for i in range(len(levels)):
    df[levels[i]]=df.index.str[i]
df.set_index(levels, inplace=True)
# OTU counts next to the read and detection summaries.
df=pd.concat([df1, df], axis=1)
Eu_18S_BANZAI = df.copy()
Eu_18S_BANZAI



of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,OTUs,Raw_Reads,Samples_Detected
Phylum,Class,Order,Family,Genus,Species,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia americana,1,4,3
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia brevis,43,991,9
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia mutica,1,2,1
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia superba,3,6,2
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia tenera,16,35,6
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,s_,3613,82302,20
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Meganyctiphanes,Meganyctiphanes norvegica,1,3,1
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Nematoscelis,s_,2,5,2
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Nyctiphanes,Nyctiphanes simplex,357,4857,6
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Stylocheiron,Stylocheiron maximum,1,2,1


In [66]:
#Geller

#unfiltered COI
#OTUs, raw reads, samples detected across both markers, both pipelines
# This cell: Geller non-rarefied COI counts, family Euphausiidae.

# Non-rarefied counts joined with the formatted Geller taxonomy, per OTU.
df= pd.concat([Geller_COI_nr, Geller_COI_taxa], axis=1)
df=df.loc[df['Family']=='Euphausiidae']

# Group on every column of the Geller taxonomy table.
levels = list(Geller_COI_taxa)
# A column of ones, so that summing per group yields the number of OTUs.
df['OTUs']=1
df=df.groupby(levels).sum()
df1 =df[['OTUs']]
df.drop('OTUs', axis=1, inplace=True)
# The environmental-sample filter used in the Banzai cells is disabled here,
# so every column of the count table is included.
#df['tot']=df.sum(axis=1)
#df=pd.concat([df.T, meta_dat['sample_type']], axis=1 )
#df=df.loc[df['sample_type']=='environmental']
#df=df.drop('sample_type', axis=1)
#df=df.T
cols = list(df)
# Total reads per taxon, and the number of samples with a non-zero count.
df['Raw_Reads']= df.sum(axis=1)
df['Samples_Detected']=df[cols].astype(bool).astype(int).sum(axis=1)
df=df[['Raw_Reads', 'Samples_Detected']]

# No index rebuild is needed: the grouped index is still in place.
#for i in range(len(levels)):
#    df[levels[i]]=df.index.str[i]
#df.set_index(levels, inplace=True)

# OTU counts next to the read and detection summaries.
df=pd.concat([df1, df], axis=1)
Eu_COI_Gell = df.copy()
Eu_COI_Gell


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,OTUs,Raw_Reads,Samples_Detected
Phylum,Class,Order,Family,Genus,Species,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia diomedeae,1,1350,7
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia eximia,1,58933,13
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia gibboides,1,1401,1
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia lamelligera,1,18913,9
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia pacifica,1,131811,15
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia recurva,1,503,3
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia sibogae,2,756,8
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia tenera,2,5543,9
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,no hits,2,40031,11
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Nematoscelis,Nematoscelis difficilis,3,112248,15


In [67]:
#Geller

#unfiltered 18S
#Per taxonomic group within Euphausiidae: number of OTUs, total raw reads,
#and number of columns (samples) with a non-zero read count.
#NOTE: every column of Geller_18S_nr is counted here; no sample_type
#('environmental') filter is applied in this cell.

# sort=False makes the row-order choice explicit and silences the pandas
# FutureWarning about implicit sorting of a non-aligned axis. Row order is
# irrelevant here because the groupby below re-sorts by its keys.
df = pd.concat([Geller_18S_nr, Geller_18S_taxa], axis=1, sort=False)
# .copy() gives an independent frame, so the column assignment below does
# not write into a slice of the concatenated table.
df = df.loc[df['Family'] == 'Euphausiidae'].copy()

# Group on every taxonomy column.
levels = list(Geller_18S_taxa)
# One marker per row, so the grouped sum yields the row (OTU) count per group.
df['OTUs'] = 1
df = df.groupby(levels).sum()
# Set the OTU counts aside; they are re-attached after the read summary.
df1 = df[['OTUs']]
df = df.drop('OTUs', axis=1)

cols = list(df)
df['Raw_Reads'] = df.sum(axis=1)
# Count the columns with a truthy (non-zero) value in each row.
df['Samples_Detected'] = df[cols].astype(bool).astype(int).sum(axis=1)
df = df[['Raw_Reads', 'Samples_Detected']]

# df1 and df share the index produced by the same groupby, so this is a
# plain column-wise join.
df = pd.concat([df1, df], axis=1)
Eu_18S_Gell = df.copy()
Eu_18S_Gell


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,OTUs,Raw_Reads,Samples_Detected
Phylum,Class,Order,Family,Genus,Species,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,no hits,3,164471,20
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Meganyctiphanes,no hits,5,1542,10
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Nematoscelis,Nematoscelis difficilis,1,2556,14
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Nematoscelis,no hits,75,131997,20
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Nyctiphanes,no hits,2,12653,20
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Tessarabrachion,no hits,1,25,4


In [82]:
# Side-by-side Euphausiidae summary: each per-dataset table becomes one
# top-level column group, labelled with the key paired with it below.
labelled_tables = [
    ('COI_Banzai_Filt', Eu_COI_BANZAI_Filt),
    ('COI_Banzai_Megan', Eu_COI_BANZAI),
    ('18S_Banzai_Filt', Eu_18S_BANZAI_Filt),
    ('18S_Banzai_Megan', Eu_18S_BANZAI),
    ('USEARCH_COI', Eu_COI_Gell),
    ('USEARCH_18S', Eu_18S_Gell),
]
keys = [label for label, _ in labelled_tables]
df = pd.concat([table for _, table in labelled_tables], keys=keys, axis=1)
# Write the combined table to disk, then show it as the cell output.
df.to_csv('/Users/kpitz/Projects/Gulf_of_California/Cutadapt_Results/Euphausiid_Abundance_Across_datasets.csv')
df



Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,COI_Banzai_Filt,COI_Banzai_Filt,COI_Banzai_Filt,COI_Banzai_Megan,COI_Banzai_Megan,COI_Banzai_Megan,18S_Banzai_Filt,18S_Banzai_Filt,18S_Banzai_Filt,18S_Banzai_Megan,18S_Banzai_Megan,18S_Banzai_Megan,USEARCH_COI,USEARCH_COI,USEARCH_COI,USEARCH_18S,USEARCH_18S,USEARCH_18S
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,OTUs,Raw_Reads,Samples_Detected,OTUs,Raw_Reads,Samples_Detected,OTUs,Raw_Reads,Samples_Detected,OTUs,Raw_Reads,Samples_Detected,OTUs,Raw_Reads,Samples_Detected,OTUs,Raw_Reads,Samples_Detected
Phylum,Class,Order,Family,Genus,Species,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia americana,,,,,,,,,,1.0,4.0,3.0,,,,,,
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia brevis,,,,,,,38.0,980.0,8.0,43.0,991.0,9.0,,,,,,
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia diomedeae,13.0,861.0,7.0,16.0,871.0,7.0,,,,,,,1.0,1350.0,7.0,,,
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia distinguenda,47.0,10321.0,9.0,76.0,10561.0,9.0,,,,,,,,,,,,
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia eximia,60.0,48209.0,10.0,67.0,48253.0,13.0,,,,,,,1.0,58933.0,13.0,,,
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia gibboides,9.0,780.0,1.0,14.0,828.0,1.0,,,,,,,1.0,1401.0,1.0,,,
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia lamelligera,19.0,15270.0,9.0,45.0,15477.0,9.0,,,,,,,1.0,18913.0,9.0,,,
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia mutica,,,,,,,,,,1.0,2.0,1.0,,,,,,
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia pacifica,84.0,110418.0,13.0,149.0,110798.0,15.0,,,,,,,1.0,131811.0,15.0,,,
Arthropoda,Malacostraca,Euphausiacea,Euphausiidae,Euphausia,Euphausia recurva,6.0,273.0,3.0,10.0,282.0,4.0,,,,,,,1.0,503.0,3.0,,,
