# ADAPTING MICROBIOME COUNTS
There's a difference in the number of replicates in the microbiome samples vs in the resistome samples due to the high DNA requirements the resistome analysis depends on.

In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
os.chdir("../data/r_data")
micro_data = pd.read_csv("rclr_zotu_counts.csv", sep = ",", index_col = 0)
micro_data.head()

Unnamed: 0,s_1,s_10,s_11,s_12,s_13,s_14,s_15,s_16,s_17,s_18,...,s_6,s_7,s_8,s_9,s_28,s_29,s_30,s_31,s_32,s_33
zOTU_1038,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.780785,0.0,-0.52627,2.054452,-0.07456,-0.484203,-0.026294,0.0,0.0,0.0
zOTU_255,0.0,-0.595558,-0.684264,-0.103321,-1.524934,0.0,0.0,2.214191,2.140224,1.967346,...,-2.707852,-0.972064,1.334483,-0.205574,2.992562,2.76099,2.792104,0.0,-2.893764,-1.802858
zOTU_885,0.381853,1.668806,1.198468,1.374781,0.015511,-0.477156,-0.2226,1.565104,0.820375,1.194157,...,0.0,-1.195208,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zOTU_638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.233791,-1.48289,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zOTU_1166,-1.322895,0.0,0.0,-1.281976,-2.623546,-0.72847,-2.707506,-0.192754,-0.655531,-0.731134,...,1.419283,-2.581502,0.0,-0.74457,0.0,0.0,0.0,0.0,0.0,0.0


First things: fix the number of replicates for soil and water

In [3]:
# Fix the soil in ion
micro_data["argl_13"] = (micro_data["s_16"] + micro_data["s_17"] + micro_data["s_18"]) // 3
# Fix the water in ion
micro_data["argl_14"] = (micro_data["s_28"] + micro_data["s_29"] + micro_data["s_30"]) // 3
micro_data.head()

Unnamed: 0,s_1,s_10,s_11,s_12,s_13,s_14,s_15,s_16,s_17,s_18,...,s_8,s_9,s_28,s_29,s_30,s_31,s_32,s_33,argl_13,argl_14
zOTU_1038,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.52627,2.054452,-0.07456,-0.484203,-0.026294,0.0,0.0,0.0,0.0,-1.0
zOTU_255,0.0,-0.595558,-0.684264,-0.103321,-1.524934,0.0,0.0,2.214191,2.140224,1.967346,...,1.334483,-0.205574,2.992562,2.76099,2.792104,0.0,-2.893764,-1.802858,2.0,2.0
zOTU_885,0.381853,1.668806,1.198468,1.374781,0.015511,-0.477156,-0.2226,1.565104,0.820375,1.194157,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
zOTU_638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zOTU_1166,-1.322895,0.0,0.0,-1.281976,-2.623546,-0.72847,-2.707506,-0.192754,-0.655531,-0.731134,...,0.0,-0.74457,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0


Now, fix the replicates in plastic. It isn't so inmediate, as there's 2 for 3 (I will add 2 of those 3 and divide them by two, while keeping the third intact)

In [4]:
# Fix the PUR in ion
micro_data["argl_17"] = (micro_data["s_13"] + micro_data["s_14"]) // 2
# Fix the EPS in ion
micro_data["argl_15"] = (micro_data["s_10"] + micro_data["s_11"]) // 2
micro_data.head()

Unnamed: 0,s_1,s_10,s_11,s_12,s_13,s_14,s_15,s_16,s_17,s_18,...,s_28,s_29,s_30,s_31,s_32,s_33,argl_13,argl_14,argl_17,argl_15
zOTU_1038,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.07456,-0.484203,-0.026294,0.0,0.0,0.0,0.0,-1.0,0.0,0.0
zOTU_255,0.0,-0.595558,-0.684264,-0.103321,-1.524934,0.0,0.0,2.214191,2.140224,1.967346,...,2.992562,2.76099,2.792104,0.0,-2.893764,-1.802858,2.0,2.0,-1.0,-1.0
zOTU_885,0.381853,1.668806,1.198468,1.374781,0.015511,-0.477156,-0.2226,1.565104,0.820375,1.194157,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,1.0
zOTU_638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zOTU_1166,-1.322895,0.0,0.0,-1.281976,-2.623546,-0.72847,-2.707506,-0.192754,-0.655531,-0.731134,...,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,-2.0,0.0


And finishing touches: drop "duplicate" columns and rename those that are ok as is

In [5]:
correspondance_dict = {"s_1": "argl_1", # 3 uru soil to uru soil
                       "s_2": "argl_2", 
                       "s_3": "argl_3", 
                       "s_31": "argl_4", # 3 uru water to 3 uru water
                       "s_32": "argl_5",
                       "s_33": "argl_6", 
                       "s_4": "argl_7", # 3 uru EPS to 3 uru EPS
                       "s_5": "argl_8", 
                       "s_6": "argl_9", 
                       "s_7": "argl_10", # 3 uru PUR to 3 uru PUR
                       "s_8": "argl_11",
                       "s_9": "argl_12", 
                       "s_12": "argl_16", # 13-14-15 are already defined, so 1 ion EPS left to 1 ion EPS
                       "s_15": "argl_18", # 17 is already defined, so 1 ion PUR left to ion PUR
                       "s_25": "argl_19", # 3 ardley soil to ardley soil
                       "s_26": "argl_20", 
                       "s_27": "argl_21", 
                       "s_19": "argl_22", # 3 ardley EPS to ardley soil
                       "s_20": "argl_23", 
                       "s_21": "argl_24",
                       "s_22": "argl_25", # 3 ardley PUR to ardley soil
                       "s_23": "argl_26", 
                       "s_24": "argl_27",
                      }
micro_data.rename(columns = correspondance_dict, inplace = True)
micro_data.drop(columns = ["s_16", "s_17", "s_18", "s_28", "s_29", "s_30", "s_13", "s_14", "s_10", "s_11"], inplace = True)
print(micro_data.columns)

Index(['argl_1', 'argl_16', 'argl_18', 'argl_22', 'argl_2', 'argl_23',
       'argl_24', 'argl_25', 'argl_26', 'argl_27', 'argl_19', 'argl_20',
       'argl_21', 'argl_3', 'argl_7', 'argl_8', 'argl_9', 'argl_10', 'argl_11',
       'argl_12', 'argl_4', 'argl_5', 'argl_6', 'argl_13', 'argl_14',
       'argl_17', 'argl_15'],
      dtype='object')


In [6]:

micro_data.to_csv("rclr_counts_resistome_format.csv")