In [34]:
## INSTALLATION
## -------------------------------------------------------------------------------------------

## conda install ipyrad -c ipyrad
## conda install toytree -c eaton-lab

## import ipyrad into python for api
import ipyrad as ip
import ipyparallel as ipp
import ipyrad.analysis as ipa

In [35]:
## PARALLELIZATION
## -------------------------------------------------------------------------------------------

## run the line below in a terminal, or prefixed with '!'; it is a shell command, not python
## this will start an ipcluster with the cluster id "ipyrad"
#!ipcluster start -n 48 --cluster-id="ipyrad" --daemonize

## this will stop the ipcluster
#!ipcluster stop --cluster-id="ipyrad" 

## connect a client to the cluster started with the id "ipyrad"
## the next cell prints the number of cores ready to go, to make sure the client is working
ipyclient = ipp.Client(cluster_id="ipyrad")

In [36]:
## check client: print how many cores the client reports (48 in the output below)
print(len(ipyclient))
#ipyclient.ids

48


# Lib 1

In [4]:
## Set up the assembly for the first library set; it is demultiplexed in step 1 further below.
data_lib1 = ip.Assembly("tsuga_lib1")

## library-specific paths plus the shared ddrad settings, applied in this order
lib1_settings = [
    ("raw_fastq_path", "./*lib1.fastq.gz"),
    ("barcodes_path", "./barcodes_lib1.txt"),
    ("datatype", "ddrad"),
    ("output_formats", "*"),
    ("restriction_overhang", ("TGCAG", "CCGG")),
]
for param_name, param_value in lib1_settings:
    data_lib1.set_params(param_name, param_value)

## print the parameter table to confirm the settings
data_lib1.get_params()

New Assembly: tsuga_lib1


0   assembly_name               tsuga_lib1                                   
1   project_dir                 /mnt/lfs2/ruff6699/Tsugaheterophylla_AllDataAnalysis/TsugaIpyradAssembly
2   raw_fastq_path              ./*lib1.fastq.gz                             
3   barcodes_path               ./barcodes_lib1.txt                          
4   sorted_fastq_path                                                        
5   assembly_method             denovo                                       
6   reference_sequence                                                       
7   datatype                    ddrad                                        
8   restriction_overhang        ('TGCAG', 'CCGG')                            
9   max_low_qual_bases          5                                            
10  phred_Qscore_offset         33                                           
11  mindepth_statistical        6                                            
12  mindepth_majrule            6    

# Lib 2

In [5]:
## Set up the assembly for the second library set, in the same way as the first set.
data_lib2 = ip.Assembly("tsuga_lib2")

## library-specific paths plus the shared ddrad settings, applied in this order
lib2_settings = [
    ("raw_fastq_path", "./*lib2.fastq.gz"),
    ("barcodes_path", "./barcodes_lib2.txt"),
    ("datatype", "ddrad"),
    ("output_formats", "*"),
    ("restriction_overhang", ("TGCAG", "CCGG")),
]
for param_name, param_value in lib2_settings:
    data_lib2.set_params(param_name, param_value)

## print the parameter table to confirm the settings
data_lib2.get_params()

New Assembly: tsuga_lib2


0   assembly_name               tsuga_lib2                                   
1   project_dir                 /mnt/lfs2/ruff6699/Tsugaheterophylla_AllDataAnalysis/TsugaIpyradAssembly
2   raw_fastq_path              ./*lib2.fastq.gz                             
3   barcodes_path               ./barcodes_lib2.txt                          
4   sorted_fastq_path                                                        
5   assembly_method             denovo                                       
6   reference_sequence                                                       
7   datatype                    ddrad                                        
8   restriction_overhang        ('TGCAG', 'CCGG')                            
9   max_low_qual_bases          5                                            
10  phred_Qscore_offset         33                                           
11  mindepth_statistical        6                                            
12  mindepth_majrule            6    

# Lib 3

In [6]:
## Set up the assembly for the third library set, in the same way as the earlier sets.
data_lib3 = ip.Assembly("tsuga_lib3")

## library-specific paths plus the shared ddrad settings, applied in this order
lib3_settings = [
    ("raw_fastq_path", "./*lib3.fastq.gz"),
    ("barcodes_path", "./barcodes_lib3.txt"),
    ("datatype", "ddrad"),
    ("output_formats", "*"),
    ("restriction_overhang", ("TGCAG", "CCGG")),
]
for param_name, param_value in lib3_settings:
    data_lib3.set_params(param_name, param_value)

## print the parameter table to confirm the settings
data_lib3.get_params()

New Assembly: tsuga_lib3


0   assembly_name               tsuga_lib3                                   
1   project_dir                 /mnt/lfs2/ruff6699/Tsugaheterophylla_AllDataAnalysis/TsugaIpyradAssembly
2   raw_fastq_path              ./*lib3.fastq.gz                             
3   barcodes_path               ./barcodes_lib3.txt                          
4   sorted_fastq_path                                                        
5   assembly_method             denovo                                       
6   reference_sequence                                                       
7   datatype                    ddrad                                        
8   restriction_overhang        ('TGCAG', 'CCGG')                            
9   max_low_qual_bases          5                                            
10  phred_Qscore_offset         33                                           
11  mindepth_statistical        6                                            
12  mindepth_majrule            6    

# Lib 4

In [7]:
## Set up the assembly for the fourth library set, in the same way as the earlier sets.
data_lib4 = ip.Assembly("tsuga_lib4")

## library-specific paths plus the shared ddrad settings, applied in this order
lib4_settings = [
    ("raw_fastq_path", "./*lib4.fastq.gz"),
    ("barcodes_path", "./barcodes_lib4.txt"),
    ("datatype", "ddrad"),
    ("output_formats", "*"),
    ("restriction_overhang", ("TGCAG", "CCGG")),
]
for param_name, param_value in lib4_settings:
    data_lib4.set_params(param_name, param_value)

## print the parameter table to confirm the settings
data_lib4.get_params()

New Assembly: tsuga_lib4


0   assembly_name               tsuga_lib4                                   
1   project_dir                 /mnt/lfs2/ruff6699/Tsugaheterophylla_AllDataAnalysis/TsugaIpyradAssembly
2   raw_fastq_path              ./*lib4.fastq.gz                             
3   barcodes_path               ./barcodes_lib4.txt                          
4   sorted_fastq_path                                                        
5   assembly_method             denovo                                       
6   reference_sequence                                                       
7   datatype                    ddrad                                        
8   restriction_overhang        ('TGCAG', 'CCGG')                            
9   max_low_qual_bases          5                                            
10  phred_Qscore_offset         33                                           
11  mindepth_statistical        6                                            
12  mindepth_majrule            6    

# Demultiplex all four libraries

In [11]:
## run the first library through step 1 (demultiplexing), using the ipyparallel client
data_lib1.run("1", ipyclient=ipyclient)


  Assembly: tsuga_lib1
  [####################] 100%  sorting reads         | 0:01:22 | s1 | 
  [####################] 100%  writing/compressing   | 0:17:12 | s1 | 


In [12]:
## view the assembly's stats attribute to summarize step 1
## (this reads the attribute directly; no '!' or cat command is used here)
data_lib1.stats

raw_file                               total_reads    cut_found  bar_matched
tsuga_R1_lib1.fastq                       92383189     92377196     81437196
tsuga_R2_lib1.fastq                       92383189     92382358          415

sample_name                            total_reads
S006                                      10111530
S011                                       3281231
S016                                       4889141
S024                                       2030919
S069                                          8220
S081                                      11101092
S128                                        652186
S136                                      27629244
S142                                       7227747
S322                                       8845425
S324                                       2840110
S332                                       2820766

sample_name                               true_bar       obs_bar     N_records
S006  

In [8]:
## run the second library through step 1 (demultiplexing), using the ipyparallel client
data_lib2.run("1", ipyclient=ipyclient)

Parallel connection | tesla.ibest.uidaho.edu: 30 cores
[####################] 100% 0:34:46 | sorting reads        | s1 |
[####################] 100% 0:01:08 | writing/compressing  | s1 |


In [9]:
## view the assembly's stats attribute to summarize step 1 for the second library
## (the stats attribute is read here, as in the cells for the other libraries)
data_lib2.stats

raw_file                               total_reads    cut_found  bar_matched
tsuga_R1_lib2.fastq                      116773615    116765715    107153806
tsuga_R2_lib2.fastq                      116773615    116773139          372

sample_name                            total_reads
S014                                       5161631
S072                                          6183
S085                                      12231998
S096                                       7325899
S131                                       4107097
S133                                       1153533
S140                                      30741507
S176                                       9736582
S323                                      10526134
S330                                      11896726
S336                                       8068046
S492                                       6198842

sample_name                               true_bar       obs_bar     N_records
S014  

In [13]:
## run the third library through step 1 (demultiplexing), using the ipyparallel client
data_lib3.run("1", ipyclient=ipyclient)

Parallel connection | tesla.ibest.uidaho.edu: 30 cores
[####################] 100% 0:23:42 | sorting reads        | s1 |
[####################] 100% 0:00:45 | writing/compressing  | s1 |


In [6]:
## view the assembly's stats attribute to summarize step 1 for the third library
## (the stats attribute is read here, as in the cells for the other libraries)
data_lib3.stats

raw_file                               total_reads    cut_found  bar_matched
tsuga_R1_lib3.fastq                       78509847     78503546     71116206
tsuga_R2_lib3.fastq                       78509847     78509435          246

sample_name                            total_reads
S010                                       9296818
S021                                       2761225
S066                                         12758
S075                                       6084756
S090                                       5048505
S122                                        718713
S127                                       1092415
S141                                       3265448
S146                                      16443804
S325                                      10655442
S333                                       6900265
S335                                       8836303

sample_name                               true_bar       obs_bar     N_records
S010  

In [10]:
## run the fourth library through step 1 (demultiplexing), using the ipyparallel client
data_lib4.run("1", ipyclient=ipyclient)

Parallel connection | tesla.ibest.uidaho.edu: 30 cores
[####################] 100% 0:29:26 | sorting reads        | s1 |
[####################] 100% 0:00:55 | writing/compressing  | s1 |


In [12]:
## view the assembly's stats attribute to summarize step 1
## (this reads the attribute directly; no '!' or cat command is used here)
data_lib4.stats

raw_file                               total_reads    cut_found  bar_matched
tsuga_R1_lib4.fastq                       97187548     97179230     88573777
tsuga_R2_lib4.fastq                       97187548     97187165          384

sample_name                            total_reads
S004                                      19611192
S022                                       2133177
S078                                         36812
S087                                      14818090
S093                                       2393554
S124                                       3297161
S137                                       1106822
S149                                       4010058
S326                                      11132884
S327                                      14927516
S328                                      11482519
S334                                       3624376

sample_name                               true_bar       obs_bar     N_records
S004  

# Merge all four libraries

In [20]:
## merge the four demultiplexed tsuga libraries into one assembly named "AllTsugaLibs"
FullTsugaData =  ip.merge("AllTsugaLibs", [data_lib1, data_lib2, data_lib3, data_lib4])

In [22]:
## check the parameters of the merged assembly; parameters for step 2 are adjusted in the next cell
FullTsugaData.get_params()

  0   assembly_name               AllTsugaLibs                                 
  1   project_dir                 ./                                           
  2   raw_fastq_path              Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
  3   barcodes_path               Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
  4   sorted_fastq_path           Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
  5   assembly_method             denovo                                       
  6   reference_sequence                                                       
  7   datatype                    ddrad                                        
  8   restriction_overhang        ('TGCAG', 'CCGG')                            
  9   max_low_qual_bases          5                                            
  10  phred_Qscore_offset         33                                           
  11  mindepth_statistical        6                                            
  12  mindept

In [23]:
## setting filter_adapters to 2 means reads are searched for illumina adapters
FullTsugaData.set_params("filter_adapters", 2)

In [17]:
## Parameter changes for the merged assembly, applied in the order listed.
assembly_settings = [
    ## a value of 2 means reads are searched for illumina adaptors
    ("filter_adapters", 2),
    ## for steps 3 and 6
    ("clust_threshold", 0.8),
    ## for steps 4 and 5
    ("mindepth_statistical", 10),
    ("mindepth_majrule", 5),
    ("maxdepth", 50000),
    ("max_Hs_consens", 0.10),
    ("max_Ns_consens", 0.10),
    ## for step 7
    ("min_samples_locus", 10),
]
for param_name, param_value in assembly_settings:
    FullTsugaData.set_params(param_name, param_value)

## print the parameter table to confirm the new values
FullTsugaData.get_params()

0   assembly_name               FullTsugaData_AS2                            
1   project_dir                 /mnt/lfs2/ruff6699/Tsugaheterophylla_AllDataAnalysis/TsugaIpyradAssembly
2   raw_fastq_path              Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
3   barcodes_path               Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
4   sorted_fastq_path           Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
5   assembly_method             denovo                                       
6   reference_sequence                                                       
7   datatype                    ddrad                                        
8   restriction_overhang        ('TGCAG', 'CCGG')                            
9   max_low_qual_bases          5                                            
10  phred_Qscore_offset         33                                           
11  mindepth_statistical        10                                           
12  mindep

In [24]:
## run step 2 on the merged assembly (shown as "processing reads" in the progress output)
FullTsugaData.run("2", ipyclient=ipyclient)


  Assembly: AllTsugaLibs
  [####################] 100%  processing reads      | 0:24:04 | s2 | 


In [15]:
## reload a saved assembly from its json file; this rebinds FullTsugaData
## NOTE(review): no cell visible here creates the FullTsugaData_AS2 branch -- confirm where it was saved
FullTsugaData = ip.load_json("FullTsugaData_AS2.json")

loading Assembly: FullTsugaData_AS2
from saved path: /mnt/lfs2/ruff6699/Tsugaheterophylla_AllDataAnalysis/TsugaIpyradAssembly/FullTsugaData_AS2.json


In [18]:
## print the parameter settings of the reloaded assembly
## NOTE(review): the original comment said "look at the number of reads after the filtering",
## but this call prints parameters, not read counts -- confirm whether .stats was intended
FullTsugaData.get_params()

0   assembly_name               FullTsugaData_AS2                            
1   project_dir                 /mnt/lfs2/ruff6699/Tsugaheterophylla_AllDataAnalysis/TsugaIpyradAssembly
2   raw_fastq_path              Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
3   barcodes_path               Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
4   sorted_fastq_path           Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
5   assembly_method             denovo                                       
6   reference_sequence                                                       
7   datatype                    ddrad                                        
8   restriction_overhang        ('TGCAG', 'CCGG')                            
9   max_low_qual_bases          5                                            
10  phred_Qscore_offset         33                                           
11  mindepth_statistical        10                                           
12  mindep

In [24]:
## for step 3, no parameters are changed in this cell
## NOTE(review): the original note said the clust threshold is kept at 0.85, but the
## parameter cell above sets clust_threshold to 0.8 -- confirm which value was used
## for step 4, leave max alleles at 2
## for step 5, the original note said to leave max Ns at 5 bc the reads are pretty short (~50 bp)
## NOTE(review): the parameter cell above sets max_Ns_consens to 0.10 -- confirm
## for step 6, nothing changes

## run step 3 only; steps 4, 5 and 6 are run in their own cells below
FullTsugaData.run("3",force=True, ipyclient=ipyclient)

Parallel connection | crick.ibest.uidaho.edu: 48 cores
[####################] 100% 0:01:26 | dereplicating        | s3 |
[####################] 100% 0:41:05 | clustering/mapping   | s3 |
[####################] 100% 0:00:13 | building clusters    | s3 |
[####################] 100% 0:00:02 | chunking clusters    | s3 |
[####################] 100% 1:01:09 | aligning clusters    | s3 |
[####################] 100% 0:00:09 | concat clusters      | s3 |
[####################] 100% 0:00:08 | calc cluster stats   | s3 |


In [25]:
## branch the assembly under the name FullTsugaData_AS3 after step 3, presumably as a checkpoint
## the returned Assembly (shown below) is not assigned to a variable
FullTsugaData.branch("FullTsugaData_AS3")

<ipyrad.core.assembly.Assembly at 0x7fee4e0f4810>

In [26]:
## per-sample stats after step 3 (the state column reads 3)
FullTsugaData.stats

Unnamed: 0,state,reads_raw,reads_passed_filter,clusters_total,clusters_hidepth
S004,3,19611192,19531270,415086,198388
S006,3,10111530,10059201,375737,160219
S010,3,9296818,9287589,239983,120982
S011,3,3281231,3272284,199751,74638
S014,3,5161631,5142832,218847,90937
S016,3,4889141,4875493,263523,105710
S021,3,2761225,2759551,79674,28493
S022,3,2133177,2126321,90925,31671
S024,3,2030919,2023367,90956,26213
S066,3,12758,12728,2232,873


In [27]:
## for step 4, leave max alleles at 2
## run step 4 (shown as "inferring [H, E]" in the progress output)
FullTsugaData.run("4",force=True, ipyclient=ipyclient)

Parallel connection | crick.ibest.uidaho.edu: 48 cores
[####################] 100% 0:11:51 | inferring [H, E]     | s4 |


In [28]:
## branch the assembly under the name FullTsugaData_AS4 after step 4, presumably as a checkpoint
## the returned Assembly (shown below) is not assigned to a variable
FullTsugaData.branch("FullTsugaData_AS4")

<ipyrad.core.assembly.Assembly at 0x7fee4e947590>

In [31]:
## run step 5 (shown as "consens calling" in the progress output)
FullTsugaData.run("5",force=True, ipyclient=ipyclient)

Parallel connection | crick.ibest.uidaho.edu: 48 cores
[####################] 100% 0:00:08 | calculating depths   | s5 |
[####################] 100% 0:00:14 | chunking clusters    | s5 |
[####################] 100% 0:27:00 | consens calling      | s5 |
[####################] 100% 0:00:31 | indexing alleles     | s5 |


In [32]:
## branch the assembly under the name FullTsugaData_AS5 after step 5, presumably as a checkpoint
## the returned Assembly (shown below) is not assigned to a variable
FullTsugaData.branch("FullTsugaData_AS5")

<ipyrad.core.assembly.Assembly at 0x7fee4c06e750>

In [37]:
## reload the FullTsugaData_AS5 assembly from its json file; this rebinds FullTsugaData
FullTsugaData = ip.load_json("FullTsugaData_AS5.json")

loading Assembly: FullTsugaData_AS5
from saved path: /mnt/lfs2/ruff6699/Tsugaheterophylla_AllDataAnalysis/TsugaIpyradAssembly/FullTsugaData_AS5.json


In [38]:
## per-sample stats of the reloaded assembly (the state column reads 5)
FullTsugaData.stats

Unnamed: 0,state,reads_raw,reads_passed_filter,clusters_total,clusters_hidepth,hetero_est,error_est,reads_consens
S004,5,19611192,19531270,415086,198388,0.023662,0.001977,188257
S006,5,10111530,10059201,375737,160219,0.027794,0.002627,150192
S010,5,9296818,9287589,239983,120982,0.02362,0.002155,113957
S011,5,3281231,3272284,199751,74638,0.0231,0.002356,71284
S014,5,5161631,5142832,218847,90937,0.021263,0.001963,87028
S016,5,4889141,4875493,263523,105710,0.025444,0.00261,100082
S021,5,2761225,2759551,79674,28493,0.023739,0.002473,27188
S022,5,2133177,2126321,90925,31671,0.025071,0.002574,30265
S024,5,2030919,2023367,90956,26213,0.023691,0.001663,25078
S066,5,12758,12728,2232,873,0.008002,0.001222,862


In [39]:
## run step 6 (shown as "clustering across" in the progress output)
FullTsugaData.run("6",force=True, ipyclient=ipyclient)

Parallel connection | crick.ibest.uidaho.edu: 48 cores
[####################] 100% 0:00:39 | concatenating inputs | s6 |
[####################] 100% 0:55:18 | clustering tier 1    | s6 |
[####################] 100% 0:00:05 | concatenating inputs | s6 |
[####################] 100% 0:23:36 | clustering across    | s6 |
[####################] 100% 0:00:23 | building clusters    | s6 |
[####################] 100% 0:02:25 | aligning clusters    | s6 |


In [40]:
## branch the assembly under the name FullTsugaData_AS6 after step 6, presumably as a checkpoint
## the returned Assembly (shown below) is not assigned to a variable
FullTsugaData.branch("FullTsugaData_AS6")

<ipyrad.core.assembly.Assembly at 0x7fee4e550790>

In [41]:
## per-sample stats after step 6 (the state column reads 6)
FullTsugaData.stats

Unnamed: 0,state,reads_raw,reads_passed_filter,clusters_total,clusters_hidepth,hetero_est,error_est,reads_consens
S004,6,19611192,19531270,415086,198388,0.023662,0.001977,188257
S006,6,10111530,10059201,375737,160219,0.027794,0.002627,150192
S010,6,9296818,9287589,239983,120982,0.02362,0.002155,113957
S011,6,3281231,3272284,199751,74638,0.0231,0.002356,71284
S014,6,5161631,5142832,218847,90937,0.021263,0.001963,87028
S016,6,4889141,4875493,263523,105710,0.025444,0.00261,100082
S021,6,2761225,2759551,79674,28493,0.023739,0.002473,27188
S022,6,2133177,2126321,90925,31671,0.025071,0.002574,30265
S024,6,2030919,2023367,90956,26213,0.023691,0.001663,25078
S066,6,12758,12728,2232,873,0.008002,0.001222,862


In [14]:
## print the current parameter settings
## NOTE(review): the output below shows assembly_name AllTsugaLibs and mindepth_statistical 6,
## unlike the value 10 set in the parameter cell -- confirm which assembly was loaded here
FullTsugaData.get_params()

0   assembly_name               AllTsugaLibs                                 
1   project_dir                 /mnt/lfs2/ruff6699/Tsugaheterophylla_AllDataAnalysis/TsugaIpyradAssembly
2   raw_fastq_path              Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
3   barcodes_path               Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
4   sorted_fastq_path           Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
5   assembly_method             denovo                                       
6   reference_sequence                                                       
7   datatype                    ddrad                                        
8   restriction_overhang        ('TGCAG', 'CCGG')                            
9   max_low_qual_bases          5                                            
10  phred_Qscore_offset         33                                           
11  mindepth_statistical        6                                            
12  mindep

In [46]:
## look at the per-sample read counts after filtering (reads_passed_filter column)
FullTsugaData.stats

Unnamed: 0,state,reads_raw,reads_passed_filter,clusters_total,clusters_hidepth,hetero_est,error_est,reads_consens
S004,6,19611192,19531270,415086,198388,0.023662,0.001977,188257
S006,6,10111530,10059201,375737,160219,0.027794,0.002627,150192
S010,6,9296818,9287589,239983,120982,0.02362,0.002155,113957
S011,6,3281231,3272284,199751,74638,0.0231,0.002356,71284
S014,6,5161631,5142832,218847,90937,0.021263,0.001963,87028
S016,6,4889141,4875493,263523,105710,0.025444,0.00261,100082
S021,6,2761225,2759551,79674,28493,0.023739,0.002473,27188
S022,6,2133177,2126321,90925,31671,0.025071,0.002574,30265
S024,6,2030919,2023367,90956,26213,0.023691,0.001663,25078
S066,6,12758,12728,2232,873,0.008002,0.001222,862


In [16]:
## branch the assembly under the name FullTsugaData_AS6
## NOTE(review): an earlier cell already branches to this same name -- confirm the repeat is intended
FullTsugaData.branch("FullTsugaData_AS6")

<ipyrad.core.assembly.Assembly at 0x7f4b7a233150>

In [43]:
## set min_samples_locus to 25 for step 7 (the earlier parameter cell set it to 10)
FullTsugaData.set_params("min_samples_locus", 25)

In [45]:
## run step 7 (applies filters and writes the output files, per the progress output)
FullTsugaData.run("7",force=True, ipyclient=ipyclient)

Parallel connection | crick.ibest.uidaho.edu: 48 cores
[####################] 100% 0:00:14 | applying filters     | s7 |
[####################] 100% 0:00:04 | building arrays      | s7 |
[####################] 100% 0:00:02 | writing conversions  | s7 |
[####################] 100% 0:00:04 | indexing vcf depths  | s7 |
[####################] 100% 0:00:09 | writing vcf output   | s7 |


In [49]:
## names of the 34 samples to keep in the reduced data set
LimitSamples = ["S004", "S006", "S010", "S011", "S014", "S016", "S021", "S022", "S024", "S075", "S081","S085", 
               "S087", "S090", "S093", "S096", "S137", "S142", "S146", "S149", "S176", "S323",
               "S324", "S325", "S326", "S327", "S328", "S330", "S332", "S333", "S334", "S335", "S336", "S492"]

## branch the full assembly into "TsugaSampleLimit", passing the sample list as subsamples
TsugaSampleLimit = FullTsugaData.branch("TsugaSampleLimit", subsamples=LimitSamples)

In [53]:
## set filter_min_trim_len to 125 on the subsampled branch
## NOTE(review): an earlier comment describes the reads as ~50 bp -- confirm 125 is intended
TsugaSampleLimit.set_params("filter_min_trim_len", 125)

In [54]:
## print the parameter settings of the subsampled branch
TsugaSampleLimit.get_params()

0   assembly_name               TsugaSampleLimit                             
1   project_dir                 /mnt/lfs2/ruff6699/Tsugaheterophylla_AllDataAnalysis/TsugaIpyradAssembly
2   raw_fastq_path              Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
3   barcodes_path               Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
4   sorted_fastq_path           Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
5   assembly_method             denovo                                       
6   reference_sequence                                                       
7   datatype                    ddrad                                        
8   restriction_overhang        ('TGCAG', 'CCGG')                            
9   max_low_qual_bases          5                                            
10  phred_Qscore_offset         33                                           
11  mindepth_statistical        10                                           
12  mindep

In [55]:
## run step 7 on the subsampled branch to write its output files
TsugaSampleLimit.run("7", force=True, ipyclient=ipyclient)

Parallel connection | crick.ibest.uidaho.edu: 48 cores
[####################] 100% 0:00:07 | applying filters     | s7 |
[####################] 100% 0:00:02 | building arrays      | s7 |
[####################] 100% 0:00:02 | writing conversions  | s7 |
[####################] 100% 0:00:03 | indexing vcf depths  | s7 |
[####################] 100% 0:00:03 | writing vcf output   | s7 |


In [44]:
## print the current parameter settings (the output shows the FullTsugaData_AS5 assembly)
FullTsugaData.get_params()

0   assembly_name               FullTsugaData_AS5                            
1   project_dir                 /mnt/lfs2/ruff6699/Tsugaheterophylla_AllDataAnalysis/TsugaIpyradAssembly
2   raw_fastq_path              Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
3   barcodes_path               Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
4   sorted_fastq_path           Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
5   assembly_method             denovo                                       
6   reference_sequence                                                       
7   datatype                    ddrad                                        
8   restriction_overhang        ('TGCAG', 'CCGG')                            
9   max_low_qual_bases          5                                            
10  phred_Qscore_offset         33                                           
11  mindepth_statistical        10                                           
12  mindep

In [11]:
## change max_Ns_consens to (20, 20) and max_SNPs_locus to (50, 50)
## TODO: the value 20 was tentative in the original note ("like 20?") -- confirm
## NOTE(review): the original note tied both settings to step 5, and an earlier cell sets
## max_Ns_consens to 0.10 rather than a tuple -- verify the expected value form and step
FullTsugaData.set_params("max_Ns_consens", (20, 20))
FullTsugaData.set_params("max_SNPs_locus", (50, 50))                         

In [18]:
## per-sample stats (the state column reads 6)
## NOTE(review): the cluster and consensus counts below differ from the earlier stats tables,
## so this output comes from a different assembly run -- confirm which one
FullTsugaData.stats

Unnamed: 0,state,reads_raw,reads_passed_filter,clusters_total,clusters_hidepth,hetero_est,error_est,reads_consens
S004,6,19611192,19531270,435474,187146,0.019144,0.001489,165790
S006,6,10111530,10059201,395842,149909,0.019692,0.002072,130429
S010,6,9296818,9287589,253944,115637,0.016037,0.001764,103182
S011,6,3281231,3272284,207268,66995,0.015029,0.001896,60703
S014,6,5161631,5142832,226798,83296,0.015165,0.001607,75642
S016,6,4889141,4875493,274663,96804,0.016811,0.002175,85857
S021,6,2761225,2759551,82559,25694,0.016692,0.002304,22748
S022,6,2133177,2126321,94376,28409,0.017512,0.002277,25002
S024,6,2030919,2023367,94266,23452,0.01738,0.00144,20813
S066,6,12758,12728,2248,740,0.004126,0.001158,714


In [15]:
## run step 6 with the parameters left unchanged
FullTsugaData.run("6",force=True, ipyclient=ipyclient)


  Assembly: AllTsugaLibs
  [####################] 100%  concat/shuffle input  | 0:00:48 | s6 | 
  [####################] 100%  clustering across     | 0:39:52 | s6 | 
  [####################] 100%  building clusters     | 0:00:34 | s6 | 
  [####################] 100%  aligning clusters     | 0:03:25 | s6 | 
  [####################] 100%  database indels       | 0:02:14 | s6 | 
  [####################] 100%  indexing clusters     | 0:02:13 | s6 | 
  [####################] 100%  building database     | 0:38:16 | s6 | 


In [10]:
## make branch for new output files with min sample locus at 20
## NOTE(review): this cell only creates the branch; no cell visible here sets
## min_samples_locus to 20 on it -- confirm the parameter is set before step 7
MinSamp20 = FullTsugaData.branch("TsugaMinSamp20")


In [11]:
## print the parameter settings of the new branch
MinSamp20.get_params()

0   assembly_name               TsugaMinSamp20                               
1   project_dir                 /mnt/lfs2/ruff6699/Tsugaheterophylla_AllDataAnalysis/TsugaIpyradAssembly
2   raw_fastq_path              Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
3   barcodes_path               Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
4   sorted_fastq_path           Merged: tsuga_lib1, tsuga_lib2, tsuga_lib3, tsuga_lib4
5   assembly_method             denovo                                       
6   reference_sequence                                                       
7   datatype                    ddrad                                        
8   restriction_overhang        ('TGCAG', 'CCGG')                            
9   max_low_qual_bases          5                                            
10  phred_Qscore_offset         33                                           
11  mindepth_statistical        6                                            
12  mindep

In [8]:
## run step 7, nothing changes here
## NOTE(review): the run below fails while reading the step 6 output as text
## ('utf-8' codec can't decode byte 0x89 in position 0). 0x89 is the first byte of the
## HDF5 file signature, so presumably the step 6 database was written in a binary format
## that this ipyrad version does not read -- confirm, and rerun step 6 with the installed
## version before repeating step 7
FullTsugaData.run("7", force=True, ipyclient=ipyclient)

Parallel connection | tesla.ibest.uidaho.edu: 48 cores

Encountered an Error.
Message: 'utf-8' codec can't decode byte 0x89 in position 0: invalid start byte
Traceback (most recent call last):
  File "/mnt/ceph/ruff6699/miniconda3/envs/ipyrad/lib/python3.7/site-packages/ipyrad/core/Parallel.py", line 313, in wrap_run
    self.tool._run(ipyclient=self.ipyclient, **self.rkwargs)
  File "/mnt/ceph/ruff6699/miniconda3/envs/ipyrad/lib/python3.7/site-packages/ipyrad/core/assembly.py", line 686, in _run
    stepdict[step](self, force, ipyclient).run()
  File "/mnt/ceph/ruff6699/miniconda3/envs/ipyrad/lib/python3.7/site-packages/ipyrad/assemble/write_outputs.py", line 47, in __init__
    self.samples = self.get_subsamples()
  File "/mnt/ceph/ruff6699/miniconda3/envs/ipyrad/lib/python3.7/site-packages/ipyrad/assemble/write_outputs.py", line 108, in get_subsamples
    dbsamples = inloci.readline()[1:].strip().split(",@")
  File "/mnt/ceph/ruff6699/miniconda3/envs/ipyrad/lib/python3.7/codecs.py",