# Analyzing 3RAD data from Asian kudzu 

In [10]:
import ipyrad as ip
import ipyrad.analysis as ipa
import pandas as pd
import ipyparallel as ipp

In [11]:
# Connect to an already-running ipyparallel cluster and print a summary of
# the engines it exposes (host name and core count).
# NOTE(review): the run() calls visible in this notebook use auto=True and
# never receive `ipyclient`, so this client may only serve as a sanity check
# of the cluster — confirm whether it is still needed.
ipyclient = ipp.Client()
ip.cluster_info(ipyclient)

host compute node: [24 cores] on t067


### 1. Read in the data files

The internal barcodes are dual indexed, meaning the combination of barcodes attached to R1 and R2 identifies each sample, 
so the barcodes file lists two barcodes for each sample.  
Plates are identified by their i7 and i5 indices, so the plates were demultiplexed in bcl2fastq2 v.2.20.0.422.
They do not have PCR duplicate identifiers.

In [4]:
# Directory holding both the per-plate inner-barcode tables and the per-plate
# gzipped read files.
RUNS_DIR = "/moto/eaton/users/slh2181/Pmontana/illumina_runs"

# Inner (dual-index) barcode table for each plate: two US plates and six
# native-range plates.
BARCODES_US1 = f"{RUNS_DIR}/US1_Innerbarcodes.txt"
BARCODES_US2 = f"{RUNS_DIR}/US2_Innerbarcodes.txt"
BARCODES_Native1 = f"{RUNS_DIR}/Native1_Innerbarcodes.txt"
BARCODES_Native2 = f"{RUNS_DIR}/Native2_Innerbarcodes.txt"
BARCODES_Native3 = f"{RUNS_DIR}/Native3_Innerbarcodes.txt"
BARCODES_Native4 = f"{RUNS_DIR}/Native4_Innerbarcodes.txt"
BARCODES_Native5 = f"{RUNS_DIR}/Native5_Innerbarcodes.txt"
BARCODES_Native6 = f"{RUNS_DIR}/Native6_Innerbarcodes.txt"

# Glob pattern for each plate's read files; the trailing `R*` wildcard is left
# unexpanded here (presumably it matches the R1 and R2 files — confirm).
READS_US1 = f"{RUNS_DIR}/NS_Kerin_US1_S6_R*.gz"
READS_US2 = f"{RUNS_DIR}/NS_Kerin_US2_S7_R*.gz"
READS_Native1 = f"{RUNS_DIR}/NS_Kerin_Native1_S8_R*.gz"
READS_Native2 = f"{RUNS_DIR}/NS_Kerin_Native2_S9_R*.gz"
READS_Native3 = f"{RUNS_DIR}/NS_Kerin_Native3_S1_R*.gz"
READS_Native4 = f"{RUNS_DIR}/NS_Kerin_Native4_S2_R*.gz"
READS_Native5 = f"{RUNS_DIR}/NS_Kerin_Native5_S3_R*.gz"
READS_Native6 = f"{RUNS_DIR}/NS_Kerin_Native6_S4_R*.gz"

In [6]:
# Shell out to preview the first 10 lines of the US1 barcodes table; each row
# of the saved output shows a sample name followed by two barcode sequences.
! head -n 10 $BARCODES_US1

AL12-10KA	CCGAAT   	CTAACG	
AL12-12KA	TTAGGCA	    CTAACG	
AL12-14KA	AACTCGTC	CTAACG	
AL12-16KA	GGTCTACGT	CTAACG	
AL12-18KA	GATACC		CTAACG
AL12-19KA	AGCGTTG	    CTAACG	
AL12-1KA	CTGCAACT	CTAACG	
AL12-22KA	TCATGGTCA	CTAACG	
AL12-5KA	CCGAAT  	TCGGTAC	
AL12-6KA	TTAGGCA	    TCGGTAC	


### 2. Demultiplex Sequenced Run 2 on internal indices
This run was sequenced at UGA (tracking IDs 126984 and 126985). We now pass each of the 8 plates, together with its barcodes file, separately to step 1 of ipyrad to demultiplex to samples. 


Plate US1

In [19]:
# Create the demultiplexing Assembly for plate US1 and point it at that
# plate's inner-barcode table and read files.
US1 = ip.Assembly("demux_US1")
# all demux assemblies in this notebook share the same project directory
US1.params.project_dir = "/moto/eaton/users/slh2181/Pmontana/ipyrad/"
US1.params.barcodes_path = BARCODES_US1
US1.params.raw_fastq_path = READS_US1
# 3RAD data with R1/R2 reads (see section 1 of this notebook)
US1.params.datatype = "pair3rad"
# two overhang sequences — presumably R1 side then R2 side; confirm against
# the library prep
US1.params.restriction_overhang = ("GCTAGA", "TAATTC")

New Assembly: demux_US1


In [20]:
# Run ipyrad step '1' (demultiplexing to samples) for plate US1.
# auto=True / force=True: presumably "start the parallel cluster
# automatically" and "overwrite existing step-1 results" — confirm against
# the ipyrad docs. The saved output shows this taking well over an hour.
US1.run('1', auto=True, force=True)

Parallelization: t041: 24 cores
[                    ]   0% 0:34:15 | sorting reads        | s1 |

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[######              ]  32% 0:16:32 | writing/compressing  | s1 |

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[####################] 100% 0:40:15 | writing/compressing  | s1 |


In [21]:
# Preview the first rows of the per-sample stats table for plate US1
# (the saved output shows the columns `state` and `reads_raw`).
US1.stats.head()

Unnamed: 0,state,reads_raw
AL12-10KA,1,560055
AL12-12KA,1,74057
AL12-14KA,1,33215
AL12-16KA,1,68335
AL12-18KA,1,961431


Plate US2

In [7]:
# Create the demultiplexing Assembly for plate US2 and point it at that
# plate's inner-barcode table and read files.
US2 = ip.Assembly("demux_US2")
# all demux assemblies in this notebook share the same project directory
US2.params.project_dir = "/moto/eaton/users/slh2181/Pmontana/ipyrad/"
US2.params.barcodes_path = BARCODES_US2
US2.params.raw_fastq_path = READS_US2
# 3RAD data with R1/R2 reads (see section 1 of this notebook)
US2.params.datatype = "pair3rad"
# two overhang sequences — presumably R1 side then R2 side; confirm against
# the library prep
US2.params.restriction_overhang = ("GCTAGA", "TAATTC")

New Assembly: demux_US2


In [27]:
# Run ipyrad step '1' (demultiplexing to samples) for plate US2.
# auto=True / force=True: presumably "start the parallel cluster
# automatically" and "overwrite existing step-1 results" — confirm against
# the ipyrad docs.
# NOTE(review): the saved output contains a zmq/tornado callback traceback
# part-way through, but the progress bar still reaches 100%.
US2.run('1', auto=True, force=True)

Parallelization: t041: 24 cores
[####################] 100% 0:31:13 | sorting reads        | s1 |
[######              ]  34% 0:14:35 | writing/compressing  | s1 |

Exception in callback BaseAsyncIOLoop._handle_events(50, 1)
handle: <Handle BaseAsyncIOLoop._handle_events(50, 1)>
Traceback (most recent call last):
  File "/moto/home/slh2181/miniconda3/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/moto/home/slh2181/miniconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 122, in _handle_events
    handler_func(fileobj, events)
  File "/moto/home/slh2181/miniconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/moto/home/slh2181/miniconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 459, in _handle_events
    self._rebuild_io_state()
  File "/moto/home/slh2181/miniconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 516, in _rebuild_io_state
    self._update_handler(state)
  File "/moto/home/slh2181/miniconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 533, in _u

[####################] 100% 0:46:50 | writing/compressing  | s1 |


In [8]:
# Preview the first rows of the per-sample stats table for plate US2.
# NOTE(review): no output is saved for this cell — re-run to confirm counts.
US2.stats.head()

Plate Native1

In [9]:
# Create the demultiplexing Assembly for plate Native1 and point it at that
# plate's inner-barcode table and read files.
Native1 = ip.Assembly("demux_Native1")
# all demux assemblies in this notebook share the same project directory
Native1.params.project_dir = "/moto/eaton/users/slh2181/Pmontana/ipyrad/"
Native1.params.barcodes_path = BARCODES_Native1
Native1.params.raw_fastq_path = READS_Native1
# 3RAD data with R1/R2 reads (see section 1 of this notebook)
Native1.params.datatype = "pair3rad"
# two overhang sequences — presumably R1 side then R2 side; confirm against
# the library prep
Native1.params.restriction_overhang = ("GCTAGA", "TAATTC")

New Assembly: demux_Native1


In [10]:
# Run ipyrad step '1' (demultiplexing to samples) for plate Native1.
# auto=True / force=True: presumably "start the parallel cluster
# automatically" and "overwrite existing step-1 results" — confirm against
# the ipyrad docs.
Native1.run('1', auto=True, force=True)

Parallelization: t041: 23 cores
[####################] 100% 0:33:17 | sorting reads        | s1 |
[####################] 100% 0:54:03 | writing/compressing  | s1 |


In [None]:
# Preview the first rows of the per-sample stats table for plate Native1.
# NOTE(review): this cell has no execution count or saved output — re-run.
Native1.stats.head()

Plate Native2

In [5]:
# Create the demultiplexing Assembly for plate Native2 and point it at that
# plate's inner-barcode table and read files.
Native2 = ip.Assembly("demux_Native2")
# all demux assemblies in this notebook share the same project directory
Native2.params.project_dir = "/moto/eaton/users/slh2181/Pmontana/ipyrad/"
Native2.params.barcodes_path = BARCODES_Native2
Native2.params.raw_fastq_path = READS_Native2
# 3RAD data with R1/R2 reads (see section 1 of this notebook)
Native2.params.datatype = "pair3rad"
# two overhang sequences — presumably R1 side then R2 side; confirm against
# the library prep
Native2.params.restriction_overhang = ("GCTAGA", "TAATTC")

New Assembly: demux_Native2


In [6]:
# Run ipyrad step '1' (demultiplexing to samples) for plate Native2.
# auto=True / force=True: presumably "start the parallel cluster
# automatically" and "overwrite existing step-1 results" — confirm against
# the ipyrad docs.
# NOTE(review): the saved progress output is cut off at 58% by the IOPub
# rate limit; the stats cell that follows does show per-sample read counts.
Native2.run('1', auto=True, force=True)

Parallelization: t002: 22 cores
[####################] 100% 0:29:52 | sorting reads        | s1 |
[###                 ]  16% 0:06:59 | writing/compressing  | s1 |

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[###########         ]  58% 0:29:49 | writing/compressing  | s1 |

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [7]:
# Preview the first rows of the per-sample stats table for plate Native2
# (the saved output shows the columns `state` and `reads_raw`).
Native2.stats.head()

Unnamed: 0,state,reads_raw
KJP23-3A,1,696767
KJP23-5A,1,749738
KJP23-7A,1,429081
KJP25-10A,1,543394
KJP25-12A,1,243842


Plate Native3

In [8]:
# Create the demultiplexing Assembly for plate Native3 and point it at that
# plate's inner-barcode table and read files.
Native3 = ip.Assembly("demux_Native3")
# all demux assemblies in this notebook share the same project directory
Native3.params.project_dir = "/moto/eaton/users/slh2181/Pmontana/ipyrad/"
Native3.params.barcodes_path = BARCODES_Native3
Native3.params.raw_fastq_path = READS_Native3
# 3RAD data with R1/R2 reads (see section 1 of this notebook)
Native3.params.datatype = "pair3rad"
# two overhang sequences — presumably R1 side then R2 side; confirm against
# the library prep
Native3.params.restriction_overhang = ("GCTAGA", "TAATTC")

New Assembly: demux_Native3


In [None]:
# Run ipyrad step '1' (demultiplexing to samples) for plate Native3.
# auto=True / force=True: presumably "start the parallel cluster
# automatically" and "overwrite existing step-1 results" — confirm against
# the ipyrad docs.
# NOTE(review): the saved output stops at 89% of writing/compressing and the
# cell has no execution count — verify that this run actually finished.
Native3.run('1', auto=True, force=True)

Parallelization: t002: 24 cores
[####################] 100% 0:26:11 | sorting reads        | s1 |
[#################   ]  89% 0:47:22 | writing/compressing  | s1 |

In [None]:
# Preview the first rows of the per-sample stats table for plate Native3.
# NOTE(review): this cell has no execution count or saved output — re-run.
Native3.stats.head()

Plate Native4

In [7]:
# Create the demultiplexing Assembly for plate Native4 and point it at that
# plate's inner-barcode table and read files.
# NOTE(review): the output saved under this cell reads
# "New Assembly: demux_US1", which does not match the name used here — the
# saved output looks stale; re-run the cell to refresh it.
Native4 = ip.Assembly("demux_Native4")
# all demux assemblies in this notebook share the same project directory
Native4.params.project_dir = "/moto/eaton/users/slh2181/Pmontana/ipyrad/"
Native4.params.barcodes_path = BARCODES_Native4
Native4.params.raw_fastq_path = READS_Native4
# 3RAD data with R1/R2 reads (see section 1 of this notebook)
Native4.params.datatype = "pair3rad"
# two overhang sequences — presumably R1 side then R2 side; confirm against
# the library prep
Native4.params.restriction_overhang = ("GCTAGA", "TAATTC")

New Assembly: demux_US1


In [None]:
# Run ipyrad step '1' (demultiplexing to samples) for plate Native4.
# auto=True / force=True: presumably "start the parallel cluster
# automatically" and "overwrite existing step-1 results" — confirm against
# the ipyrad docs.
# NOTE(review): the saved output stops at 0% after 39 seconds and the cell
# has no execution count — this run does not appear to have completed here.
Native4.run('1', auto=True, force=True)

Parallelization: t041: 24 cores
[                    ]   0% 0:00:39 | sorting reads        | s1 |

In [None]:
# Preview the first rows of the per-sample stats table for plate Native4.
# NOTE(review): this cell has no execution count or saved output — re-run.
Native4.stats.head()

Plate Native5

In [7]:
# Create the demultiplexing Assembly for plate Native5 and point it at that
# plate's inner-barcode table and read files.
# NOTE(review): the output saved under this cell reads
# "New Assembly: demux_US1", which does not match the name used here — the
# saved output looks stale; re-run the cell to refresh it.
Native5 = ip.Assembly("demux_Native5")
# all demux assemblies in this notebook share the same project directory
Native5.params.project_dir = "/moto/eaton/users/slh2181/Pmontana/ipyrad/"
Native5.params.barcodes_path = BARCODES_Native5
Native5.params.raw_fastq_path = READS_Native5
# 3RAD data with R1/R2 reads (see section 1 of this notebook)
Native5.params.datatype = "pair3rad"
# two overhang sequences — presumably R1 side then R2 side; confirm against
# the library prep
Native5.params.restriction_overhang = ("GCTAGA", "TAATTC")

New Assembly: demux_US1


In [None]:
# Run ipyrad step '1' (demultiplexing to samples) for plate Native5.
# auto=True / force=True: presumably "start the parallel cluster
# automatically" and "overwrite existing step-1 results" — confirm against
# the ipyrad docs.
# NOTE(review): the saved output stops at 0% after 39 seconds and the cell
# has no execution count — this run does not appear to have completed here.
Native5.run('1', auto=True, force=True)

Parallelization: t041: 24 cores
[                    ]   0% 0:00:39 | sorting reads        | s1 |

In [None]:
# Preview the first rows of the per-sample stats table for plate Native5.
# NOTE(review): this cell has no execution count or saved output — re-run.
Native5.stats.head()

Plate Native6

In [9]:
# Create the demultiplexing Assembly for plate Native6 and point it at that
# plate's inner-barcode table and read files.
Native6 = ip.Assembly("demux_Native6")
# all demux assemblies in this notebook share the same project directory
Native6.params.project_dir = "/moto/eaton/users/slh2181/Pmontana/ipyrad/"
Native6.params.barcodes_path = BARCODES_Native6
Native6.params.raw_fastq_path = READS_Native6
# 3RAD data with R1/R2 reads (see section 1 of this notebook)
Native6.params.datatype = "pair3rad"
# two overhang sequences — presumably R1 side then R2 side; confirm against
# the library prep
Native6.params.restriction_overhang = ("GCTAGA", "TAATTC")

New Assembly: demux_Native6


In [12]:
# Run ipyrad step '1' (demultiplexing to samples) for plate Native6.
# auto=True / force=True: presumably "start the parallel cluster
# automatically" and "overwrite existing step-1 results" — confirm against
# the ipyrad docs. The saved output shows this taking over an hour and a half.
Native6.run('1', auto=True, force=True)

Parallelization: t067: 24 cores
[                    ]   0% 0:18:52 | sorting reads        | s1 |

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[####################] 100% 0:35:07 | sorting reads        | s1 |
[######              ]  30% 0:18:58 | writing/compressing  | s1 |

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[#############       ]  69% 0:40:53 | writing/compressing  | s1 |

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[####################] 100% 1:00:45 | writing/compressing  | s1 |


In [13]:
# Preview the first rows of the per-sample stats table for plate Native6
# (the saved output shows the columns `state` and `reads_raw`).
Native6.stats.head()

Unnamed: 0,state,reads_raw
AL12-10KA,1,636060
AL5-2KA,1,1134445
AR2-2KA,1,166164
GA36-6KA,1,803955
GA96-10KA,1,519621
