# Notebook

This is my first experiment.

## Reference

 

## Datasets

 * Mouse Genome: mm10 ftp://igenome:G3nom3s4u@ussd-ftp.illumina.com/Mus_musculus/UCSC/mm10/Mus_musculus_UCSC_mm10.tar.gz
 


In [1]:
# Set the environment variables to be used during the project 

import os
import xmltodict
from Bio import Entrez
# Contact address registered with Biopython's Entrez module for the NCBI queries in this notebook.
# NOTE(review): a personal e-mail is hard-coded here — consider reading it from the environment before sharing.
Entrez.email = "r78v10a07@gmail.com"
# Project layout: every folder sits directly under the parent of the current working directory.
project_root = os.path.abspath('../')
os.environ['WORKDIR'] = project_root
for env_name, folder in (
    ('CONFIG', 'config'),
    ('DATA', 'data'),
    ('BIN', 'bin'),
    ('RESULTS', 'results'),
    ('DOC', 'docs'),
    ('SRC', 'src'),
):
    # Exported so that the %%bash cells and `!` commands can read them as shell variables.
    os.environ[env_name] = project_root + '/' + folder

# NCBI accessions used by the query and download cells.
os.environ.update({'MOUSE_BIOPROJECT': 'PRJNA79859', 'MOUSE_SRA': 'SRA026846'})

In [2]:
%%bash

# Download the mm10 genome into $DATA; the archive unpacks to $DATA/Mus_musculus,
# which is also the marker used to skip the download on later runs.
# NOTE(review): the FTP URL embeds a username and password — confirm they are meant to be public.
cd "$DATA" || exit 1
if [ ! -e "Mus_musculus" ]
then
    # Run wget in the foreground (no -b): a background download returns immediately,
    # so tar would start before the archive exists or is complete.
    wget ftp://igenome:G3nom3s4u@ussd-ftp.illumina.com/Mus_musculus/UCSC/mm10/Mus_musculus_UCSC_mm10.tar.gz \
        && tar xzf Mus_musculus_UCSC_mm10.tar.gz
fi

In [3]:
# Search the NCBI "sra" database for the entries related to the BioProject.
# The whole parsed search result is kept; later cells read mouse_sra_id['IdList'].
# NOTE: retmax=50 limits how many IDs the search returns.

handle = Entrez.esearch(db="sra", term=os.environ['MOUSE_BIOPROJECT'], retmax=50)
try:
    mouse_sra_id = Entrez.read(handle)
finally:
    # Close the connection even when parsing the response fails.
    handle.close()

In [4]:
# Retrieve the BioProject record from NCBI:
#   1. search the "bioproject" database for the accession to obtain its internal ID(s);
#   2. fetch the record for those ID(s) and parse the XML into nested dicts.

handle = Entrez.esearch(db="bioproject", term=os.environ['MOUSE_BIOPROJECT'])
try:
    bioproject_search = Entrez.read(handle)
finally:
    # Close the connection even when parsing the response fails.
    handle.close()

# mouse_bioproject_id only ever holds the ID list (never the raw search result).
mouse_bioproject_id = bioproject_search['IdList']
if not mouse_bioproject_id:
    # Fail here with a clear message instead of sending an empty ID list to efetch.
    raise ValueError("No BioProject entry found for " + os.environ['MOUSE_BIOPROJECT'])

handle = Entrez.efetch(db="bioproject", id=mouse_bioproject_id)
try:
    mouse_bioproject = xmltodict.parse(handle.read())
finally:
    handle.close()

In [5]:
# Retrieve each experiment related to the SRA IDs from NCBI.
# mouse_sra maps every SRA ID to its record, parsed from XML into nested dicts.

mouse_sra = {}
for sra_id in mouse_sra_id['IdList']:  # named sra_id so the builtin id() is not shadowed
    handle = Entrez.efetch(db="sra", id=sra_id, rettype="full", retmode="full")
    try:
        mouse_sra[sra_id] = xmltodict.parse(handle.read())
    finally:
        # Close the connection even when reading or parsing the record fails.
        handle.close()

In [6]:
# Show one tab-separated summary line (ID, sample title, sample alias, run accession)
# for every SRA record retrieved.

for rec in mouse_sra:
    package = mouse_sra[rec]['EXPERIMENT_PACKAGE_SET']['EXPERIMENT_PACKAGE']
    sample = package['SAMPLE']
    fields = [
        'ID:', rec,
        'TITLE:', sample['TITLE'],
        'ALIAS:', sample['@alias'],
        'RUN:', package['RUN_SET']['RUN']['@accession'],
    ]
    print('\t'.join(fields))

ID:	40432	TITLE:	B6 lane 1 experiment 7	ALIAS:	B6_1_7	RUN:	SRR099237
ID:	40420	TITLE:	B6 lane 6 experiment 4	ALIAS:	B6_6_4	RUN:	SRR099227
ID:	40429	TITLE:	D2 lane 7 experiment 6	ALIAS:	D2_7_6	RUN:	SRR099235
ID:	40423	TITLE:	B6 lane 8 experiment 4	ALIAS:	B6_8_4	RUN:	SRR099229
ID:	40428	TITLE:	D2 lane 6 experiment 6	ALIAS:	D2_6_6	RUN:	SRR099234
ID:	40435	TITLE:	D2 lane 5 experiment 7	ALIAS:	D2_5_7	RUN:	SRR099240
ID:	40424	TITLE:	B6 lane 1 experiment 6	ALIAS:	B6_1_6	RUN:	SRR099230
ID:	40419	TITLE:	D2 lane 5 experiment 4	ALIAS:	D2_5_4	RUN:	SRR099226
ID:	40427	TITLE:	B6 lane 5 experiment 6	ALIAS:	B6_5_6	RUN:	SRR099233
ID:	40430	TITLE:	D2 lane 8 experiment 6	ALIAS:	D2_8_6	RUN:	SRR099236
ID:	40426	TITLE:	B6 lane 3 experiment 6	ALIAS:	B6_3_6	RUN:	SRR099232
ID:	40437	TITLE:	D2 lane 7 experiment 7	ALIAS:	D2_7_7	RUN:	SRR099242
ID:	40417	TITLE:	D2 lane 2 experiment 4	ALIAS:	D2_2_4	RUN:	SRR099224
ID:	40416	TITLE:	D2 lane 1 experiment 4	ALIAS:	D2_1_4	RUN:	SRR099223
ID:	40438	TITLE:	D2 lane 8 experim

In [10]:
%%bash
# Download the SRR data for $MOUSE_SRA into $DATA/$MOUSE_SRA using bin/download_SRA.sh
# (presumably a fastq-dump wrapper — see the script).
# NOTE: readiness is judged only by the target directory existing; after a failed
# download, remove that directory before re-running this cell.

sra_dir="$DATA/${MOUSE_SRA}"
if [ ! -e "$sra_dir" ]
then
    mkdir -p "$sra_dir" || exit 1
    if ! sh "$BIN/download_SRA.sh" "${MOUSE_SRA}" "$sra_dir"
    then
        echo "Download of ${MOUSE_SRA} failed" >&2
        exit 1
    fi
else
    echo "The data is ready"
fi

The data is ready


In [23]:
# Let's run Bowtie2 for each SRR

os.chdir(os.environ['RESULTS'])
for rec in mouse_sra:
    os.environ['SAMPLE'] = mouse_sra[rec]['EXPERIMENT_PACKAGE_SET']['EXPERIMENT_PACKAGE']['RUN_SET']['RUN']['@accession']
    print("Running bowtie2 for " + os.environ['SAMPLE'])
    !bowtie2 -x ../data/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/genome --very-sensitive --score-min "L,-0.4,-0.1" --gbar 8 -p 4 $DATA/${MOUSE_SRA}/$SAMPLE.fastq.gz | samtools view -Sb - > $SAMPLE.bam


Running bowtie2 for SRR099228
Running bowtie2 for SRR099243
Running bowtie2 for SRR099236
Running bowtie2 for SRR099237
Running bowtie2 for SRR099233
Running bowtie2 for SRR099226
Running bowtie2 for SRR099235
Running bowtie2 for SRR099240
Running bowtie2 for SRR099242
Running bowtie2 for SRR099231
Running bowtie2 for SRR099238
Running bowtie2 for SRR099225
Running bowtie2 for SRR099241
Running bowtie2 for SRR099227
Running bowtie2 for SRR099239
Running bowtie2 for SRR099224
Running bowtie2 for SRR099229
Running bowtie2 for SRR099234
Running bowtie2 for SRR099232
Running bowtie2 for SRR099230
Running bowtie2 for SRR099223
