<a href="https://colab.research.google.com/github/ravichas/bioinformatics/blob/main/NCBI_EDIRECT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## NCBI EDirect tutorial using Google Colab
S. Ravichandran (saka.ravi@gmail.com)




## Install EDIRECT

https://www.ncbi.nlm.nih.gov/books/NBK179288/

In [None]:
# Print the current date and time on the runtime.
!date

In [None]:
%%bash
# Download the pinned Miniconda installer and install it into /usr/local.
# Abort on the first failing step so a broken download is not silently
# followed by an attempt to run a missing installer.
set -euo pipefail

MINICONDA_INSTALLER_SCRIPT=Miniconda3-4.5.4-Linux-x86_64.sh
MINICONDA_PREFIX=/usr/local

# -q hides the progress bar but still reports errors; -O makes re-runs
# overwrite the installer instead of creating "<name>.1" copies.
wget -q "https://repo.anaconda.com/miniconda/${MINICONDA_INSTALLER_SCRIPT}" \
  -O "${MINICONDA_INSTALLER_SCRIPT}"
chmod +x "${MINICONDA_INSTALLER_SCRIPT}"

# -b: batch (no prompts), -f: install even if the prefix exists, -p: prefix.
# Only stdout is discarded so that installer errors remain visible.
"./${MINICONDA_INSTALLER_SCRIPT}" -b -f -p "${MINICONDA_PREFIX}" > /dev/null

In [None]:
# Confirm which conda executable is first on PATH.
!which conda # should return /usr/local/bin/conda

In [None]:
# Report the conda version provided by the installer.
!conda --version # should return 4.5.4

In [None]:
# Confirm which python executable is first on PATH.
!which python # still returns /usr/local/bin/python

In [None]:
%%bash
# Pin Python 3.7 in the base environment, then bring every package
# up to date from the "defaults" channel (non-interactive).
conda install --yes --channel defaults conda python=3.7
conda update --yes --all --channel defaults

In [None]:
# Re-check the conda version after the update cell; the exact number
# depends on what `conda update` pulled in at run time.
!conda --version # now returns 4.8.3

In [None]:
# Display the kernel's current module search path.
import sys
sys.path 

In [None]:
# List the Python 3.6 site-packages directory.
# NOTE(review): an earlier cell installs python=3.7 — confirm this 3.6 path
# still exists on the current runtime.
!ls /usr/local/lib/python3.6/site-packages/

In [None]:
# Make the conda Python 3.7 site-packages directory importable from this
# kernel. The membership check keeps the cell idempotent: re-running it
# does not pile up duplicate entries on sys.path.
CONDA_SITE_PACKAGES = "/usr/local/lib/python3.7/site-packages"
if CONDA_SITE_PACKAGES not in sys.path:
    sys.path.append(CONDA_SITE_PACKAGES)

In [None]:
# Install NCBI EDirect (package "entrez-direct") from the bioconda channel
# without prompting.
!conda install --yes --channel bioconda entrez-direct

In [None]:
# List the Python 3.7 site-packages directory.
!ls /usr/local/lib/python3.7/site-packages/

In [None]:
# Check that an efetch executable exists in /usr/local/bin.
!ls  /usr/local/bin/efetch

In [None]:
# Smoke-test the install: `-db` with no database name can only fail, so
# print esearch's built-in usage text instead.
!esearch -help

## Just making sure EDirect is installed

In [None]:
# Run esearch with no arguments.
# NOTE(review): presumably this only shows that the command is on PATH and
# prints a usage/error message — confirm that is the intent.
!esearch

In [None]:
%%bash
# Write an install report to ./installconfirm (tool versions plus one live
# PubMed query piped through efetch and xtract), then print the report.
{
  echo "***********************"
  echo "esearch version:"
  esearch -version
  echo "xtract version:"
  xtract -version
  echo "EDirect install status:"
  esearch -db pubmed -query "Babalobi OO[au] AND 2008[pdat]" |
    efetch -format xml |
    xtract -pattern Author -if Affiliation -contains Medicine -element Initials
  echo "***********************"
} > installconfirm
cat installconfirm

In [None]:
# Show the install report written by the previous cell.
!cat installconfirm

## Run some edirect examples

In [None]:
# Search PubMed for the phrase "breast cancer".
!esearch -db pubmed -query "breast cancer" 

In [None]:
# Search PubMed for "selective serotonin reuptake inhibitor".
!esearch -db pubmed -query "selective serotonin reuptake inhibitor"

In [None]:
# Fetch the protein record with id 3OQZ_a in FASTA format.
!efetch -db protein -id 3OQZ_a -format fasta

In [None]:
# Fetch the protein record with id 7LYZ in FASTA format.
!efetch -db protein -id 7LYZ -format fasta

In [None]:
%%bash
# PubMed search -> related articles -> keep only those matching "tetrachromacy".
esearch -db pubmed -query "opsin gene conversion" |
  elink -related |
  efilter -query "tetrachromacy"

**Genetic and physical maps of Saccharomyces cerevisiae, Edition 11**

R K Mortimer, C R Contopoulou, J S King

In [None]:
# Fetch PubMed record 1413997 as XML and print each author's initials and last name.
!efetch -db pubmed -id 1413997 -format xml | xtract -pattern PubmedArticle -block Author -element Initials LastName

In [None]:
%%bash
# Rank the co-authors of "Havran W": for every article with fewer than 13
# authors, emit "LastName, Initials" for each author other than Havran,
# one per line, then count and rank the distinct names.
esearch -db pubmed -query "Havran W [AUTH]" \
  | efetch -format xml \
  | xtract -pattern PubmedArticle -if "#Author" -lt 13 \
      -block Author -if LastName -is-not Havran \
      -sep ", " -tab "\n" -element LastName,Initials \
  | sort-uniq-count-rank

In [None]:
# Run the same author search on its own to see the raw esearch result.
!esearch -db pubmed -query "Havran W [AUTH]"

In [None]:
%%bash
# Human protein records in the 799000-900000 molecular-weight range,
# fetched in GenBank ("gb") format.
esearch -db protein -query "human[ORGN] AND 799000:900000[molwt]" |
  efetch -format gb


## Linux 

In [None]:
# Print kernel and machine information for the runtime.
!uname -a

In [None]:
# Show the contents of /etc/issue (the system identification text).
!cat /etc/issue

In [None]:
# Print the notebook's current working directory.
!pwd


In [None]:
%%bash
# Demonstrate variables and a numeric if/else test in bash.
# The comparison and both messages share one threshold, so the printed
# text always matches the condition that was actually tested.
var1=98
threshold=3
echo " var1 is set to:  " $var1

if [ "${var1}" -gt "${threshold}" ]
then
   echo "Var1 is > ${threshold}"
else
   echo "Var1 is <= ${threshold}"
fi



In [None]:
%%bash
# Walk through basic directory navigation: create ./test under /content,
# enter it, create an empty file, and come back. Each `pwd` shows where
# the shell is at that point.
cd /content
pwd
mkdir -p test
cd test
touch Ravi.txt
pwd
cd ..

pwd

In [None]:
%%bash
# Enter a nested directory. Create it first (-p: no error if it already
# exists, parents are created as needed); otherwise `cd` fails on a fresh run.
mkdir -p test/test1
cd test/test1
pwd

In [None]:
%%bash
# `cd` with no argument goes to $HOME, but only inside this cell's bash
# subprocess; compare with the `!pwd` in the next cell.
cd

In [None]:
# Print the notebook's working directory again.
!pwd

In [None]:
%%bash 

# Plain listing of the current directory.
ls 

# NOTE(review): `-f` is lowercase here (unsorted listing that also shows
# dot-files); if type-indicator suffixes were intended, that is `-F` — confirm.
ls -l -f


## Help

In [None]:
# Show the manual page for `ls` using IPython's %man magic, spelled out
# explicitly instead of relying on automagic.
%man ls

In [None]:
%%bash
# Download PDB entry 7LYZ from the RCSB file server over HTTPS.
# -O pins the output name so re-running the cell overwrites 7lyz.pdb
# instead of creating 7lyz.pdb.1, 7lyz.pdb.2, ...
wget -O 7lyz.pdb https://files.rcsb.org/download/7lyz.pdb

In [None]:
# Show the last lines of the downloaded file to confirm the download completed.
!tail 7lyz.pdb

In [None]:
%%bash
# Two filters over the same file:
#   "ATOM"  keeps every line containing ATOM anywhere in it
#   "^ATOM" keeps only lines that start with ATOM
grep "ATOM" 7lyz.pdb > 7lyzAllATOM.pdb
grep "^ATOM" 7lyz.pdb > 7lyzATOM.pdb

In [None]:
# View the file that holds only the lines starting with ATOM.
!more 7lyzATOM.pdb

In [None]:
# View the full PDB file through IPython's pager, using the explicit
# %more magic instead of relying on automagic.
%more 7lyz.pdb

In [None]:
%%bash
# Download PDB entry 2SRC from the RCSB file server over HTTPS.
# -O pins the output name so re-running the cell overwrites 2src.pdb
# instead of creating numbered duplicates.
wget -O 2src.pdb https://files.rcsb.org/download/2src.pdb

In [None]:
# Rename the file; only the case of the extension changes.
!mv 2src.pdb 2src.PDB

In [None]:
# Long listing of the working directory.
!ls -l


In [None]:
# `-i` makes rm ask for confirmation before deleting.
# NOTE(review): this needs a front end that forwards keyboard input to `!`
# commands; where it does not, the prompt cannot be answered — confirm.
!rm -i 2src.PDB

In [None]:
# Check whether 2src.PDB still exists after the rm cell.
!ls -l 2src.PDB

In [None]:
# Create new_file.txt if it does not exist (or update its timestamp if it does).
!touch new_file.txt


In [None]:
# Show the long listing for the file created by the touch cell.
!ls -l new_file.txt

In [None]:
# Search under /content for anything named exactly `edirect`.
!find /content -name edirect

In [None]:
# Print every line of the PDB file that contains the text "CA".
!grep CA 7lyz.pdb


In [None]:
# Count the lines of the PDB file that contain the text "CA".
!grep CA 7lyz.pdb | wc -l

In [None]:
# Report disk space usage for the mounted file systems.
!df

In [None]:
# Show the first lines of the PDB file.
!head 7lyz.pdb

In [None]:
# Show the last lines of the PDB file.
!tail 7lyz.pdb

In [None]:
%%bash
# Create config.txt from a here-document: everything between the two EOF
# markers is written to the file verbatim.
cat << EOF > config.txt
first line
second line
third line 
EOF


In [None]:
# Show the file written by the here-document cell.
!cat config.txt

In [None]:
# Create a gzip-compressed archive (c: create, z: gzip, v: verbose, f: file).
# Archive the `test` directory made earlier in this notebook: the conda
# install does not create an `edirect` directory here, so the original
# target would make tar fail with "Cannot stat".
!tar -czvf test.tar.gz test


In [None]:
# List any files in the working directory whose names contain "tar".
!ls -l *tar*

In [None]:
# Take a single snapshot of running processes. Plain `top` is interactive
# and never returns in a notebook cell; -b (batch) with -n 1 prints one
# iteration and exits. `head` keeps the output short.
!top -b -n 1 | head -n 20


In [None]:
# IPython magic: print the input history of this session.
%history

In [None]:
# Check whether the zip and unzip tools are on PATH (one path per tool found).
!which zip unzip

## Other important commands

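* chmod — change file permissions
* chown — change file owner/group
* kill — send a signal to (usually to terminate) a process
* Ctrl-c — interrupt the running command
* Ctrl-z — suspend (pause) the running command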


In [None]:
%%bash
# Save the abstracts of all PubMed hits for "lycopene cyclase" to HitsFile.txt.
esearch -db pubmed -query "lycopene cyclase" |
  efetch -format abstract > HitsFile.txt

In [None]:
# Count the lines in the saved abstracts file.
!wc -l HitsFile.txt