# 1. Download processed WGBS-seq data

## 1.1 Lister et al. (2013) Bisulfite sequencing of frontal cortex
> [GSE Accession: GSE47966](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE47966)

In [1]:
import GEOparse
from ftplib import FTP
import gzip
import pandas as pd
from tqdm.auto import tqdm

GEOparse.logger.set_verbosity('ERROR')

In [2]:
def get_ftp_file_links(GSE, sample):
    """Collect the supplementary FTP files attached to one sample of a GEO series.

    The series is loaded with ``GEOparse.get_GEO`` and the rows of its
    phenotype table whose ``title`` equals ``sample`` are selected. Every
    column whose name starts with ``supplementary`` is then read from the
    first matching row.

    Args:
      GSE (str): GSE accession number
      sample (str): Sample title, matched exactly against the ``title`` column

    Returns:
      ftp_file_names (list): base names of the supplementary files
      cwd (str): directory part of the file link, i.e. everything after
        ``.gov/`` and before the file name. Empty string when no file was
        found. When several files are listed, the directory of the last
        one is returned.

    Raises:
      ValueError: if no sample of the series carries the requested title
    """

    gse = GEOparse.get_GEO(GSE)
    gse_pheno = gse.phenotype_data

    sample_rows = gse_pheno[gse_pheno["title"] == sample]
    if sample_rows.empty:
        # Without this guard the positional lookup below fails with an
        # IndexError that does not say which sample was missing.
        raise ValueError(f"No sample titled {sample!r} found in {GSE}")

    ftp_file_names = []
    cwd = ""
    for column, values in sample_rows.items():
        if not column.startswith("supplementary"):
            continue
        link = values.values[0]
        # Non-string cells are skipped; presumably these are NaN fillers for
        # samples that have fewer supplementary files than others.
        if not isinstance(link, str):
            continue
        cwd = link.split(".gov/")[-1].rpartition("/")[0]
        ftp_file_names.append(link.split("/")[-1])

    return ftp_file_names, cwd

### 1.1.1 Frontal Cortex whole

In [3]:
# Resolve the supplementary file names and their FTP directory for the
# sample titled "MethylC-Seq_mm_fc_6wk".
ftp_file_names, cwd = get_ftp_file_links(
    GSE="GSE47966",
    sample="MethylC-Seq_mm_fc_6wk",
)

In [4]:
# Download every listed supplementary file whose name does not contain
# 'chrL' and append them, in order, to one local archive.
# The FTP context manager closes the connection even when a transfer fails.
with FTP('ftp.ncbi.nlm.nih.gov') as ftp:
    ftp.login()
    ftp.cwd(cwd)
    with open('data/GSE47966_MethylC-Seq_mm_fc_6wk.txt.gz', 'wb') as fp:
        for file_name in tqdm(ftp_file_names, leave=False):
            if 'chrL' not in file_name:
                ftp.retrbinary(f'RETR {file_name}', fp.write)

  0%|          | 0/22 [00:00<?, ?it/s]

'221 Goodbye.'

### 1.1.2 Frontal Cortex (sorted NeuN+ve nuclei)

In [None]:
# Resolve the supplementary file names and their FTP directory for the
# sample titled "MethylC-Seq_mm_fc_male_7wk_neun_pos".
ftp_file_names, cwd = get_ftp_file_links(
    GSE="GSE47966",
    sample="MethylC-Seq_mm_fc_male_7wk_neun_pos",
)

In [None]:
# Download every listed supplementary file whose name does not contain
# 'chrL' and append them, in order, to one local archive.
# The FTP context manager closes the connection even when a transfer fails.
with FTP('ftp.ncbi.nlm.nih.gov') as ftp:
    ftp.login()
    ftp.cwd(cwd)
    with open('data/GSE47966_MethylC-Seq_mm_fc_male_7wk_neun_pos.txt.gz', 'wb') as fp:
        for file_name in tqdm(ftp_file_names, leave=False):
            if 'chrL' not in file_name:
                ftp.retrbinary(f'RETR {file_name}', fp.write)

### 1.1.3 Frontal Cortex (sorted NeuN-ve nuclei)

In [None]:
# Resolve the supplementary file names and their FTP directory for the
# sample titled "MethylC-Seq_mm_fc_male_7wk_neun_neg".
ftp_file_names, cwd = get_ftp_file_links(
    GSE="GSE47966",
    sample="MethylC-Seq_mm_fc_male_7wk_neun_neg",
)

In [None]:
# Download every listed supplementary file whose name does not contain
# 'chrL' and append them, in order, to one local archive.
# The FTP context manager closes the connection even when a transfer fails.
with FTP('ftp.ncbi.nlm.nih.gov') as ftp:
    ftp.login()
    ftp.cwd(cwd)
    with open('data/GSE47966_MethylC-Seq_mm_fc_male_7wk_neun_neg.txt.gz', 'wb') as fp:
        for file_name in tqdm(ftp_file_names, leave=False):
            if 'chrL' not in file_name:
                ftp.retrbinary(f'RETR {file_name}', fp.write)

## 1.2 Lagger et al. (2017) Bisulfite sequencing of hypothalamus
> [GSE Accession: GSE84533](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE84533)

In [None]:
# Download the first supplementary file of every sample in GSE84533 into its
# own local file, numbered in phenotype-table order starting at 1.
gse = GEOparse.get_GEO("GSE84533")
sample_files = gse.phenotype_data[["geo_accession", "supplementary_file_1"]].values

# The FTP context manager closes the connection even when a transfer fails.
with FTP('ftp.ncbi.nlm.nih.gov') as ftp:
    ftp.login()
    # enumerate() wraps tqdm (not the other way round) so that the progress
    # bar can read the number of samples from the array.
    for n, (accession, file_url) in enumerate(tqdm(sample_files, leave=False), start=1):
        # Keep only the server-side path that follows the host name.
        file_name = file_url.split("ftp.ncbi.nlm.nih.gov/")[-1]
        with open(f"data/GSE84533_{accession}_hypothalamus_{n}.txt.gz", "wb") as fp:
            ftp.retrbinary(f'RETR {file_name}', fp.write)

## 1.3 Boxer et al. (2020) Oxidative Bisulfite sequencing of forebrain

> [GSE Accession: GSE128172](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE128172)

In [None]:
# Append the supplementary files of the three MeCP2 WT OX replicates, in
# replicate order, to one local archive.
with open('data/GSE128172_MeCP2_WT_OX_rep123.txt.gz', 'wb') as fp:
    for sample in ("MeCP2_WT_OX_rep1", "MeCP2_WT_OX_rep2", "MeCP2_WT_OX_rep3"):
        # Resolve the file list before connecting so the FTP session is not
        # left idle while the GEO metadata is being fetched.
        rep_ftp_file_names, cwd = get_ftp_file_links("GSE128172", sample)

        # A fresh connection per replicate: the directory returned above has
        # no leading slash, so it must be resolved from the login directory.
        # The context manager closes the connection even on failure.
        with FTP('ftp.ncbi.nlm.nih.gov') as ftp:
            ftp.login()
            ftp.cwd(cwd)
            for file_name in tqdm(rep_ftp_file_names, leave=False):
                ftp.retrbinary(f'RETR {file_name}', fp.write)