# --- Improved Luis's Script ---

## Get IDs

In [None]:
# Imports
from Bio import Entrez
import csv
import time
import pandas as pd
from urllib.error import HTTPError  # for Python 2 use: from urllib2 import HTTPError  # for Python 2

# Upper bound on the number of IDs requested from the search (passed as retmax)
max_samples = 10

# Contact address that Biopython attaches to every Entrez request
Entrez.email = "vela.vela.luis@gmail.com" #"karsten.leonhardt@posteo.de"

# Search the "gds" database for series entries (GSE) of organism "Homo"
handle = Entrez.esearch(db="gds", term="GSE[ETYP] AND Homo[Organism]", usehistory="y", retmax = max_samples)

# Parse the response; the handle is released even if parsing raises
try:
    record = Entrez.read(handle)
finally:
    handle.close()

# IDs actually returned (at most max_samples of them)
idlist = record['IdList']

# 'Count' is the total number of matches on the server side,
# while len(idlist) is how many IDs this request delivered
found_count = int(record['Count'])
read_count = len(idlist)

# Echo results
print('Total number of FOUND entries: ' + str(found_count))
print('Total number of READ  entries: ' + str(read_count))

## Get Summaries

In [None]:
# Target CSV file, named after the requested sample count
filename = 'test_'+str(max_samples)+'.csv'

# Summary fields written to the CSV, in column order.
# Full field list kept from the original script for reference:
#     'Item', 'Id', 'Accession', 'GDS', 'title',
#     'summary', 'GPL', 'GSE', 'taxon', 'entryType', 'gdsType',
#     'ptechType', 'valType', 'SSInfo', 'subsetInfo', 'PDAT',
#     'suppFile', 'Samples', 'Relations', 'ExtRelations',
#     'n_samples', 'SeriesTitle', 'PlatformTitle', 'PlatformTaxa',
#     'SamplesTaxa', 'PubMedIds', 'Projects', 'FTPLink', 'GEO2R'
fieldnames = ['Id', 'Accession', 'title', 'summary', 'taxon']

# newline='' lets the csv module control line endings itself (otherwise
# Windows output gets an extra blank row after every record); utf-8 keeps
# non-ASCII text writable and matches the default codec of pandas.read_csv.
with open(filename, 'w', newline='', encoding='utf-8') as opened_file:

    csvwriter = csv.writer(opened_file)

    # Header row
    csvwriter.writerow(fieldnames)

    # One esummary request per ID found by the search
    for i, e_id in enumerate(idlist):

        # Progress: record ID and percentage of the ID list processed so far
        print("Going to download record: {:10.0f} ({:5.1f}%)".format(int(e_id), (i+1)/read_count*100))

        handle = Entrez.esummary(db="gds", id=e_id)

        # Close the handle even if parsing or a field lookup fails
        try:
            data = Entrez.read(handle)

            # data[0] is the summary of the single requested ID
            row = [data[0][name] for name in fieldnames]
        finally:
            handle.close()

        csvwriter.writerow(row)

## Check that the CSV loads as a DataFrame

In [None]:
# Load the CSV back to confirm it parses as a DataFrame.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0;
# on_bad_lines='warn' keeps the same behaviour: malformed lines are skipped
# and a warning is emitted for each of them.
pd.read_csv(filename, sep=',', header=[0], on_bad_lines='warn')