In [1]:
import numpy as np
import pandas as pd
import os
from pathlib import Path  
import GEOparse

In [2]:
# Dataset analyzed in this notebook: GEO series GSE13159 (leukemia expression profiles).

In [3]:
# Disabled exploratory snippet, kept as a bare string literal so it is never executed
# (the cell merely echoes the string). If re-enabled it would fetch GSE58212 -- note this
# is a different series from the GSE13159 used in the rest of the notebook -- and print
# the metadata and table head of the first sample only (the loop breaks after one pass).
'''gse = GEOparse.get_GEO(geo="GSE58212", destdir="gse_arquivos")
print("GSM example:")
for gsm_name, gsm in gse.gsms.items():
    gsm.table = gsm.table.set_index('ID_REF')
    print("Name: ", gsm_name)
    print("Metadata:",)
    for key, value in gsm.metadata.items():
        print(" - %s : %s" % (key, ", ".join(value)))
    print ("Table data:",)
    print (gsm.table.head())
    break'''

'gse = GEOparse.get_GEO(geo="GSE58212", destdir="gse_arquivos")\nprint("GSM example:")\nfor gsm_name, gsm in gse.gsms.items():\n    gsm.table = gsm.table.set_index(\'ID_REF\')\n    print("Name: ", gsm_name)\n    print("Metadata:",)\n    for key, value in gsm.metadata.items():\n        print(" - %s : %s" % (key, ", ".join(value)))\n    print ("Table data:",)\n    print (gsm.table.head())\n    break'

In [4]:
def _gsm_to_row(gsm_name, gsm, drop_columns, label_key):
    """Turn one GEOparse sample into a single-row DataFrame with probes as columns."""
    # Derive a new frame instead of rebinding gsm.table, so the sample object is untouched.
    row = (
        gsm.table
        .drop(columns=list(drop_columns))   # discard the unused measurement columns
        .set_index('ID_REF')                # probe ids become the row labels ...
        .T                                  # ... and then the column labels
    )

    # Sample accession goes first so each row can be traced back to its GSM.
    row.insert(0, "sample", [gsm_name], allow_duplicates=True)

    # 'characteristics_ch1' may be absent; treat that as "no label" rather than failing.
    for item in gsm.metadata.get('characteristics_ch1') or []:
        if label_key in item:
            # Split on the first colon only, so labels containing ':' are kept whole.
            label = item.split(":", 1)[1].strip()
            row.insert(1, "type", [label], allow_duplicates=True)
            break  # the first matching entry wins; a second 'type' column is never added
    return row


def GEO_to_CSV(geo_code, csv,
               drop_columns=('ABS_CALL', 'DETECTION P-VALUE', 'VALUE'),
               label_key="leukemia class"):
    """Download a GEO series and assemble it into one row-per-sample DataFrame.

    The series is fetched with ``GEOparse.get_GEO`` into the local folder
    ``gse_arquivos`` (created if missing). Each sample table is stripped of
    ``drop_columns``, indexed by ``ID_REF`` and transposed, then prefixed with a
    ``sample`` column (the GSM name) and, when a ``characteristics_ch1`` entry
    contains ``label_key``, a ``type`` column holding the text after the first colon.

    Parameters
    ----------
    geo_code : str
        GEO series accession, e.g. ``'GSE13159'``.
    csv : bool
        When truthy, also write the result to ``<geo_code>.csv`` in the working directory.
    drop_columns : iterable of str, optional
        Columns removed from every sample table before transposing. The defaults
        are the ones this notebook's dataset needs; adjust per repository.
    label_key : str, optional
        Substring that identifies the class label inside ``characteristics_ch1``.

    Returns
    -------
    pandas.DataFrame
        One row per imported sample. Empty if the download fails. If a sample
        fails to import, processing stops there and the rows collected so far
        are returned (and exported, when ``csv`` is truthy).

    Notes
    -----
    Errors are reported with ``print`` rather than raised, in keeping with the
    best-effort style of this notebook.
    """
    path = 'gse_arquivos'
    try:
        os.makedirs(path)
        print(f"Diretório '{path}' criado com sucesso.")
    except FileExistsError:
        print(f"Diretório '{path}' já existe.")

    try:
        gse = GEOparse.get_GEO(geo=geo_code, destdir=path)
        print(f"[{geo_code}] Repositório baixado para pasta {path}")
    except Exception as e:
        print(f"[{geo_code}] Problema ao baixar repositório: {e}")
        # Without a parsed series there is nothing to import; bail out cleanly
        # instead of tripping over the unbound `gse` further down.
        return pd.DataFrame()

    print(f"[{geo_code}] Processando importação de amostras...")
    rows = []
    import_ok = True
    gsm_name = None  # bound up front so the error message below can always be formatted
    try:
        for gsm_name, gsm in gse.gsms.items():
            rows.append(_gsm_to_row(gsm_name, gsm, drop_columns, label_key))
    except Exception as e:
        import_ok = False
        print(f"[{geo_code}] Problema ao importar amostra '{gsm_name}': {e}")

    # Concatenate once: growing a DataFrame inside the loop copies it on every pass.
    df = pd.concat(rows, axis=0) if rows else pd.DataFrame()

    # Replace the per-sample row labels with a plain 0..n-1 index and drop the
    # leftover 'ID_REF' name from the column axis.
    df = df.reset_index(drop=True)
    df = df.rename_axis(None, axis=1)

    if csv:
        try:
            df.to_csv(geo_code + '.csv', index=False)
            print(f"[{geo_code}] Repositório convertido para csv: '{geo_code}.csv'")
        except Exception as e:
            print(f"[{geo_code}] Problema ao converter para csv: {e}")

    if import_ok:
        print(f"[{geo_code}] Dataset importa com sucesso!")
    else:
        # Do not claim success when the loop was cut short.
        print(f"[{geo_code}] Importação interrompida: {len(rows)} amostra(s) importada(s) antes do erro.")
    return df

In [5]:
# Assemble the GSE13159 sample-by-probe matrix in memory; no CSV is written at this step.
GSE13159 = GEO_to_CSV(geo_code='GSE13159', csv=False)

02-Sep-2023 17:35:29 DEBUG utils - Directory gse_arquivos already exists. Skipping.
02-Sep-2023 17:35:29 INFO GEOparse - File already exist: using local version.
02-Sep-2023 17:35:29 INFO GEOparse - Parsing gse_arquivos\GSE13159_family.soft.gz: 
02-Sep-2023 17:35:29 DEBUG GEOparse - DATABASE: GeoMiame
02-Sep-2023 17:35:29 DEBUG GEOparse - SERIES: GSE13159
02-Sep-2023 17:35:29 DEBUG GEOparse - PLATFORM: GPL570


Diretório 'gse_arquivos' já existe.


  return read_csv(StringIO(data), index_col=None, sep="\t")
02-Sep-2023 17:35:30 DEBUG GEOparse - SAMPLE: GSM329407
02-Sep-2023 17:35:30 DEBUG GEOparse - SAMPLE: GSM329408
02-Sep-2023 17:35:30 DEBUG GEOparse - SAMPLE: GSM329409
02-Sep-2023 17:35:30 DEBUG GEOparse - SAMPLE: GSM329410
02-Sep-2023 17:35:30 DEBUG GEOparse - SAMPLE: GSM329411
02-Sep-2023 17:35:31 DEBUG GEOparse - SAMPLE: GSM329412
02-Sep-2023 17:35:31 DEBUG GEOparse - SAMPLE: GSM329413
02-Sep-2023 17:35:31 DEBUG GEOparse - SAMPLE: GSM329414
02-Sep-2023 17:35:31 DEBUG GEOparse - SAMPLE: GSM329415
02-Sep-2023 17:35:31 DEBUG GEOparse - SAMPLE: GSM329416
02-Sep-2023 17:35:31 DEBUG GEOparse - SAMPLE: GSM329417
02-Sep-2023 17:35:31 DEBUG GEOparse - SAMPLE: GSM329418
02-Sep-2023 17:35:31 DEBUG GEOparse - SAMPLE: GSM329419
02-Sep-2023 17:35:32 DEBUG GEOparse - SAMPLE: GSM329420
02-Sep-2023 17:35:32 DEBUG GEOparse - SAMPLE: GSM329421
02-Sep-2023 17:35:32 DEBUG GEOparse - SAMPLE: GSM329422
02-Sep-2023 17:35:32 DEBUG GEOparse - SAMPLE

02-Sep-2023 17:35:46 DEBUG GEOparse - SAMPLE: GSM329774
02-Sep-2023 17:35:46 DEBUG GEOparse - SAMPLE: GSM329775
02-Sep-2023 17:35:46 DEBUG GEOparse - SAMPLE: GSM329776
02-Sep-2023 17:35:46 DEBUG GEOparse - SAMPLE: GSM329777
02-Sep-2023 17:35:46 DEBUG GEOparse - SAMPLE: GSM329778
02-Sep-2023 17:35:46 DEBUG GEOparse - SAMPLE: GSM329779
02-Sep-2023 17:35:47 DEBUG GEOparse - SAMPLE: GSM329780
02-Sep-2023 17:35:47 DEBUG GEOparse - SAMPLE: GSM329781
02-Sep-2023 17:35:47 DEBUG GEOparse - SAMPLE: GSM329782
02-Sep-2023 17:35:47 DEBUG GEOparse - SAMPLE: GSM329783
02-Sep-2023 17:35:47 DEBUG GEOparse - SAMPLE: GSM329784
02-Sep-2023 17:35:47 DEBUG GEOparse - SAMPLE: GSM329785
02-Sep-2023 17:35:47 DEBUG GEOparse - SAMPLE: GSM329786
02-Sep-2023 17:35:47 DEBUG GEOparse - SAMPLE: GSM329787
02-Sep-2023 17:35:47 DEBUG GEOparse - SAMPLE: GSM329788
02-Sep-2023 17:35:48 DEBUG GEOparse - SAMPLE: GSM329789
02-Sep-2023 17:35:48 DEBUG GEOparse - SAMPLE: GSM329790
02-Sep-2023 17:35:48 DEBUG GEOparse - SAMPLE: GS

02-Sep-2023 17:36:02 DEBUG GEOparse - SAMPLE: GSM329921
02-Sep-2023 17:36:02 DEBUG GEOparse - SAMPLE: GSM329922
02-Sep-2023 17:36:03 DEBUG GEOparse - SAMPLE: GSM329923
02-Sep-2023 17:36:03 DEBUG GEOparse - SAMPLE: GSM329924
02-Sep-2023 17:36:03 DEBUG GEOparse - SAMPLE: GSM329925
02-Sep-2023 17:36:03 DEBUG GEOparse - SAMPLE: GSM329926
02-Sep-2023 17:36:03 DEBUG GEOparse - SAMPLE: GSM329927
02-Sep-2023 17:36:03 DEBUG GEOparse - SAMPLE: GSM329928
02-Sep-2023 17:36:03 DEBUG GEOparse - SAMPLE: GSM329929
02-Sep-2023 17:36:03 DEBUG GEOparse - SAMPLE: GSM329930
02-Sep-2023 17:36:04 DEBUG GEOparse - SAMPLE: GSM329931
02-Sep-2023 17:36:04 DEBUG GEOparse - SAMPLE: GSM329932
02-Sep-2023 17:36:04 DEBUG GEOparse - SAMPLE: GSM329933
02-Sep-2023 17:36:04 DEBUG GEOparse - SAMPLE: GSM329934
02-Sep-2023 17:36:04 DEBUG GEOparse - SAMPLE: GSM329935
02-Sep-2023 17:36:04 DEBUG GEOparse - SAMPLE: GSM329936
02-Sep-2023 17:36:04 DEBUG GEOparse - SAMPLE: GSM329937
02-Sep-2023 17:36:04 DEBUG GEOparse - SAMPLE: GS

02-Sep-2023 17:36:19 DEBUG GEOparse - SAMPLE: GSM330068
02-Sep-2023 17:36:19 DEBUG GEOparse - SAMPLE: GSM330069
02-Sep-2023 17:36:19 DEBUG GEOparse - SAMPLE: GSM330070
02-Sep-2023 17:36:19 DEBUG GEOparse - SAMPLE: GSM330071
02-Sep-2023 17:36:19 DEBUG GEOparse - SAMPLE: GSM330072
02-Sep-2023 17:36:19 DEBUG GEOparse - SAMPLE: GSM330073
02-Sep-2023 17:36:20 DEBUG GEOparse - SAMPLE: GSM330074
02-Sep-2023 17:36:20 DEBUG GEOparse - SAMPLE: GSM330075
02-Sep-2023 17:36:20 DEBUG GEOparse - SAMPLE: GSM330076
02-Sep-2023 17:36:20 DEBUG GEOparse - SAMPLE: GSM330077
02-Sep-2023 17:36:20 DEBUG GEOparse - SAMPLE: GSM330078
02-Sep-2023 17:36:20 DEBUG GEOparse - SAMPLE: GSM330079
02-Sep-2023 17:36:20 DEBUG GEOparse - SAMPLE: GSM330080
02-Sep-2023 17:36:20 DEBUG GEOparse - SAMPLE: GSM330081
02-Sep-2023 17:36:20 DEBUG GEOparse - SAMPLE: GSM330082
02-Sep-2023 17:36:21 DEBUG GEOparse - SAMPLE: GSM330083
02-Sep-2023 17:36:21 DEBUG GEOparse - SAMPLE: GSM330084
02-Sep-2023 17:36:21 DEBUG GEOparse - SAMPLE: GS

02-Sep-2023 17:36:36 DEBUG GEOparse - SAMPLE: GSM330215
02-Sep-2023 17:36:36 DEBUG GEOparse - SAMPLE: GSM330216
02-Sep-2023 17:36:36 DEBUG GEOparse - SAMPLE: GSM330217
02-Sep-2023 17:36:36 DEBUG GEOparse - SAMPLE: GSM330218
02-Sep-2023 17:36:36 DEBUG GEOparse - SAMPLE: GSM330219
02-Sep-2023 17:36:36 DEBUG GEOparse - SAMPLE: GSM330220
02-Sep-2023 17:36:36 DEBUG GEOparse - SAMPLE: GSM330221
02-Sep-2023 17:36:37 DEBUG GEOparse - SAMPLE: GSM330222
02-Sep-2023 17:36:37 DEBUG GEOparse - SAMPLE: GSM330223
02-Sep-2023 17:36:37 DEBUG GEOparse - SAMPLE: GSM330224
02-Sep-2023 17:36:37 DEBUG GEOparse - SAMPLE: GSM330225
02-Sep-2023 17:36:37 DEBUG GEOparse - SAMPLE: GSM330226
02-Sep-2023 17:36:37 DEBUG GEOparse - SAMPLE: GSM330227
02-Sep-2023 17:36:37 DEBUG GEOparse - SAMPLE: GSM330228
02-Sep-2023 17:36:37 DEBUG GEOparse - SAMPLE: GSM330229
02-Sep-2023 17:36:38 DEBUG GEOparse - SAMPLE: GSM330230
02-Sep-2023 17:36:38 DEBUG GEOparse - SAMPLE: GSM330231
02-Sep-2023 17:36:38 DEBUG GEOparse - SAMPLE: GS

02-Sep-2023 17:36:53 DEBUG GEOparse - SAMPLE: GSM330368
02-Sep-2023 17:36:53 DEBUG GEOparse - SAMPLE: GSM330369
02-Sep-2023 17:36:53 DEBUG GEOparse - SAMPLE: GSM330370
02-Sep-2023 17:36:54 DEBUG GEOparse - SAMPLE: GSM330371
02-Sep-2023 17:36:54 DEBUG GEOparse - SAMPLE: GSM330372
02-Sep-2023 17:36:54 DEBUG GEOparse - SAMPLE: GSM330373
02-Sep-2023 17:36:54 DEBUG GEOparse - SAMPLE: GSM330374
02-Sep-2023 17:36:54 DEBUG GEOparse - SAMPLE: GSM330375
02-Sep-2023 17:36:54 DEBUG GEOparse - SAMPLE: GSM330376
02-Sep-2023 17:36:54 DEBUG GEOparse - SAMPLE: GSM330377
02-Sep-2023 17:36:54 DEBUG GEOparse - SAMPLE: GSM330378
02-Sep-2023 17:36:54 DEBUG GEOparse - SAMPLE: GSM330379
02-Sep-2023 17:36:55 DEBUG GEOparse - SAMPLE: GSM330380
02-Sep-2023 17:36:55 DEBUG GEOparse - SAMPLE: GSM330381
02-Sep-2023 17:36:55 DEBUG GEOparse - SAMPLE: GSM330382
02-Sep-2023 17:36:55 DEBUG GEOparse - SAMPLE: GSM330383
02-Sep-2023 17:36:55 DEBUG GEOparse - SAMPLE: GSM330384
02-Sep-2023 17:36:55 DEBUG GEOparse - SAMPLE: GS

02-Sep-2023 17:37:10 DEBUG GEOparse - SAMPLE: GSM330515
02-Sep-2023 17:37:11 DEBUG GEOparse - SAMPLE: GSM330516
02-Sep-2023 17:37:11 DEBUG GEOparse - SAMPLE: GSM330517
02-Sep-2023 17:37:11 DEBUG GEOparse - SAMPLE: GSM330518
02-Sep-2023 17:37:11 DEBUG GEOparse - SAMPLE: GSM330519
02-Sep-2023 17:37:11 DEBUG GEOparse - SAMPLE: GSM330520
02-Sep-2023 17:37:11 DEBUG GEOparse - SAMPLE: GSM330521
02-Sep-2023 17:37:11 DEBUG GEOparse - SAMPLE: GSM330522
02-Sep-2023 17:37:11 DEBUG GEOparse - SAMPLE: GSM330523
02-Sep-2023 17:37:11 DEBUG GEOparse - SAMPLE: GSM330524
02-Sep-2023 17:37:12 DEBUG GEOparse - SAMPLE: GSM330525
02-Sep-2023 17:37:12 DEBUG GEOparse - SAMPLE: GSM330526
02-Sep-2023 17:37:12 DEBUG GEOparse - SAMPLE: GSM330527
02-Sep-2023 17:37:12 DEBUG GEOparse - SAMPLE: GSM330528
02-Sep-2023 17:37:12 DEBUG GEOparse - SAMPLE: GSM330529
02-Sep-2023 17:37:12 DEBUG GEOparse - SAMPLE: GSM330530
02-Sep-2023 17:37:12 DEBUG GEOparse - SAMPLE: GSM330531
02-Sep-2023 17:37:12 DEBUG GEOparse - SAMPLE: GS

02-Sep-2023 17:37:27 DEBUG GEOparse - SAMPLE: GSM330662
02-Sep-2023 17:37:27 DEBUG GEOparse - SAMPLE: GSM330663
02-Sep-2023 17:37:27 DEBUG GEOparse - SAMPLE: GSM330664
02-Sep-2023 17:37:27 DEBUG GEOparse - SAMPLE: GSM330665
02-Sep-2023 17:37:28 DEBUG GEOparse - SAMPLE: GSM330666
02-Sep-2023 17:37:28 DEBUG GEOparse - SAMPLE: GSM330667
02-Sep-2023 17:37:28 DEBUG GEOparse - SAMPLE: GSM330668
02-Sep-2023 17:37:28 DEBUG GEOparse - SAMPLE: GSM330669
02-Sep-2023 17:37:28 DEBUG GEOparse - SAMPLE: GSM330670
02-Sep-2023 17:37:28 DEBUG GEOparse - SAMPLE: GSM330671
02-Sep-2023 17:37:28 DEBUG GEOparse - SAMPLE: GSM330672
02-Sep-2023 17:37:28 DEBUG GEOparse - SAMPLE: GSM330673
02-Sep-2023 17:37:28 DEBUG GEOparse - SAMPLE: GSM330674
02-Sep-2023 17:37:29 DEBUG GEOparse - SAMPLE: GSM330675
02-Sep-2023 17:37:29 DEBUG GEOparse - SAMPLE: GSM330676
02-Sep-2023 17:37:29 DEBUG GEOparse - SAMPLE: GSM330677
02-Sep-2023 17:37:29 DEBUG GEOparse - SAMPLE: GSM330678
02-Sep-2023 17:37:29 DEBUG GEOparse - SAMPLE: GS

02-Sep-2023 17:37:44 DEBUG GEOparse - SAMPLE: GSM330809
02-Sep-2023 17:37:44 DEBUG GEOparse - SAMPLE: GSM330810
02-Sep-2023 17:37:44 DEBUG GEOparse - SAMPLE: GSM330811
02-Sep-2023 17:37:45 DEBUG GEOparse - SAMPLE: GSM330812
02-Sep-2023 17:37:45 DEBUG GEOparse - SAMPLE: GSM330813
02-Sep-2023 17:37:45 DEBUG GEOparse - SAMPLE: GSM330814
02-Sep-2023 17:37:45 DEBUG GEOparse - SAMPLE: GSM330815
02-Sep-2023 17:37:45 DEBUG GEOparse - SAMPLE: GSM330816
02-Sep-2023 17:37:45 DEBUG GEOparse - SAMPLE: GSM330817
02-Sep-2023 17:37:45 DEBUG GEOparse - SAMPLE: GSM330818
02-Sep-2023 17:37:45 DEBUG GEOparse - SAMPLE: GSM330819
02-Sep-2023 17:37:45 DEBUG GEOparse - SAMPLE: GSM330820
02-Sep-2023 17:37:46 DEBUG GEOparse - SAMPLE: GSM330821
02-Sep-2023 17:37:46 DEBUG GEOparse - SAMPLE: GSM330822
02-Sep-2023 17:37:46 DEBUG GEOparse - SAMPLE: GSM330823
02-Sep-2023 17:37:46 DEBUG GEOparse - SAMPLE: GSM330824
02-Sep-2023 17:37:46 DEBUG GEOparse - SAMPLE: GSM330825
02-Sep-2023 17:37:46 DEBUG GEOparse - SAMPLE: GS

02-Sep-2023 17:38:01 DEBUG GEOparse - SAMPLE: GSM330956
02-Sep-2023 17:38:01 DEBUG GEOparse - SAMPLE: GSM330957
02-Sep-2023 17:38:01 DEBUG GEOparse - SAMPLE: GSM330958
02-Sep-2023 17:38:02 DEBUG GEOparse - SAMPLE: GSM330959
02-Sep-2023 17:38:02 DEBUG GEOparse - SAMPLE: GSM330960
02-Sep-2023 17:38:02 DEBUG GEOparse - SAMPLE: GSM330961
02-Sep-2023 17:38:02 DEBUG GEOparse - SAMPLE: GSM330962
02-Sep-2023 17:38:02 DEBUG GEOparse - SAMPLE: GSM330963
02-Sep-2023 17:38:02 DEBUG GEOparse - SAMPLE: GSM330964
02-Sep-2023 17:38:02 DEBUG GEOparse - SAMPLE: GSM330965
02-Sep-2023 17:38:02 DEBUG GEOparse - SAMPLE: GSM330966
02-Sep-2023 17:38:03 DEBUG GEOparse - SAMPLE: GSM330967
02-Sep-2023 17:38:03 DEBUG GEOparse - SAMPLE: GSM330968
02-Sep-2023 17:38:03 DEBUG GEOparse - SAMPLE: GSM330969
02-Sep-2023 17:38:03 DEBUG GEOparse - SAMPLE: GSM330970
02-Sep-2023 17:38:03 DEBUG GEOparse - SAMPLE: GSM330971
02-Sep-2023 17:38:03 DEBUG GEOparse - SAMPLE: GSM330972
02-Sep-2023 17:38:03 DEBUG GEOparse - SAMPLE: GS

02-Sep-2023 17:38:19 DEBUG GEOparse - SAMPLE: GSM331103
02-Sep-2023 17:38:19 DEBUG GEOparse - SAMPLE: GSM331104
02-Sep-2023 17:38:19 DEBUG GEOparse - SAMPLE: GSM331105
02-Sep-2023 17:38:19 DEBUG GEOparse - SAMPLE: GSM331106
02-Sep-2023 17:38:19 DEBUG GEOparse - SAMPLE: GSM331107
02-Sep-2023 17:38:19 DEBUG GEOparse - SAMPLE: GSM331108
02-Sep-2023 17:38:20 DEBUG GEOparse - SAMPLE: GSM331109
02-Sep-2023 17:38:20 DEBUG GEOparse - SAMPLE: GSM331110
02-Sep-2023 17:38:20 DEBUG GEOparse - SAMPLE: GSM331111
02-Sep-2023 17:38:20 DEBUG GEOparse - SAMPLE: GSM331112
02-Sep-2023 17:38:20 DEBUG GEOparse - SAMPLE: GSM331113
02-Sep-2023 17:38:20 DEBUG GEOparse - SAMPLE: GSM331114
02-Sep-2023 17:38:20 DEBUG GEOparse - SAMPLE: GSM331115
02-Sep-2023 17:38:20 DEBUG GEOparse - SAMPLE: GSM331116
02-Sep-2023 17:38:21 DEBUG GEOparse - SAMPLE: GSM331117
02-Sep-2023 17:38:21 DEBUG GEOparse - SAMPLE: GSM331118
02-Sep-2023 17:38:21 DEBUG GEOparse - SAMPLE: GSM331119
02-Sep-2023 17:38:21 DEBUG GEOparse - SAMPLE: GS

02-Sep-2023 17:38:36 DEBUG GEOparse - SAMPLE: GSM331250
02-Sep-2023 17:38:37 DEBUG GEOparse - SAMPLE: GSM331251
02-Sep-2023 17:38:37 DEBUG GEOparse - SAMPLE: GSM331252
02-Sep-2023 17:38:37 DEBUG GEOparse - SAMPLE: GSM331253
02-Sep-2023 17:38:37 DEBUG GEOparse - SAMPLE: GSM331254
02-Sep-2023 17:38:37 DEBUG GEOparse - SAMPLE: GSM331255
02-Sep-2023 17:38:37 DEBUG GEOparse - SAMPLE: GSM331256
02-Sep-2023 17:38:37 DEBUG GEOparse - SAMPLE: GSM331257
02-Sep-2023 17:38:37 DEBUG GEOparse - SAMPLE: GSM331258
02-Sep-2023 17:38:38 DEBUG GEOparse - SAMPLE: GSM331259
02-Sep-2023 17:38:38 DEBUG GEOparse - SAMPLE: GSM331260
02-Sep-2023 17:38:38 DEBUG GEOparse - SAMPLE: GSM331261
02-Sep-2023 17:38:38 DEBUG GEOparse - SAMPLE: GSM331262
02-Sep-2023 17:38:38 DEBUG GEOparse - SAMPLE: GSM331263
02-Sep-2023 17:38:38 DEBUG GEOparse - SAMPLE: GSM331264
02-Sep-2023 17:38:38 DEBUG GEOparse - SAMPLE: GSM331265
02-Sep-2023 17:38:38 DEBUG GEOparse - SAMPLE: GSM331266
02-Sep-2023 17:38:39 DEBUG GEOparse - SAMPLE: GS

02-Sep-2023 17:38:54 DEBUG GEOparse - SAMPLE: GSM331397
02-Sep-2023 17:38:54 DEBUG GEOparse - SAMPLE: GSM331398
02-Sep-2023 17:38:54 DEBUG GEOparse - SAMPLE: GSM331399
02-Sep-2023 17:38:54 DEBUG GEOparse - SAMPLE: GSM331400
02-Sep-2023 17:38:54 DEBUG GEOparse - SAMPLE: GSM331401
02-Sep-2023 17:38:54 DEBUG GEOparse - SAMPLE: GSM331402
02-Sep-2023 17:38:55 DEBUG GEOparse - SAMPLE: GSM331403
02-Sep-2023 17:38:55 DEBUG GEOparse - SAMPLE: GSM331404
02-Sep-2023 17:38:55 DEBUG GEOparse - SAMPLE: GSM331405
02-Sep-2023 17:38:55 DEBUG GEOparse - SAMPLE: GSM331406
02-Sep-2023 17:38:55 DEBUG GEOparse - SAMPLE: GSM331407
02-Sep-2023 17:38:55 DEBUG GEOparse - SAMPLE: GSM331408
02-Sep-2023 17:38:55 DEBUG GEOparse - SAMPLE: GSM331409
02-Sep-2023 17:38:55 DEBUG GEOparse - SAMPLE: GSM331410
02-Sep-2023 17:38:56 DEBUG GEOparse - SAMPLE: GSM331411
02-Sep-2023 17:38:56 DEBUG GEOparse - SAMPLE: GSM331412
02-Sep-2023 17:38:56 DEBUG GEOparse - SAMPLE: GSM331413
02-Sep-2023 17:38:56 DEBUG GEOparse - SAMPLE: GS

02-Sep-2023 17:39:11 DEBUG GEOparse - SAMPLE: GSM331544
02-Sep-2023 17:39:11 DEBUG GEOparse - SAMPLE: GSM331545
02-Sep-2023 17:39:11 DEBUG GEOparse - SAMPLE: GSM331546
02-Sep-2023 17:39:12 DEBUG GEOparse - SAMPLE: GSM331547
02-Sep-2023 17:39:12 DEBUG GEOparse - SAMPLE: GSM331548
02-Sep-2023 17:39:12 DEBUG GEOparse - SAMPLE: GSM331549
02-Sep-2023 17:39:12 DEBUG GEOparse - SAMPLE: GSM331550
02-Sep-2023 17:39:12 DEBUG GEOparse - SAMPLE: GSM331551
02-Sep-2023 17:39:12 DEBUG GEOparse - SAMPLE: GSM331552
02-Sep-2023 17:39:12 DEBUG GEOparse - SAMPLE: GSM331553
02-Sep-2023 17:39:12 DEBUG GEOparse - SAMPLE: GSM331554
02-Sep-2023 17:39:12 DEBUG GEOparse - SAMPLE: GSM331555
02-Sep-2023 17:39:13 DEBUG GEOparse - SAMPLE: GSM331556
02-Sep-2023 17:39:13 DEBUG GEOparse - SAMPLE: GSM331557
02-Sep-2023 17:39:13 DEBUG GEOparse - SAMPLE: GSM331558
02-Sep-2023 17:39:13 DEBUG GEOparse - SAMPLE: GSM331559
02-Sep-2023 17:39:13 DEBUG GEOparse - SAMPLE: GSM331560
02-Sep-2023 17:39:13 DEBUG GEOparse - SAMPLE: GS

02-Sep-2023 17:39:29 DEBUG GEOparse - SAMPLE: GSM331691
02-Sep-2023 17:39:29 DEBUG GEOparse - SAMPLE: GSM331692
02-Sep-2023 17:39:29 DEBUG GEOparse - SAMPLE: GSM331693
02-Sep-2023 17:39:29 DEBUG GEOparse - SAMPLE: GSM331694
02-Sep-2023 17:39:30 DEBUG GEOparse - SAMPLE: GSM331695
02-Sep-2023 17:39:30 DEBUG GEOparse - SAMPLE: GSM331696
02-Sep-2023 17:39:30 DEBUG GEOparse - SAMPLE: GSM331697
02-Sep-2023 17:39:30 DEBUG GEOparse - SAMPLE: GSM331698
02-Sep-2023 17:39:30 DEBUG GEOparse - SAMPLE: GSM331699
02-Sep-2023 17:39:30 DEBUG GEOparse - SAMPLE: GSM331700
02-Sep-2023 17:39:30 DEBUG GEOparse - SAMPLE: GSM331701
02-Sep-2023 17:39:30 DEBUG GEOparse - SAMPLE: GSM331702
02-Sep-2023 17:39:30 DEBUG GEOparse - SAMPLE: GSM331703
02-Sep-2023 17:39:31 DEBUG GEOparse - SAMPLE: GSM331704
02-Sep-2023 17:39:31 DEBUG GEOparse - SAMPLE: GSM331705
02-Sep-2023 17:39:31 DEBUG GEOparse - SAMPLE: GSM331706
02-Sep-2023 17:39:31 DEBUG GEOparse - SAMPLE: GSM331707
02-Sep-2023 17:39:31 DEBUG GEOparse - SAMPLE: GS

[GSE13159] Repositório baixado para pasta gse_arquivos
[GSE13159] Processando importação de amostras...
[GSE13159] Dataset importa com sucesso!


In [6]:
# Preview the first five samples (sample id, class label, then probe columns).
GSE13159.head(n=5)

Unnamed: 0,sample,type,AFFX-BioB-5_at,AFFX-BioB-M_at,AFFX-BioB-3_at,AFFX-BioC-5_at,AFFX-BioC-3_at,AFFX-BioDn-5_at,AFFX-BioDn-3_at,AFFX-CreX-5_at,...,1570631_at,1570632_at,1570633_at,1570635_at,1570639_at,1570644_at,1570645_at,1570650_at,1570651_at,1570653_at
0,GSM329407,mature B-ALL with t(8;14),1019.38,1099.16,688.488,2576.98,3363.56,6420.71,12750.3,26337.3,...,89.8769,16.5212,48.501,18.0256,25.8039,31.1645,48.5148,20.3446,72.4218,23.0226
1,GSM329408,mature B-ALL with t(8;14),1126.74,1167.66,840.345,2572.97,3291.99,6171.86,11723.4,27573.8,...,37.0536,62.7578,28.3635,45.439,59.3555,51.2732,85.3964,36.3192,37.2117,29.1351
2,GSM329409,mature B-ALL with t(8;14),1545.94,2062.58,1240.19,3278.49,4032.6,8713.1,18047.1,44608.9,...,74.3732,26.2296,42.944,22.7571,109.297,51.7468,45.0193,43.8192,58.559,13.2296
3,GSM329410,mature B-ALL with t(8;14),2747.02,2703.53,1646.69,6280.02,7565.71,13037.1,29576.3,61453.7,...,117.708,25.2851,30.6478,89.1606,78.0304,238.059,93.7531,32.9817,196.12,28.1665
4,GSM329411,mature B-ALL with t(8;14),5614.93,9707.9,5331.42,17928.6,18722.0,28874.1,70896.2,128980.0,...,309.345,52.8402,104.07,136.921,125.509,57.2892,127.528,214.824,488.823,122.293


In [7]:
# Spot-check one cell of the matrix: row label 6, probe AFFX-CreX-3_at.
valor = GSE13159.loc[6, 'AFFX-CreX-3_at']
print(valor)

40253.4


In [8]:
# Print the frame summary (row/column counts, dtype breakdown, memory footprint).
GSE13159.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2096 entries, 0 to 2095
Columns: 54677 entries, sample to 1570653_at
dtypes: float64(54675), object(2)
memory usage: 874.4+ MB


In [9]:
# List the distinct class labels found in the 'type' column.
GSE13159.loc[:, 'type'].unique()

array(['mature B-ALL with t(8;14)', 'Pro-B-ALL with t(11q23)/MLL',
       'c-ALL/Pre-B-ALL with t(9;22)', 'T-ALL', 'ALL with t(12;21)',
       'ALL with t(1;19)', 'ALL with hyperdiploid karyotype',
       'c-ALL/Pre-B-ALL without t(9;22)', 'AML with t(8;21)',
       'AML with t(15;17)', 'AML with inv(16)/t(16;16)',
       'AML with t(11q23)/MLL',
       'AML with normal karyotype + other abnormalities',
       'AML complex aberrant karyotype', 'CLL', 'CML', 'MDS',
       'Non-leukemia and healthy bone marrow'], dtype=object)

In [11]:
# Persist the assembled matrix to disk, omitting the positional row index.
GSE13159.to_csv(path_or_buf='GSE13159.csv', index=False)