In [1]:
import os
import time
import numpy as np
import pandas as pd
from tqdm import tqdm
# import plotly.express as px
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.decomposition import FastICA
from sklearn.decomposition import NMF
from sklearn.model_selection import train_test_split
from get_mapping import get_mapping

In [2]:
file_names = ["GDS4987", "GDS4399", "GDS4133", "GDS4132", "GDS3841", "GDS3104", "GDS2084", "GDS1051", "GDS1050"]
file_names.sort()
print("Current Location:", os.getcwd())
os.chdir("../datasets/")
print("Changed Directory To:", os.getcwd())

Current Location: /home/sowmya/Desktop/cs6024/project/codes
Changed Directory To: /home/sowmya/Desktop/cs6024/project/datasets


In [3]:
# Assign mapping between samples and sample type
PCOS_mapping = {'GDS1050': ['GSM27536', 'GSM27537', 'GSM27538', 'GSM27540', 'GSM27541'], 'GDS1051': ['GSM29645', 'GSM29646', 'GSM29647', 'GSM29648', 'GSM29649'], 'GDS2084': ['GSM114834', 'GSM114842', 'GSM114843', 'GSM114847', 'GSM114848', 'GSM114850', 'GSM114852', 'GSM114853'], 'GDS3104': ['GSM156186', 'GSM156187', 'GSM156510', 'GSM156511', 'GSM156512', 'GSM156749', 'GSM156750', 'GSM156751', 'GSM156752', 'GSM156753', 'GSM156763', 'GSM156946', 'GSM156948', 'GSM156949', 'GSM156950', 'GSM156951'], 'GDS3841': ['GSM277460', 'GSM277459', 'GSM277458', 'GSM277457', 'GSM277456', 'GSM277455', 'GSM277454', 'GSM277453', 'GSM277452', 'GSM277451', 'GSM277450', 'GSM277449'], 'GDS4132': ['GSM201542', 'GSM201543', 'GSM201544', 'GSM201545', 'GSM201829', 'GSM201830', 'GSM201831', 'GSM201832', 'GSM201833', 'GSM201834'], 'GDS4133': ['GSM201863', 'GSM201864', 'GSM201865', 'GSM201866', 'GSM201867', 'GSM201868', 'GSM201869', 'GSM201870', 'GSM201871', 'GSM201872'], 'GDS4399': ['GSM850530', 'GSM850531', 'GSM850532', 'GSM850533', 'GSM850534', 'GSM850535', 'GSM850536'], 'GDS4987': ['GSM1174423', 'GSM1174424', 'GSM1174425', 'GSM1174426', 'GSM1174427', 'GSM1174428', 'GSM1174429', 'GSM1174430', 'GSM1174431', 'GSM1174432', 'GSM1174433', 'GSM1174434', 'GSM1174435', 'GSM1174436']}
control_mapping = {'GDS1050': ['GSM27531', 'GSM27532', 'GSM27533', 'GSM27534', 'GSM27543', 'GSM27546', 'GSM27548', 'GSM27549'], 'GDS1051': ['GSM29537', 'GSM29638', 'GSM29643', 'GSM29644', 'GSM29650', 'GSM29651', 'GSM29652', 'GSM29653'], 'GDS2084': ['GSM114841', 'GSM114844', 'GSM114845', 'GSM114849', 'GSM114851', 'GSM114854', 'GSM114855'], 'GDS3104': ['GSM155631', 'GSM155643', 'GSM155644', 'GSM155729', 'GSM156170', 'GSM156171', 'GSM156176', 'GSM156177', 'GSM156178', 'GSM156179', 'GSM156180', 'GSM156181', 'GSM156184'], 'GDS3841': ['GSM277448', 'GSM277447', 'GSM277446', 'GSM277445', 'GSM277444', 'GSM277443', 'GSM277442', 'GSM277441', 'GSM277440', 'GSM277439', 'GSM277438'], 'GDS4132': [''], 'GDS4133': ['GSM201849', 'GSM201850', 'GSM201851', 'GSM201852', 'GSM201853', 'GSM201854', 'GSM201855', 'GSM201856', 'GSM201857', 'GSM201858', 'GSM201859', 'GSM201861', 'GSM201862'], 'GDS4399': ['GSM850527', 'GSM850528', 'GSM850529'], 'GDS4987': ['GSM1174423', 'GSM1174424', 'GSM1174425', 'GSM1174426', 'GSM1174427', 'GSM1174428', 'GSM1174429', 'GSM1174430', 'GSM1174431', 'GSM1174432', 'GSM1174433', 'GSM1174434', 'GSM1174435', 'GSM1174436']}

# Get mapping between the IDs provided and the Entrez Gene IDs
mapping, exclude_mapping = get_mapping(flag=True)
unique_geneids = list(set(mapping.values()))

def get_float_or_str(j):
    if type(j) == float:
        return int(j)
    elif type(j) == str and len(j.split("_")) == 1:
        return (int(j))
    else:
        return j
unique_geneids = [get_float_or_str(i) for i in unique_geneids]
print(len(unique_geneids))

Number of IDs excluded: 20665
Number of genes mapped: 67313
Number of unique gene IDs: 25180
25180


In [4]:
df_list = []
for i in file_names:
    print(i)
    fin = open(i + "_full.txt")
    complete_data = fin.read().splitlines()
    fin.close()
    
    # Find position from which the dataset tabular form begins
    pos = complete_data.index("!dataset_table_begin")
    data = [i.split("\t") for i in complete_data[pos+1:-1]]
    df = pd.DataFrame(data[1:], columns=data[0])
    
    # Remove the last 19 columns
    new_columns = list(df.columns[:-19])
    # Remove the column at position 1 - IDENTIFIER
    new_columns.pop(1)
    PCOS = []
    
    # Additionally ignore the last 1 column
    # Get the mapping - PCOS, Normal, Other == [1, 0, -1]
    for j in new_columns[1:-1]:
        if j in PCOS_mapping[i]: 
            PCOS.append(1)
        elif j in control_mapping[i]: 
            PCOS.append(0)
        else:
            PCOS.append(-1)
    
    # Set df to be only equal to the required columns
    df = df[new_columns[:-1]]

    # Replace the ID_REF using the geneid_mapping
    # df.replace({"ID_REF":mapping}, inplace=True) # Has a large overhead. Using map instead
    df["ID_REF"] = df["ID_REF"].map(lambda x: mapping.get(x, np.nan))
    display(df)
    
    # Remove all columns that have a mapping to null :)
    # Basically remove columns that don't map to a valid Entrez ID
    print("Removing all columns that have no Entrez mapping ...")
    df.drop(index=np.where(pd.isnull(df["ID_REF"]))[0], inplace=True)
    display(df)
    
    print("Unique IDs:", len(set(df["ID_REF"])), "Actual Size:", len(df["ID_REF"]))
    # a = list(np.where(df["ID_REF"].duplicated())[0])
    # values = df["ID_REF"].reset_index().loc[a]["ID_REF"]
    # print("Duplicates:", values)
    
    for col in new_columns[1:-1]:
        try:
            df[col] = df[col].astype(float)
        except:
            df[col] = df[col].str.replace("null", str(np.nan)).astype(float)
    
    print("Eliminating all duplicates, replacing them with averages ...")
    df = df.groupby("ID_REF").mean().reset_index()
    display(df)
    
    print("Checking for spurious gene IDs (if any) ...")
    for name in df["ID_REF"]:
        if name not in unique_geneids:
            print(name)
    
    print("Small Additional Crosscheck ...")
    print(np.where(df["ID_REF"].isin([6071])))
    
    display(df)
    
    for col in new_columns[1:-1]:
        try:
            df[col] = df[col].astype(float)
        except:
            df[col] = df[col].str.replace("null", str(np.nan)).astype(float)
    
    print("Eliminating all duplicates, replacing them with averages ...")
    df = df.groupby("ID_REF").mean().reset_index()
            
    display(df)
    
    # Transpose and add PCOS mapping column
    df = df[new_columns[:-1]].T 
    df.columns = df.iloc[0]
    df = df[1:]
    
    print("Scanning for additional columns to add ...")
    time.sleep(2)
    for j in tqdm(unique_geneids):
        if (type(j) == str or type(j) == int) and (j not in df.columns):
            df[j] = np.nan
    
    df = df[unique_geneids]
    
    df["PCOS"] = PCOS
    df.index.rename('sample_id', inplace=True)
    
    display(df)
    print("="*100)
    
    df.to_csv(i + ".csv")
    df_list.append(df)

GDS1050


Unnamed: 0,ID_REF,GSM27536,GSM27537,GSM27538,GSM27540,GSM27541,GSM27531,GSM27532,GSM27533,GSM27534,GSM27543,GSM27546,GSM27548,GSM27549
0,780_100616237,540.3,801,701.4,540.9,570,676.6,769.2,604.3,584.2,699,830.5,600.9,612
1,5982,98.9,48.2,90.9,53.8,57.5,39,45.2,50.1,51,81.8,80.2,97.7,59
2,3310,75.8,39.5,36.2,21.6,56.2,41.3,32.6,77.7,57.5,74.6,11.8,49.4,46.1
3,7849,561.3,433.6,395.6,414.6,606.6,585.3,527,501.3,506.5,743,696.3,736.3,757
4,2978,38.2,26.3,22,17.8,24.5,28.6,24.9,14,14.4,43.5,62,67.6,49
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22278,,5.7,4.3,3.5,1.7,2.8,6.9,2.4,4.1,6.9,6.9,6.1,5,4.8
22279,,17.4,7.5,12,17.9,20.7,15.4,12.1,13.3,3,9,22,17.9,27
22280,,2,0.9,8.3,0.6,4.7,6.1,1.7,1.1,0.8,6.1,11.4,12,1.3
22281,,8.2,5.4,14.6,2.1,26.9,3.9,3.6,3.7,4.5,8.7,8.1,4.2,4.9


Removing all columns that have no Entrez mapping ...


Unnamed: 0,ID_REF,GSM27536,GSM27537,GSM27538,GSM27540,GSM27541,GSM27531,GSM27532,GSM27533,GSM27534,GSM27543,GSM27546,GSM27548,GSM27549
0,780_100616237,540.3,801,701.4,540.9,570,676.6,769.2,604.3,584.2,699,830.5,600.9,612
1,5982,98.9,48.2,90.9,53.8,57.5,39,45.2,50.1,51,81.8,80.2,97.7,59
2,3310,75.8,39.5,36.2,21.6,56.2,41.3,32.6,77.7,57.5,74.6,11.8,49.4,46.1
3,7849,561.3,433.6,395.6,414.6,606.6,585.3,527,501.3,506.5,743,696.3,736.3,757
4,2978,38.2,26.3,22,17.8,24.5,28.6,24.9,14,14.4,43.5,62,67.6,49
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22232,2597,9624.9,11214.3,8624.7,8831.7,7978.6,7441.8,8402.3,9742.5,9285.3,9197.3,9816.9,7740.6,7795.3
22233,6772,242.8,362.1,447.9,2115.8,320.4,228.8,268,199.8,285,271.5,243.6,288.2,278.7
22234,6772,11.3,15.6,20.5,112.6,40.2,34.3,33.1,42.5,39.9,74.3,54.3,67.6,58.3
22235,6772,13.2,44.1,41.1,288.4,139.1,96.3,106.7,92.2,140.3,130.8,79.3,140.5,120.6


Unique IDs: 13299 Actual Size: 21156
Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM27536,GSM27537,GSM27538,GSM27540,GSM27541,GSM27531,GSM27532,GSM27533,GSM27534,GSM27543,GSM27546,GSM27548,GSM27549
0,2,33.20,46.800000,33.70,21.600000,13.100000,47.100000,25.700000,48.30,38.20,69.500000,43.400000,20.80,48.40
1,9,67.60,140.100000,87.10,142.400000,114.600000,101.700000,84.700000,80.00,84.80,97.200000,101.600000,96.70,116.30
2,10,15.00,18.000000,4.00,10.900000,10.700000,8.100000,14.700000,6.30,10.90,9.500000,11.500000,26.20,32.60
3,12,442.60,1061.400000,1142.60,134.800000,250.900000,762.700000,415.400000,776.60,657.00,799.500000,527.200000,923.40,900.30
4,13,7.40,8.100000,6.70,8.600000,10.500000,7.800000,8.700000,10.40,7.00,7.700000,33.900000,10.60,40.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13294,9772_102466911,62.25,54.600000,59.40,41.950000,58.300000,51.250000,51.800000,52.75,49.40,65.000000,61.850000,70.65,66.95
13295,9782_724102,310.80,760.233333,520.70,592.266667,821.066667,444.166667,523.333333,456.80,442.00,369.333333,319.066667,388.40,451.70
13296,984_728642,109.34,110.760000,109.10,101.800000,95.860000,112.660000,97.860000,100.86,102.42,88.700000,83.380000,63.84,73.46
13297,9859_645455,58.15,118.450000,117.35,132.350000,158.000000,98.700000,122.150000,114.35,79.85,110.000000,91.800000,114.80,116.65


Checking for spurious gene IDs (if any) ...
Small Additional Crosscheck ...
(array([], dtype=int64),)


Unnamed: 0,ID_REF,GSM27536,GSM27537,GSM27538,GSM27540,GSM27541,GSM27531,GSM27532,GSM27533,GSM27534,GSM27543,GSM27546,GSM27548,GSM27549
0,2,33.20,46.800000,33.70,21.600000,13.100000,47.100000,25.700000,48.30,38.20,69.500000,43.400000,20.80,48.40
1,9,67.60,140.100000,87.10,142.400000,114.600000,101.700000,84.700000,80.00,84.80,97.200000,101.600000,96.70,116.30
2,10,15.00,18.000000,4.00,10.900000,10.700000,8.100000,14.700000,6.30,10.90,9.500000,11.500000,26.20,32.60
3,12,442.60,1061.400000,1142.60,134.800000,250.900000,762.700000,415.400000,776.60,657.00,799.500000,527.200000,923.40,900.30
4,13,7.40,8.100000,6.70,8.600000,10.500000,7.800000,8.700000,10.40,7.00,7.700000,33.900000,10.60,40.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13294,9772_102466911,62.25,54.600000,59.40,41.950000,58.300000,51.250000,51.800000,52.75,49.40,65.000000,61.850000,70.65,66.95
13295,9782_724102,310.80,760.233333,520.70,592.266667,821.066667,444.166667,523.333333,456.80,442.00,369.333333,319.066667,388.40,451.70
13296,984_728642,109.34,110.760000,109.10,101.800000,95.860000,112.660000,97.860000,100.86,102.42,88.700000,83.380000,63.84,73.46
13297,9859_645455,58.15,118.450000,117.35,132.350000,158.000000,98.700000,122.150000,114.35,79.85,110.000000,91.800000,114.80,116.65


Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM27536,GSM27537,GSM27538,GSM27540,GSM27541,GSM27531,GSM27532,GSM27533,GSM27534,GSM27543,GSM27546,GSM27548,GSM27549
0,2,33.20,46.800000,33.70,21.600000,13.100000,47.100000,25.700000,48.30,38.20,69.500000,43.400000,20.80,48.40
1,9,67.60,140.100000,87.10,142.400000,114.600000,101.700000,84.700000,80.00,84.80,97.200000,101.600000,96.70,116.30
2,10,15.00,18.000000,4.00,10.900000,10.700000,8.100000,14.700000,6.30,10.90,9.500000,11.500000,26.20,32.60
3,12,442.60,1061.400000,1142.60,134.800000,250.900000,762.700000,415.400000,776.60,657.00,799.500000,527.200000,923.40,900.30
4,13,7.40,8.100000,6.70,8.600000,10.500000,7.800000,8.700000,10.40,7.00,7.700000,33.900000,10.60,40.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13294,9772_102466911,62.25,54.600000,59.40,41.950000,58.300000,51.250000,51.800000,52.75,49.40,65.000000,61.850000,70.65,66.95
13295,9782_724102,310.80,760.233333,520.70,592.266667,821.066667,444.166667,523.333333,456.80,442.00,369.333333,319.066667,388.40,451.70
13296,984_728642,109.34,110.760000,109.10,101.800000,95.860000,112.660000,97.860000,100.86,102.42,88.700000,83.380000,63.84,73.46
13297,9859_645455,58.15,118.450000,117.35,132.350000,158.000000,98.700000,122.150000,114.35,79.85,110.000000,91.800000,114.80,116.65


Scanning for additional columns to add ...


100%|██████████| 25180/25180 [00:23<00:00, 1052.78it/s]


ID_REF,1,2,3,131076,9,10,12,13,14,15,...,393046,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM27536,,33.2,,,67.6,15.0,442.6,7.4,365.0,5.5,...,,55.5,,,,48.55,,21.8,,1
GSM27537,,46.8,,,140.1,18.0,1061.4,8.1,200.5,4.3,...,,69.3,,,,11.45,,5.6,,1
GSM27538,,33.7,,,87.1,4.0,1142.6,6.7,197.6,3.1,...,,94.6,,,,32.7,,45.9,,1
GSM27540,,21.6,,,142.4,10.9,134.8,8.6,214.4,2.9,...,,69.1,,,,16.55,,47.5,,1
GSM27541,,13.1,,,114.6,10.7,250.9,10.5,222.2,4.1,...,,85.9,,,,15.9,,25.6,,1
GSM27531,,47.1,,,101.7,8.1,762.7,7.8,309.1,2.9,...,,70.8,,,,26.4,,6.9,,0
GSM27532,,25.7,,,84.7,14.7,415.4,8.7,236.7,4.4,...,,75.8,,,,16.3,,26.5,,0
GSM27533,,48.3,,,80.0,6.3,776.6,10.4,310.3,2.1,...,,62.4,,,,15.15,,12.3,,0
GSM27534,,38.2,,,84.8,10.9,657.0,7.0,281.7,4.0,...,,68.3,,,,6.1,,9.7,,0
GSM27543,,69.5,,,97.2,9.5,799.5,7.7,287.2,5.1,...,,73.8,,,,31.15,,19.6,,0


GDS1051


Unnamed: 0,ID_REF,GSM29645,GSM29646,GSM29647,GSM29648,GSM29649,GSM29537,GSM29638,GSM29643,GSM29644,GSM29650,GSM29651,GSM29652,GSM29653
0,10594,2291.7,1203.4,1553.1,969.5,1234.7,1766.3,2776,2685,2636.1,1419.9,2299.2,922,1608.1
1,826,9096.1,5326.1,6787.8,5953.7,6260.2,12617.2,10204.5,8674.5,9308,5771.2,8053.7,5158.6,4895.9
2,11224,21086.9,12777.1,12945.5,13443,10435.5,14999.4,19289.3,15698.5,15923.2,12202.2,16572.2,11260.1,11946.9
3,102465483_6158,27971.5,20778.2,19074.7,18613,18431.2,21188.3,22280.8,21876.1,24278.6,21732.7,30155.5,22067.7,21091.7
4,1982,6570.2,6469.9,5711.8,5408.9,8110.2,5614.3,8585.1,6424,7300.6,6157.3,8205.1,5972.6,7114.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22640,,52.5,23.4,11.1,12.3,14,11.9,6.1,13.9,13.4,22.5,12.2,16.6,27.1
22641,,57.1,13.6,13.2,7.2,16.5,8,3.9,29.9,37.8,83.9,17.1,32.4,45.5
22642,,36.7,19.5,3.9,21.7,8.3,4,3.3,2.4,4.5,36.5,17.3,23.3,3.8
22643,,23.1,14.5,15.3,33.8,17.5,7.8,16.7,48.7,38,50.8,45.8,37,63.3


Removing all columns that have no Entrez mapping ...


Unnamed: 0,ID_REF,GSM29645,GSM29646,GSM29647,GSM29648,GSM29649,GSM29537,GSM29638,GSM29643,GSM29644,GSM29650,GSM29651,GSM29652,GSM29653
0,10594,2291.7,1203.4,1553.1,969.5,1234.7,1766.3,2776,2685,2636.1,1419.9,2299.2,922,1608.1
1,826,9096.1,5326.1,6787.8,5953.7,6260.2,12617.2,10204.5,8674.5,9308,5771.2,8053.7,5158.6,4895.9
2,11224,21086.9,12777.1,12945.5,13443,10435.5,14999.4,19289.3,15698.5,15923.2,12202.2,16572.2,11260.1,11946.9
3,102465483_6158,27971.5,20778.2,19074.7,18613,18431.2,21188.3,22280.8,21876.1,24278.6,21732.7,30155.5,22067.7,21091.7
4,1982,6570.2,6469.9,5711.8,5408.9,8110.2,5614.3,8585.1,6424,7300.6,6157.3,8205.1,5972.6,7114.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22594,2597,30232.3,27222.1,22510,28209.6,19972.3,21289.9,35714.4,30872.4,29776.6,31365.5,35572.3,21008.4,23678.3
22595,6772,778.3,843.7,1134.1,4377.3,825.1,979.7,842.1,609.6,935.1,673.3,804.7,723,867.7
22596,6772,27,21.3,55.7,318.1,109.5,110.4,108.5,108.8,174.5,183.5,157.8,206.1,182.8
22597,6772,95.7,46.8,165.7,1154.5,349.4,366.5,453.7,372.3,564.1,325.2,400.6,320.7,400.1


Unique IDs: 10759 Actual Size: 16667
Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM29645,GSM29646,GSM29647,GSM29648,GSM29649,GSM29537,GSM29638,GSM29643,GSM29644,GSM29650,GSM29651,GSM29652,GSM29653
0,1,309.30,383.6,329.10,247.30,187.70,345.60,267.2,302.30,436.30,483.10,525.90,422.4,341.60
1,3,211.80,164.5,219.10,147.10,91.60,123.90,177.0,188.00,113.50,264.70,243.60,250.7,188.30
2,23,2451.00,1789.9,1646.60,1851.90,1395.40,1255.30,1644.4,1862.90,1535.50,933.00,813.80,835.0,1180.00
3,27,207.50,264.2,326.75,308.15,349.30,191.75,280.8,336.20,334.80,127.00,141.60,110.7,180.65
4,36,84.90,169.4,86.00,162.80,212.90,164.00,158.4,136.70,108.60,191.60,111.20,161.3,230.20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10754,94027_4909,24.80,20.0,18.90,17.80,16.60,13.50,29.3,31.60,11.30,27.60,26.90,14.6,16.40
10755,9586_401317,143.25,128.9,104.10,85.05,162.10,130.45,147.5,80.75,102.35,114.20,126.25,123.2,107.65
10756,96626_100288695,575.30,226.3,297.30,225.30,240.10,680.60,467.3,256.70,344.60,423.80,434.90,517.7,302.30
10757,9782_724102,42.60,75.1,56.10,65.45,54.35,50.75,36.0,35.55,70.80,67.55,39.90,84.2,76.80


Checking for spurious gene IDs (if any) ...
Small Additional Crosscheck ...
(array([], dtype=int64),)


Unnamed: 0,ID_REF,GSM29645,GSM29646,GSM29647,GSM29648,GSM29649,GSM29537,GSM29638,GSM29643,GSM29644,GSM29650,GSM29651,GSM29652,GSM29653
0,1,309.30,383.6,329.10,247.30,187.70,345.60,267.2,302.30,436.30,483.10,525.90,422.4,341.60
1,3,211.80,164.5,219.10,147.10,91.60,123.90,177.0,188.00,113.50,264.70,243.60,250.7,188.30
2,23,2451.00,1789.9,1646.60,1851.90,1395.40,1255.30,1644.4,1862.90,1535.50,933.00,813.80,835.0,1180.00
3,27,207.50,264.2,326.75,308.15,349.30,191.75,280.8,336.20,334.80,127.00,141.60,110.7,180.65
4,36,84.90,169.4,86.00,162.80,212.90,164.00,158.4,136.70,108.60,191.60,111.20,161.3,230.20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10754,94027_4909,24.80,20.0,18.90,17.80,16.60,13.50,29.3,31.60,11.30,27.60,26.90,14.6,16.40
10755,9586_401317,143.25,128.9,104.10,85.05,162.10,130.45,147.5,80.75,102.35,114.20,126.25,123.2,107.65
10756,96626_100288695,575.30,226.3,297.30,225.30,240.10,680.60,467.3,256.70,344.60,423.80,434.90,517.7,302.30
10757,9782_724102,42.60,75.1,56.10,65.45,54.35,50.75,36.0,35.55,70.80,67.55,39.90,84.2,76.80


Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM29645,GSM29646,GSM29647,GSM29648,GSM29649,GSM29537,GSM29638,GSM29643,GSM29644,GSM29650,GSM29651,GSM29652,GSM29653
0,1,309.30,383.6,329.10,247.30,187.70,345.60,267.2,302.30,436.30,483.10,525.90,422.4,341.60
1,3,211.80,164.5,219.10,147.10,91.60,123.90,177.0,188.00,113.50,264.70,243.60,250.7,188.30
2,23,2451.00,1789.9,1646.60,1851.90,1395.40,1255.30,1644.4,1862.90,1535.50,933.00,813.80,835.0,1180.00
3,27,207.50,264.2,326.75,308.15,349.30,191.75,280.8,336.20,334.80,127.00,141.60,110.7,180.65
4,36,84.90,169.4,86.00,162.80,212.90,164.00,158.4,136.70,108.60,191.60,111.20,161.3,230.20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10754,94027_4909,24.80,20.0,18.90,17.80,16.60,13.50,29.3,31.60,11.30,27.60,26.90,14.6,16.40
10755,9586_401317,143.25,128.9,104.10,85.05,162.10,130.45,147.5,80.75,102.35,114.20,126.25,123.2,107.65
10756,96626_100288695,575.30,226.3,297.30,225.30,240.10,680.60,467.3,256.70,344.60,423.80,434.90,517.7,302.30
10757,9782_724102,42.60,75.1,56.10,65.45,54.35,50.75,36.0,35.55,70.80,67.55,39.90,84.2,76.80


Scanning for additional columns to add ...


100%|██████████| 25180/25180 [00:28<00:00, 884.66it/s] 


ID_REF,1,2,3,131076,9,10,12,13,14,15,...,393046,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM29645,309.3,,211.8,58.0,,,,,,,...,,429.7,446.4,3.5,15.75,,,,13.45,1
GSM29646,383.6,,164.5,155.7,,,,,,,...,,582.1,417.7,40.7,12.8,,,,63.45,1
GSM29647,329.1,,219.1,78.9,,,,,,,...,,453.0,322.0,6.0,11.75,,,,21.05,1
GSM29648,247.3,,147.1,114.1,,,,,,,...,,392.0,422.9,35.7,11.9,,,,43.35,1
GSM29649,187.7,,91.6,165.3,,,,,,,...,,429.6,423.6,8.8,9.3,,,,11.8,1
GSM29537,345.6,,123.9,120.4,,,,,,,...,,417.4,385.2,7.4,13.15,,,,8.1,0
GSM29638,267.2,,177.0,60.1,,,,,,,...,,331.8,326.6,4.8,19.9,,,,30.8,0
GSM29643,302.3,,188.0,94.1,,,,,,,...,,377.2,398.8,7.5,32.25,,,,27.4,0
GSM29644,436.3,,113.5,70.3,,,,,,,...,,249.2,310.9,6.6,8.6,,,,25.55,0
GSM29650,483.1,,264.7,56.0,,,,,,,...,,540.2,436.7,36.7,30.5,,,,67.75,0


GDS2084


Unnamed: 0,ID_REF,GSM114841,GSM114844,GSM114845,GSM114849,GSM114851,GSM114854,GSM114855,GSM114834,GSM114842,GSM114843,GSM114847,GSM114848,GSM114850,GSM114852,GSM114853
0,780_100616237,222.6,252.7,219.3,258.9,239,286,230.1,197.1,254.4,296.5,171.1,268.9,251.2,301.9,234.3
1,5982,35.5,24.5,23.4,31.4,20.6,26.1,24.3,26.9,31.4,27.1,25.9,40.5,22.2,24.6,31.3
2,3310,41.5,53.3,31.3,43,65.5,39.6,68.5,46.9,61.7,93.7,68.5,79.6,40,43.2,53.4
3,7849,229.8,419.6,274.5,227.1,271.6,428.7,333.4,221.1,291.5,399.8,307.1,364.8,326.1,387.2,400.9
4,2978,14.3,13,29.6,16.3,4.6,10.7,7.8,2.4,13.9,24.7,3.8,14.3,1.9,12,11.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22278,,1.9,2.2,10.1,2.9,1.5,2.2,1.4,1.7,2.1,3.4,3.3,1.3,2.6,2.7,2.2
22279,,4.3,2.5,9.6,4.4,1.8,1.3,2.1,2,2,6.3,4,3,3.4,3.9,5.3
22280,,0.5,0.5,4.1,0.6,0.3,0.7,0.4,0.5,0.5,0.4,0.3,0.8,0.5,0.4,0.2
22281,,2.6,2,6.1,1.2,1.2,1.3,1.6,1.6,1.6,2.1,1.5,1.6,1,0.6,0.7


Removing all columns that have no Entrez mapping ...


Unnamed: 0,ID_REF,GSM114841,GSM114844,GSM114845,GSM114849,GSM114851,GSM114854,GSM114855,GSM114834,GSM114842,GSM114843,GSM114847,GSM114848,GSM114850,GSM114852,GSM114853
0,780_100616237,222.6,252.7,219.3,258.9,239,286,230.1,197.1,254.4,296.5,171.1,268.9,251.2,301.9,234.3
1,5982,35.5,24.5,23.4,31.4,20.6,26.1,24.3,26.9,31.4,27.1,25.9,40.5,22.2,24.6,31.3
2,3310,41.5,53.3,31.3,43,65.5,39.6,68.5,46.9,61.7,93.7,68.5,79.6,40,43.2,53.4
3,7849,229.8,419.6,274.5,227.1,271.6,428.7,333.4,221.1,291.5,399.8,307.1,364.8,326.1,387.2,400.9
4,2978,14.3,13,29.6,16.3,4.6,10.7,7.8,2.4,13.9,24.7,3.8,14.3,1.9,12,11.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22232,2597,1408.1,1277.4,1235.9,1084.8,1074.1,1025.7,1425.5,1167.1,1119.8,1125.2,1353.5,902.9,1023.9,1144.2,1159.1
22233,6772,221.7,268.1,212.3,397.5,263.5,274.2,279.9,251.8,289.3,184.9,293.2,365.9,293.5,296.4,360.3
22234,6772,25,19.1,17.3,29.4,21.9,9.7,13.3,18.1,7.3,15.6,18,19.2,1.7,2.2,6.9
22235,6772,86.5,95,76.7,102.7,77.9,51.1,60,61.6,68.2,66.5,73.5,85.5,46.6,53.2,79.3


Unique IDs: 13299 Actual Size: 21156
Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM114841,GSM114844,GSM114845,GSM114849,GSM114851,GSM114854,GSM114855,GSM114834,GSM114842,GSM114843,GSM114847,GSM114848,GSM114850,GSM114852,GSM114853
0,2,2151.200000,1537.60,1473.000000,1704.200000,1551.200000,1675.50,1715.800000,1397.700000,2000.300000,1468.900000,1681.200000,1375.500000,1634.40,1513.500000,1636.200000
1,9,71.000000,52.60,58.900000,49.100000,48.100000,42.50,56.100000,61.100000,46.200000,62.300000,57.900000,54.200000,39.90,39.600000,41.700000
2,10,8.600000,7.50,12.200000,8.000000,5.700000,10.90,7.000000,6.400000,10.500000,13.200000,7.700000,5.700000,6.50,9.700000,13.600000
3,12,25.800000,35.40,52.200000,29.200000,49.200000,27.60,139.400000,6.700000,20.400000,64.400000,45.900000,28.200000,116.10,33.600000,29.300000
4,13,212.200000,59.20,9.000000,192.800000,51.600000,180.40,6.400000,285.100000,74.000000,372.000000,5.600000,209.800000,170.10,178.700000,179.400000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13294,9772_102466911,41.450000,61.65,65.150000,40.950000,43.550000,43.05,37.150000,35.600000,52.300000,65.650000,32.900000,65.350000,30.70,42.150000,41.650000
13295,9782_724102,763.366667,642.70,598.733333,707.766667,635.533333,594.40,545.466667,724.466667,699.033333,556.533333,515.333333,649.866667,659.90,684.566667,588.166667
13296,984_728642,75.780000,45.60,48.760000,65.360000,82.480000,40.70,59.880000,54.240000,65.420000,64.180000,57.000000,61.660000,62.90,59.780000,42.100000
13297,9859_645455,159.700000,112.40,82.850000,168.000000,88.250000,145.95,108.750000,140.750000,139.750000,99.550000,101.850000,146.500000,150.15,152.050000,136.050000


Checking for spurious gene IDs (if any) ...
Small Additional Crosscheck ...
(array([], dtype=int64),)


Unnamed: 0,ID_REF,GSM114841,GSM114844,GSM114845,GSM114849,GSM114851,GSM114854,GSM114855,GSM114834,GSM114842,GSM114843,GSM114847,GSM114848,GSM114850,GSM114852,GSM114853
0,2,2151.200000,1537.60,1473.000000,1704.200000,1551.200000,1675.50,1715.800000,1397.700000,2000.300000,1468.900000,1681.200000,1375.500000,1634.40,1513.500000,1636.200000
1,9,71.000000,52.60,58.900000,49.100000,48.100000,42.50,56.100000,61.100000,46.200000,62.300000,57.900000,54.200000,39.90,39.600000,41.700000
2,10,8.600000,7.50,12.200000,8.000000,5.700000,10.90,7.000000,6.400000,10.500000,13.200000,7.700000,5.700000,6.50,9.700000,13.600000
3,12,25.800000,35.40,52.200000,29.200000,49.200000,27.60,139.400000,6.700000,20.400000,64.400000,45.900000,28.200000,116.10,33.600000,29.300000
4,13,212.200000,59.20,9.000000,192.800000,51.600000,180.40,6.400000,285.100000,74.000000,372.000000,5.600000,209.800000,170.10,178.700000,179.400000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13294,9772_102466911,41.450000,61.65,65.150000,40.950000,43.550000,43.05,37.150000,35.600000,52.300000,65.650000,32.900000,65.350000,30.70,42.150000,41.650000
13295,9782_724102,763.366667,642.70,598.733333,707.766667,635.533333,594.40,545.466667,724.466667,699.033333,556.533333,515.333333,649.866667,659.90,684.566667,588.166667
13296,984_728642,75.780000,45.60,48.760000,65.360000,82.480000,40.70,59.880000,54.240000,65.420000,64.180000,57.000000,61.660000,62.90,59.780000,42.100000
13297,9859_645455,159.700000,112.40,82.850000,168.000000,88.250000,145.95,108.750000,140.750000,139.750000,99.550000,101.850000,146.500000,150.15,152.050000,136.050000


Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM114841,GSM114844,GSM114845,GSM114849,GSM114851,GSM114854,GSM114855,GSM114834,GSM114842,GSM114843,GSM114847,GSM114848,GSM114850,GSM114852,GSM114853
0,2,2151.200000,1537.60,1473.000000,1704.200000,1551.200000,1675.50,1715.800000,1397.700000,2000.300000,1468.900000,1681.200000,1375.500000,1634.40,1513.500000,1636.200000
1,9,71.000000,52.60,58.900000,49.100000,48.100000,42.50,56.100000,61.100000,46.200000,62.300000,57.900000,54.200000,39.90,39.600000,41.700000
2,10,8.600000,7.50,12.200000,8.000000,5.700000,10.90,7.000000,6.400000,10.500000,13.200000,7.700000,5.700000,6.50,9.700000,13.600000
3,12,25.800000,35.40,52.200000,29.200000,49.200000,27.60,139.400000,6.700000,20.400000,64.400000,45.900000,28.200000,116.10,33.600000,29.300000
4,13,212.200000,59.20,9.000000,192.800000,51.600000,180.40,6.400000,285.100000,74.000000,372.000000,5.600000,209.800000,170.10,178.700000,179.400000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13294,9772_102466911,41.450000,61.65,65.150000,40.950000,43.550000,43.05,37.150000,35.600000,52.300000,65.650000,32.900000,65.350000,30.70,42.150000,41.650000
13295,9782_724102,763.366667,642.70,598.733333,707.766667,635.533333,594.40,545.466667,724.466667,699.033333,556.533333,515.333333,649.866667,659.90,684.566667,588.166667
13296,984_728642,75.780000,45.60,48.760000,65.360000,82.480000,40.70,59.880000,54.240000,65.420000,64.180000,57.000000,61.660000,62.90,59.780000,42.100000
13297,9859_645455,159.700000,112.40,82.850000,168.000000,88.250000,145.95,108.750000,140.750000,139.750000,99.550000,101.850000,146.500000,150.15,152.050000,136.050000


Scanning for additional columns to add ...


100%|██████████| 25180/25180 [00:21<00:00, 1151.21it/s]


ID_REF,1,2,3,131076,9,10,12,13,14,15,...,393046,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM114841,,2151.2,,,71.0,8.6,25.8,212.2,193.8,2.7,...,,42.1,,,,22.3,,2.4,,0
GSM114844,,1537.6,,,52.6,7.5,35.4,59.2,123.7,3.5,...,,51.9,,,,15.3,,3.7,,0
GSM114845,,1473.0,,,58.9,12.2,52.2,9.0,171.1,11.5,...,,22.0,,,,11.55,,7.7,,0
GSM114849,,1704.2,,,49.1,8.0,29.2,192.8,117.6,1.6,...,,32.7,,,,50.65,,8.7,,0
GSM114851,,1551.2,,,48.1,5.7,49.2,51.6,162.9,2.3,...,,35.8,,,,26.6,,2.8,,0
GSM114854,,1675.5,,,42.5,10.9,27.6,180.4,155.0,1.4,...,,39.5,,,,21.55,,5.4,,0
GSM114855,,1715.8,,,56.1,7.0,139.4,6.4,119.1,1.9,...,,28.3,,,,22.4,,2.7,,0
GSM114834,,1397.7,,,61.1,6.4,6.7,285.1,128.8,0.9,...,,5.6,,,,38.45,,10.2,,1
GSM114842,,2000.3,,,46.2,10.5,20.4,74.0,139.0,2.8,...,,41.1,,,,11.35,,4.3,,1
GSM114843,,1468.9,,,62.3,13.2,64.4,372.0,168.5,3.5,...,,38.8,,,,16.2,,5.9,,1


GDS3104


Unnamed: 0,ID_REF,GSM155631,GSM155643,GSM155644,GSM155729,GSM156170,GSM156171,GSM156176,GSM156177,GSM156178,...,GSM156750,GSM156751,GSM156752,GSM156753,GSM156763,GSM156946,GSM156948,GSM156949,GSM156950,GSM156951
0,780_100616237,494.091,429.377,483.314,370.79,440.023,471.122,522.423,463.973,403.907,...,471.417,499.061,421.734,448.596,519.123,494.733,450.936,470.474,504.179,483.382
1,5982,70.1196,50.927,57.5347,68.8989,88.3869,64.3309,64.8592,41.726,58.2168,...,69.6882,56.149,65.4412,62.0778,99.9459,69.8443,69.2457,58.2555,60.2795,49.8063
2,3310,169.272,181.858,203.872,144.182,134.05,154.556,157.932,184.023,136.665,...,138.755,184.073,133.169,137.113,137.294,123.032,136.516,113.085,179.123,158.224
3,7849,433.047,470.4,433.667,416.779,395.866,424.55,470.681,459.183,437.931,...,428.169,479.99,371.751,382.77,432.799,388.83,413.838,385.33,479.524,437.328
4,2978,23.6958,22.7677,22.1392,25.5855,25.0873,28.3204,35.2135,18.236,37.0968,...,19.0747,23.0635,34.8153,30.1676,29.3056,24.2784,30.9826,25.0982,23.7186,23.0929
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54670,,34.7916,18.7041,21.6336,33.941,34.1386,24.2582,31.9327,23.6448,26.8522,...,34.2419,26.5655,29.949,25.3506,33.8185,22.5942,24.109,21.4239,25.3657,32.9466
54671,,13.3606,11.6228,12.9994,11.2103,10.0443,12.4018,13.9776,9.92973,10.8037,...,14.0283,13.4215,9.89885,13.4818,15.5841,11.4173,9.52056,10.5497,13.26,10.4772
54672,,10.0436,12.2426,12.9526,10.3604,9.32941,11.5884,10.5051,12.8234,9.59493,...,8.34787,12.8996,8.86309,7.78588,13.3112,8.57294,9.23505,10.6191,10.1967,9.89287
54673,,24.0888,15.9718,20.667,14.4338,20.5394,14.052,18.1273,17.9992,13.0228,...,12.7295,26.3269,15.6074,12.8007,23.4049,13.6602,14.0634,15.268,22.8486,18.0097


Removing all columns that have no Entrez mapping ...


Unnamed: 0,ID_REF,GSM155631,GSM155643,GSM155644,GSM155729,GSM156170,GSM156171,GSM156176,GSM156177,GSM156178,...,GSM156750,GSM156751,GSM156752,GSM156753,GSM156763,GSM156946,GSM156948,GSM156949,GSM156950,GSM156951
0,780_100616237,494.091,429.377,483.314,370.79,440.023,471.122,522.423,463.973,403.907,...,471.417,499.061,421.734,448.596,519.123,494.733,450.936,470.474,504.179,483.382
1,5982,70.1196,50.927,57.5347,68.8989,88.3869,64.3309,64.8592,41.726,58.2168,...,69.6882,56.149,65.4412,62.0778,99.9459,69.8443,69.2457,58.2555,60.2795,49.8063
2,3310,169.272,181.858,203.872,144.182,134.05,154.556,157.932,184.023,136.665,...,138.755,184.073,133.169,137.113,137.294,123.032,136.516,113.085,179.123,158.224
3,7849,433.047,470.4,433.667,416.779,395.866,424.55,470.681,459.183,437.931,...,428.169,479.99,371.751,382.77,432.799,388.83,413.838,385.33,479.524,437.328
4,2978,23.6958,22.7677,22.1392,25.5855,25.0873,28.3204,35.2135,18.236,37.0968,...,19.0747,23.0635,34.8153,30.1676,29.3056,24.2784,30.9826,25.0982,23.7186,23.0929
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54630,2597,10251.6,10169.3,9923.17,12526.5,10125.5,10314.3,10925.8,10636.5,8340.15,...,10180.6,11117.8,10741.2,11905.5,11590.9,9738.28,11502.3,9755.45,10235.9,11354.2
54631,6772,357.264,244.005,223.804,203.644,313.99,317.738,243.347,254.441,287.714,...,364.32,193.699,280.872,276.986,240.266,350.587,326.151,331.788,274.793,258.396
54632,6772,22.3471,18.3128,26.0131,9.78367,19.2337,22.2766,21.8454,18.0027,16.0342,...,16.9826,23.236,15.8333,16.3652,22.7206,20.5453,21.3748,13.9844,28.365,20.6442
54633,6772,78.6758,62.8006,67.893,77.8689,99.868,80.1086,83.8745,60.7733,82.4203,...,103.473,69.2718,83.0464,84.935,107.591,82.4824,98.7891,78.4151,72.3016,72.6825


Unique IDs: 22189 Actual Size: 45118
Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM155631,GSM155643,GSM155644,GSM155729,GSM156170,GSM156171,GSM156176,GSM156177,GSM156178,...,GSM156750,GSM156751,GSM156752,GSM156753,GSM156763,GSM156946,GSM156948,GSM156949,GSM156950,GSM156951
0,1,104.350000,91.242100,112.817000,91.162100,96.426200,101.231000,108.870000,93.073700,79.305300,...,97.891200,111.156000,81.800200,79.877300,116.100000,84.656600,111.487000,96.813200,117.464000,117.391000
1,2,670.221250,590.508550,572.414750,569.875600,445.795000,641.470250,498.569150,496.866750,592.987300,...,514.851950,427.682950,515.933900,483.343900,348.183000,432.403750,654.000100,510.499550,668.745650,460.764450
2,3,163.193000,153.165000,145.516000,269.472000,200.817000,198.705000,241.080000,147.700000,198.176000,...,236.749000,225.938000,160.772000,185.863000,202.883000,149.344000,182.083000,192.653000,230.852000,193.405000
3,9,19.622000,15.529500,15.592200,39.963300,30.679100,28.553400,29.203800,19.323100,35.440800,...,28.622200,16.780300,27.137500,26.732500,23.962500,21.039200,28.507500,32.734200,14.132200,19.006800
4,10,72.923800,100.396000,76.557700,127.490000,84.311300,99.049900,78.051500,60.340400,122.215000,...,80.520600,85.229500,81.203700,95.495400,72.630400,44.583100,92.165800,62.303400,78.439200,69.771300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22184,9782_724102,318.538678,282.905178,260.134578,199.974933,319.202222,263.519578,292.418422,300.391689,293.904822,...,304.285878,258.008278,302.844022,240.377944,269.397644,359.389589,296.310722,315.619467,268.207922,240.863278
22185,9802_102723983,111.251000,123.771000,129.458000,113.775000,91.775300,114.880000,107.041000,121.926000,121.413000,...,109.322000,100.270000,102.397000,110.429000,77.995800,65.874100,99.905400,80.075000,103.310000,111.601000
22186,984_728642,154.154480,130.551940,146.764880,138.480660,152.301660,132.718680,162.249300,165.111220,128.089620,...,146.711440,146.910060,143.011100,147.725600,175.181260,128.387000,134.716560,142.310220,145.099200,160.933880
22187,9859_645455,175.396567,160.039533,160.886700,176.747667,235.097433,183.262633,173.759100,177.540567,211.486967,...,186.042467,149.889633,225.355100,196.435033,200.712700,198.562367,200.612233,222.604500,172.749600,169.861033


Checking for spurious gene IDs (if any) ...
Small Additional Crosscheck ...
(array([], dtype=int64),)


Unnamed: 0,ID_REF,GSM155631,GSM155643,GSM155644,GSM155729,GSM156170,GSM156171,GSM156176,GSM156177,GSM156178,...,GSM156750,GSM156751,GSM156752,GSM156753,GSM156763,GSM156946,GSM156948,GSM156949,GSM156950,GSM156951
0,1,104.350000,91.242100,112.817000,91.162100,96.426200,101.231000,108.870000,93.073700,79.305300,...,97.891200,111.156000,81.800200,79.877300,116.100000,84.656600,111.487000,96.813200,117.464000,117.391000
1,2,670.221250,590.508550,572.414750,569.875600,445.795000,641.470250,498.569150,496.866750,592.987300,...,514.851950,427.682950,515.933900,483.343900,348.183000,432.403750,654.000100,510.499550,668.745650,460.764450
2,3,163.193000,153.165000,145.516000,269.472000,200.817000,198.705000,241.080000,147.700000,198.176000,...,236.749000,225.938000,160.772000,185.863000,202.883000,149.344000,182.083000,192.653000,230.852000,193.405000
3,9,19.622000,15.529500,15.592200,39.963300,30.679100,28.553400,29.203800,19.323100,35.440800,...,28.622200,16.780300,27.137500,26.732500,23.962500,21.039200,28.507500,32.734200,14.132200,19.006800
4,10,72.923800,100.396000,76.557700,127.490000,84.311300,99.049900,78.051500,60.340400,122.215000,...,80.520600,85.229500,81.203700,95.495400,72.630400,44.583100,92.165800,62.303400,78.439200,69.771300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22184,9782_724102,318.538678,282.905178,260.134578,199.974933,319.202222,263.519578,292.418422,300.391689,293.904822,...,304.285878,258.008278,302.844022,240.377944,269.397644,359.389589,296.310722,315.619467,268.207922,240.863278
22185,9802_102723983,111.251000,123.771000,129.458000,113.775000,91.775300,114.880000,107.041000,121.926000,121.413000,...,109.322000,100.270000,102.397000,110.429000,77.995800,65.874100,99.905400,80.075000,103.310000,111.601000
22186,984_728642,154.154480,130.551940,146.764880,138.480660,152.301660,132.718680,162.249300,165.111220,128.089620,...,146.711440,146.910060,143.011100,147.725600,175.181260,128.387000,134.716560,142.310220,145.099200,160.933880
22187,9859_645455,175.396567,160.039533,160.886700,176.747667,235.097433,183.262633,173.759100,177.540567,211.486967,...,186.042467,149.889633,225.355100,196.435033,200.712700,198.562367,200.612233,222.604500,172.749600,169.861033


Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM155631,GSM155643,GSM155644,GSM155729,GSM156170,GSM156171,GSM156176,GSM156177,GSM156178,...,GSM156750,GSM156751,GSM156752,GSM156753,GSM156763,GSM156946,GSM156948,GSM156949,GSM156950,GSM156951
0,1,104.350000,91.242100,112.817000,91.162100,96.426200,101.231000,108.870000,93.073700,79.305300,...,97.891200,111.156000,81.800200,79.877300,116.100000,84.656600,111.487000,96.813200,117.464000,117.391000
1,2,670.221250,590.508550,572.414750,569.875600,445.795000,641.470250,498.569150,496.866750,592.987300,...,514.851950,427.682950,515.933900,483.343900,348.183000,432.403750,654.000100,510.499550,668.745650,460.764450
2,3,163.193000,153.165000,145.516000,269.472000,200.817000,198.705000,241.080000,147.700000,198.176000,...,236.749000,225.938000,160.772000,185.863000,202.883000,149.344000,182.083000,192.653000,230.852000,193.405000
3,9,19.622000,15.529500,15.592200,39.963300,30.679100,28.553400,29.203800,19.323100,35.440800,...,28.622200,16.780300,27.137500,26.732500,23.962500,21.039200,28.507500,32.734200,14.132200,19.006800
4,10,72.923800,100.396000,76.557700,127.490000,84.311300,99.049900,78.051500,60.340400,122.215000,...,80.520600,85.229500,81.203700,95.495400,72.630400,44.583100,92.165800,62.303400,78.439200,69.771300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22184,9782_724102,318.538678,282.905178,260.134578,199.974933,319.202222,263.519578,292.418422,300.391689,293.904822,...,304.285878,258.008278,302.844022,240.377944,269.397644,359.389589,296.310722,315.619467,268.207922,240.863278
22185,9802_102723983,111.251000,123.771000,129.458000,113.775000,91.775300,114.880000,107.041000,121.926000,121.413000,...,109.322000,100.270000,102.397000,110.429000,77.995800,65.874100,99.905400,80.075000,103.310000,111.601000
22186,984_728642,154.154480,130.551940,146.764880,138.480660,152.301660,132.718680,162.249300,165.111220,128.089620,...,146.711440,146.910060,143.011100,147.725600,175.181260,128.387000,134.716560,142.310220,145.099200,160.933880
22187,9859_645455,175.396567,160.039533,160.886700,176.747667,235.097433,183.262633,173.759100,177.540567,211.486967,...,186.042467,149.889633,225.355100,196.435033,200.712700,198.562367,200.612233,222.604500,172.749600,169.861033


Scanning for additional columns to add ...


100%|██████████| 25180/25180 [00:07<00:00, 3466.10it/s]


ID_REF,1,2,3,131076,9,10,12,13,14,15,...,393046,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM155631,104.35,670.221,163.193,88.8521,19.622,72.9238,251.359,99.4758,247.801,64.7368,...,,156.605,99.9452,23.0387,92.8232,286.581,,108.013,51.9724,0
GSM155643,91.2421,590.509,153.165,93.6215,15.5295,100.396,141.757,94.3241,219.067,49.4561,...,,149.458,80.1411,18.002,79.438,205.194,,110.803,41.8261,0
GSM155644,112.817,572.415,145.516,90.2267,15.5922,76.5577,136.427,94.948,249.907,59.1387,...,,148.393,73.6852,23.4213,83.8975,237.79,,113.004,41.4746,0
GSM155729,91.1621,569.876,269.472,82.5823,39.9633,127.49,157.935,112.656,199.577,51.2435,...,,114.824,132.53,18.0645,109.923,164.53,,123.757,88.7945,0
GSM156170,96.4262,445.795,200.817,96.8409,30.6791,84.3113,169.463,85.9437,207.488,55.5399,...,,149.864,105.321,25.1576,78.5269,295.914,,92.271,55.5922,0
GSM156171,101.231,641.47,198.705,144.814,28.5534,99.0499,189.32,95.6126,220.816,68.162,...,,160.119,120.733,20.6461,84.1994,157.078,,96.6274,57.7043,0
GSM156176,108.87,498.569,241.08,70.1289,29.2038,78.0515,169.301,89.8766,241.076,60.2328,...,,130.429,124.231,33.3963,97.8733,235.611,,102.052,52.1059,0
GSM156177,93.0737,496.867,147.7,86.8106,19.3231,60.3404,113.622,93.2597,205.232,55.4366,...,,164.941,74.4042,18.9079,82.5216,201.938,,99.4878,39.6922,0
GSM156178,79.3053,592.987,198.176,102.968,35.4408,122.215,154.038,116.969,199.872,33.3093,...,,140.002,117.36,20.2081,103.765,258.166,,119.219,80.3702,0
GSM156179,75.7921,617.488,211.01,131.015,39.6414,96.7656,153.498,99.0036,223.181,31.7261,...,,176.084,115.817,24.5608,88.9567,183.281,,100.878,66.7649,0


GDS3841


Unnamed: 0,ID_REF,GSM277438,GSM277439,GSM277440,GSM277441,GSM277442,GSM277443,GSM277444,GSM277445,GSM277446,...,GSM277451,GSM277452,GSM277453,GSM277454,GSM277455,GSM277456,GSM277457,GSM277458,GSM277459,GSM277460
0,780_100616237,9.22931,10.0032,9.30767,9.0761,9.11024,9.72083,8.72971,8.90435,9.69005,...,9.25351,9.22073,8.57829,9.07431,8.49272,9.22588,8.38277,9.631,9.10148,9.48368
1,5982,5.39148,5.29239,5.62825,6.44412,6.52364,6.33108,5.42469,5.57647,5.78538,...,5.51713,6.02593,5.65758,5.24126,6.17968,5.07135,4.93298,5.36012,5.44296,5.72018
2,3310,4.70996,3.97881,4.2565,4.16672,3.99349,4.12451,4.61313,4.87139,4.14672,...,4.74301,3.85299,4.7638,4.14449,4.12519,4.31076,4.12814,4.02612,3.72344,4.29098
3,7849,7.6806,6.77488,6.61363,6.633,6.38885,6.35644,6.54569,7.44981,6.40147,...,7.64434,7.29865,7.08403,7.29734,6.86503,7.82811,7.25238,6.59125,6.98952,6.87036
4,2978,4.01431,3.95995,4.54025,3.80671,3.79425,4.0271,3.53781,4.59529,5.53552,...,4.14021,4.40469,3.96887,3.80741,3.86025,4.1564,3.8571,3.78671,5.35456,3.63059
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54670,,4.13642,3.85175,3.11438,3.4906,3.66457,3.45437,3.84979,3.76412,3.46774,...,3.94062,3.64259,4.16687,4.15287,3.52261,5.89112,4.15062,3.98749,3.22242,3.62174
54671,,3.83022,5.38626,3.22656,4.8024,4.77174,4.58342,4.84535,3.95094,3.22799,...,3.90068,4.99221,6.07707,6.09459,3.17194,8.99034,7.10542,5.72407,4.13657,4.11837
54672,,2.546,2.64385,2.6668,2.68168,2.76723,2.53931,2.62879,2.55204,2.62161,...,2.6564,2.69649,2.79681,2.50518,2.7349,2.5435,2.74379,2.71894,2.54129,2.60643
54673,,3.0265,2.93269,3.00462,3.07677,3.18352,3.1268,3.11804,3.00764,2.9732,...,3.07183,3.12419,3.05274,2.93801,3.02692,3.09466,3.2194,2.86176,3.06397,3.06916


Removing all columns that have no Entrez mapping ...


Unnamed: 0,ID_REF,GSM277438,GSM277439,GSM277440,GSM277441,GSM277442,GSM277443,GSM277444,GSM277445,GSM277446,...,GSM277451,GSM277452,GSM277453,GSM277454,GSM277455,GSM277456,GSM277457,GSM277458,GSM277459,GSM277460
0,780_100616237,9.22931,10.0032,9.30767,9.0761,9.11024,9.72083,8.72971,8.90435,9.69005,...,9.25351,9.22073,8.57829,9.07431,8.49272,9.22588,8.38277,9.631,9.10148,9.48368
1,5982,5.39148,5.29239,5.62825,6.44412,6.52364,6.33108,5.42469,5.57647,5.78538,...,5.51713,6.02593,5.65758,5.24126,6.17968,5.07135,4.93298,5.36012,5.44296,5.72018
2,3310,4.70996,3.97881,4.2565,4.16672,3.99349,4.12451,4.61313,4.87139,4.14672,...,4.74301,3.85299,4.7638,4.14449,4.12519,4.31076,4.12814,4.02612,3.72344,4.29098
3,7849,7.6806,6.77488,6.61363,6.633,6.38885,6.35644,6.54569,7.44981,6.40147,...,7.64434,7.29865,7.08403,7.29734,6.86503,7.82811,7.25238,6.59125,6.98952,6.87036
4,2978,4.01431,3.95995,4.54025,3.80671,3.79425,4.0271,3.53781,4.59529,5.53552,...,4.14021,4.40469,3.96887,3.80741,3.86025,4.1564,3.8571,3.78671,5.35456,3.63059
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54630,2597,13.1146,12.9779,13.2243,12.0855,12.333,12.285,12.3932,12.7652,13.3318,...,13.31,13.4574,12.7468,12.4973,12.4929,12.2392,12.7527,12.3476,13.4179,12.8906
54631,6772,7.9886,8.72232,9.17787,9.77727,9.42139,9.82111,9.64594,8.72924,9.29793,...,9.38264,9.6358,9.60305,9.70804,9.55141,9.406,8.85962,9.18736,8.74305,9.2127
54632,6772,2.79091,2.71875,2.92281,2.76876,2.82483,2.7006,2.94441,2.95206,2.79412,...,2.96349,2.87218,2.80204,2.74155,3.00887,2.92628,3.00912,2.81143,2.72656,3.16065
54633,6772,5.89503,5.75518,5.91592,6.03493,5.83476,6.12465,6.19756,6.01468,6.08821,...,6.01519,6.18364,6.04359,5.78975,5.92919,5.83103,6.06925,5.66492,6.19235,5.9404


Unique IDs: 22189 Actual Size: 45118
Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM277438,GSM277439,GSM277440,GSM277441,GSM277442,GSM277443,GSM277444,GSM277445,GSM277446,...,GSM277451,GSM277452,GSM277453,GSM277454,GSM277455,GSM277456,GSM277457,GSM277458,GSM277459,GSM277460
0,1,5.758550,5.310110,5.142150,4.757180,4.818810,4.465190,4.802660,5.221400,5.264590,...,5.647620,4.849330,5.162360,4.725040,4.989770,5.316330,4.887420,4.648650,5.063600,5.267190
1,2,5.117520,3.856325,4.048420,4.258180,3.814460,3.762595,4.054220,4.519285,4.160635,...,4.753965,3.867540,4.253600,5.042975,4.238850,5.040680,4.348980,4.096915,3.602425,4.032885
2,3,4.690800,3.497430,3.915220,3.605910,3.261530,3.334910,3.540520,4.086750,3.794120,...,4.504550,3.780820,3.595910,4.094430,4.301490,4.625610,3.963210,3.641880,3.339440,3.858400
3,9,5.392360,6.444350,6.139070,7.844490,7.870630,7.755910,7.912880,5.788700,7.056680,...,7.106320,7.978120,8.448960,7.871780,7.767570,8.362770,6.951750,6.990800,7.611170,7.511720
4,10,4.174920,3.932170,3.816890,3.690640,3.493600,3.788690,4.194220,4.105140,4.049400,...,4.240530,3.965820,4.202470,4.205250,4.118110,4.266980,4.264060,3.682170,3.805480,3.680820
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22184,9782_724102,6.173761,6.135897,6.323396,6.234157,6.135398,6.142300,6.233282,5.980252,6.386642,...,5.874466,6.224072,6.041516,6.378672,6.175680,6.089834,6.174062,6.176078,6.035877,6.081190
22185,9802_102723983,4.860270,6.623030,6.472710,6.818810,6.860830,6.598230,5.128490,5.522690,6.394810,...,5.873390,5.853350,5.966700,5.653130,5.303620,5.838810,4.998280,6.151900,6.059330,5.971050
22186,984_728642,6.103880,6.801940,6.672018,6.840234,6.748994,6.541474,6.491098,6.455626,6.449356,...,6.295628,6.693628,6.528024,5.907474,6.411964,5.632408,6.414554,6.195446,6.841934,6.921292
22187,9859_645455,6.473700,6.648677,6.525823,6.793677,6.338787,6.628923,6.548287,6.549110,6.355447,...,5.955460,6.225863,6.094677,6.303603,6.889797,6.306497,6.519297,6.477630,6.287457,6.060230


Checking for spurious gene IDs (if any) ...
Small Additional Crosscheck ...
(array([], dtype=int64),)


Unnamed: 0,ID_REF,GSM277438,GSM277439,GSM277440,GSM277441,GSM277442,GSM277443,GSM277444,GSM277445,GSM277446,...,GSM277451,GSM277452,GSM277453,GSM277454,GSM277455,GSM277456,GSM277457,GSM277458,GSM277459,GSM277460
0,1,5.758550,5.310110,5.142150,4.757180,4.818810,4.465190,4.802660,5.221400,5.264590,...,5.647620,4.849330,5.162360,4.725040,4.989770,5.316330,4.887420,4.648650,5.063600,5.267190
1,2,5.117520,3.856325,4.048420,4.258180,3.814460,3.762595,4.054220,4.519285,4.160635,...,4.753965,3.867540,4.253600,5.042975,4.238850,5.040680,4.348980,4.096915,3.602425,4.032885
2,3,4.690800,3.497430,3.915220,3.605910,3.261530,3.334910,3.540520,4.086750,3.794120,...,4.504550,3.780820,3.595910,4.094430,4.301490,4.625610,3.963210,3.641880,3.339440,3.858400
3,9,5.392360,6.444350,6.139070,7.844490,7.870630,7.755910,7.912880,5.788700,7.056680,...,7.106320,7.978120,8.448960,7.871780,7.767570,8.362770,6.951750,6.990800,7.611170,7.511720
4,10,4.174920,3.932170,3.816890,3.690640,3.493600,3.788690,4.194220,4.105140,4.049400,...,4.240530,3.965820,4.202470,4.205250,4.118110,4.266980,4.264060,3.682170,3.805480,3.680820
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22184,9782_724102,6.173761,6.135897,6.323396,6.234157,6.135398,6.142300,6.233282,5.980252,6.386642,...,5.874466,6.224072,6.041516,6.378672,6.175680,6.089834,6.174062,6.176078,6.035877,6.081190
22185,9802_102723983,4.860270,6.623030,6.472710,6.818810,6.860830,6.598230,5.128490,5.522690,6.394810,...,5.873390,5.853350,5.966700,5.653130,5.303620,5.838810,4.998280,6.151900,6.059330,5.971050
22186,984_728642,6.103880,6.801940,6.672018,6.840234,6.748994,6.541474,6.491098,6.455626,6.449356,...,6.295628,6.693628,6.528024,5.907474,6.411964,5.632408,6.414554,6.195446,6.841934,6.921292
22187,9859_645455,6.473700,6.648677,6.525823,6.793677,6.338787,6.628923,6.548287,6.549110,6.355447,...,5.955460,6.225863,6.094677,6.303603,6.889797,6.306497,6.519297,6.477630,6.287457,6.060230


Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM277438,GSM277439,GSM277440,GSM277441,GSM277442,GSM277443,GSM277444,GSM277445,GSM277446,...,GSM277451,GSM277452,GSM277453,GSM277454,GSM277455,GSM277456,GSM277457,GSM277458,GSM277459,GSM277460
0,1,5.758550,5.310110,5.142150,4.757180,4.818810,4.465190,4.802660,5.221400,5.264590,...,5.647620,4.849330,5.162360,4.725040,4.989770,5.316330,4.887420,4.648650,5.063600,5.267190
1,2,5.117520,3.856325,4.048420,4.258180,3.814460,3.762595,4.054220,4.519285,4.160635,...,4.753965,3.867540,4.253600,5.042975,4.238850,5.040680,4.348980,4.096915,3.602425,4.032885
2,3,4.690800,3.497430,3.915220,3.605910,3.261530,3.334910,3.540520,4.086750,3.794120,...,4.504550,3.780820,3.595910,4.094430,4.301490,4.625610,3.963210,3.641880,3.339440,3.858400
3,9,5.392360,6.444350,6.139070,7.844490,7.870630,7.755910,7.912880,5.788700,7.056680,...,7.106320,7.978120,8.448960,7.871780,7.767570,8.362770,6.951750,6.990800,7.611170,7.511720
4,10,4.174920,3.932170,3.816890,3.690640,3.493600,3.788690,4.194220,4.105140,4.049400,...,4.240530,3.965820,4.202470,4.205250,4.118110,4.266980,4.264060,3.682170,3.805480,3.680820
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22184,9782_724102,6.173761,6.135897,6.323396,6.234157,6.135398,6.142300,6.233282,5.980252,6.386642,...,5.874466,6.224072,6.041516,6.378672,6.175680,6.089834,6.174062,6.176078,6.035877,6.081190
22185,9802_102723983,4.860270,6.623030,6.472710,6.818810,6.860830,6.598230,5.128490,5.522690,6.394810,...,5.873390,5.853350,5.966700,5.653130,5.303620,5.838810,4.998280,6.151900,6.059330,5.971050
22186,984_728642,6.103880,6.801940,6.672018,6.840234,6.748994,6.541474,6.491098,6.455626,6.449356,...,6.295628,6.693628,6.528024,5.907474,6.411964,5.632408,6.414554,6.195446,6.841934,6.921292
22187,9859_645455,6.473700,6.648677,6.525823,6.793677,6.338787,6.628923,6.548287,6.549110,6.355447,...,5.955460,6.225863,6.094677,6.303603,6.889797,6.306497,6.519297,6.477630,6.287457,6.060230


Scanning for additional columns to add ...


100%|██████████| 25180/25180 [00:07<00:00, 3512.00it/s]


ID_REF,1,2,3,131076,9,10,12,13,14,15,...,393046,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM277438,5.75855,5.11752,4.6908,5.9242,5.39236,4.17492,8.66519,10.1912,7.5254,3.35787,...,,6.39367,6.28058,3.61285,3.54311,5.4499,,4.74477,2.73325,0
GSM277439,5.31011,3.85633,3.49743,5.93624,6.44435,3.93217,8.52584,9.67913,7.45636,3.62648,...,,6.3163,8.20339,3.28341,3.19928,5.99924,,4.30335,2.71341,0
GSM277440,5.14215,4.04842,3.91522,5.06035,6.13907,3.81689,8.39376,10.6059,7.77029,3.30213,...,,6.38528,8.57305,3.47464,3.24418,5.59391,,4.60007,2.60483,0
GSM277441,4.75718,4.25818,3.60591,6.03078,7.84449,3.69064,6.43664,7.5239,7.57938,3.34913,...,,6.44173,7.82463,3.43328,3.20432,6.60229,,4.83455,2.70378,0
GSM277442,4.81881,3.81446,3.26153,5.8856,7.87063,3.4936,9.82525,7.94554,7.47608,3.66622,...,,6.30288,7.56185,3.28761,3.20254,6.1368,,5.023,2.58331,0
GSM277443,4.46519,3.7626,3.33491,5.49077,7.75591,3.78869,7.69337,7.51736,7.51143,3.48998,...,,6.23771,8.12283,3.36182,3.26862,5.89272,,4.9598,2.86524,0
GSM277444,4.80266,4.05422,3.54052,5.6466,7.91288,4.19422,9.17109,9.16068,7.65276,3.5679,...,,6.14407,7.92783,3.38923,3.4233,5.80872,,5.50698,2.73436,0
GSM277445,5.2214,4.51928,4.08675,6.39472,5.7887,4.10514,11.086,10.6004,7.30982,3.55917,...,,5.80246,6.64152,3.59661,3.46549,5.34612,,5.57548,3.03099,0
GSM277446,5.26459,4.16064,3.79412,6.8619,7.05668,4.0494,8.33333,9.22212,7.28077,3.48998,...,,6.2108,8.32961,3.47901,3.26722,5.82566,,4.9345,2.84528,0
GSM277447,5.52572,4.39996,4.15511,6.32062,6.67637,4.774,10.5041,10.3,7.8734,3.55527,...,,6.2253,7.63192,3.2311,3.67804,5.8441,,4.48411,2.90742,0


GDS4132


Unnamed: 0,ID_REF,GSM201542,GSM201543,GSM201544,GSM201545,GSM201829,GSM201830,GSM201831,GSM201832,GSM201833,...,GSM201835,GSM201836,GSM201837,GSM201838,GSM201839,GSM201840,GSM201841,GSM201842,GSM201843,GSM201844
0,780_100616237,564,602.786,748.264,677.546,640.927,647.959,654.743,654.681,614.747,...,625.259,579.565,616.358,659.955,709.862,551.21,612.343,638.982,617.984,538.935
1,5982,89.9426,80.9309,132.771,118.68,89.1658,90.0681,63.4989,99.854,82.0194,...,57.6767,89.0008,83.0726,96.4821,77.7915,79.5664,53.0871,123.829,130.807,76.9493
2,3310,227.829,240.16,191.737,200.043,217.188,249.35,225.285,195.772,190.766,...,208.767,196.029,191.193,159.151,185.322,170.749,174.966,161.157,176.349,198.395
3,7849,819.361,877.348,1064.08,1120.31,902.493,871.622,978.576,768.043,992.209,...,911.378,990.562,1148.45,1277.3,968.156,1026.71,863.915,1221.94,1119.06,1028.62
4,2978,33.2482,31.7194,36.789,46.8138,35.7579,35.0303,29.7765,29.6726,27.6191,...,50.4858,21.6945,31.5906,26.0352,53.0874,26.4713,53.957,18.8646,47.7717,41.4377
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54670,,37.3114,36.845,42.6567,47.2945,38.6019,37.6885,42.9072,41.4406,37.1612,...,30.9152,39.9293,34.3232,38.8259,32.6104,30.6414,22.3577,37.0325,43.9832,32.044
54671,,15.8929,13.4949,18.0175,16.5661,14.2826,15.4712,13.1479,16.1659,12.4001,...,10.9887,11.9312,9.79184,10.734,12.665,20.1934,13.5573,11.978,17.5294,11.5064
54672,,11.1081,13.9345,9.93043,9.44505,11.7873,12.6125,12.5811,11.3237,8.65618,...,9.39193,9.01661,7.41827,7.35424,10.0266,8.86245,10.0976,8.34554,8.98414,7.78422
54673,,40.1626,36.0268,28.1912,26.7917,28.6702,34.0897,22.1207,30.4964,23.9832,...,16.3013,16.212,13.1053,14.997,11.4312,15.8626,12.5002,15.5034,18.7875,17.0389


Removing all columns that have no Entrez mapping ...


Unnamed: 0,ID_REF,GSM201542,GSM201543,GSM201544,GSM201545,GSM201829,GSM201830,GSM201831,GSM201832,GSM201833,...,GSM201835,GSM201836,GSM201837,GSM201838,GSM201839,GSM201840,GSM201841,GSM201842,GSM201843,GSM201844
0,780_100616237,564,602.786,748.264,677.546,640.927,647.959,654.743,654.681,614.747,...,625.259,579.565,616.358,659.955,709.862,551.21,612.343,638.982,617.984,538.935
1,5982,89.9426,80.9309,132.771,118.68,89.1658,90.0681,63.4989,99.854,82.0194,...,57.6767,89.0008,83.0726,96.4821,77.7915,79.5664,53.0871,123.829,130.807,76.9493
2,3310,227.829,240.16,191.737,200.043,217.188,249.35,225.285,195.772,190.766,...,208.767,196.029,191.193,159.151,185.322,170.749,174.966,161.157,176.349,198.395
3,7849,819.361,877.348,1064.08,1120.31,902.493,871.622,978.576,768.043,992.209,...,911.378,990.562,1148.45,1277.3,968.156,1026.71,863.915,1221.94,1119.06,1028.62
4,2978,33.2482,31.7194,36.789,46.8138,35.7579,35.0303,29.7765,29.6726,27.6191,...,50.4858,21.6945,31.5906,26.0352,53.0874,26.4713,53.957,18.8646,47.7717,41.4377
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54630,2597,15393.2,13097.4,14866.6,17848,12110.4,11651.3,13944.2,12509.5,12957.4,...,12970.4,13571.2,16830.6,14350.2,11215.6,12320.4,13158.2,18087.3,15058.4,12769.6
54631,6772,418.556,258.998,335.109,347.851,392.356,373.306,361.519,354.681,419.434,...,480.988,367.566,400.134,449.049,526.296,473.286,420.962,740.483,431.416,420.392
54632,6772,146.191,145.681,151.092,178.201,158.827,195.927,160.39,130.946,112.516,...,153.731,112.041,125.238,101.202,174.132,137.813,132.967,100.5,116.084,158.06
54633,6772,127.122,98.8764,143.331,129.416,116.88,108.698,99.4492,120.766,127.199,...,88.8751,84.7173,105.884,113.086,104.829,100.753,102.122,112.953,110.163,102.177


Unique IDs: 22189 Actual Size: 45118
Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM201542,GSM201543,GSM201544,GSM201545,GSM201829,GSM201830,GSM201831,GSM201832,GSM201833,...,GSM201835,GSM201836,GSM201837,GSM201838,GSM201839,GSM201840,GSM201841,GSM201842,GSM201843,GSM201844
0,1,157.416000,151.416000,158.66000,142.300000,135.083000,166.192000,170.987000,158.883000,127.677000,...,109.808000,120.370000,116.185000,119.462000,112.974000,92.575800,98.849000,132.406000,116.623000,99.45250
1,2,619.794500,528.307000,498.66675,1067.830800,734.189000,841.273350,621.137000,525.799500,760.987600,...,799.801600,798.051200,619.069900,1300.369800,798.340400,962.552950,787.223450,676.794100,579.744050,832.92965
2,3,259.838000,282.330000,288.07900,309.033000,265.400000,302.537000,266.648000,185.299000,232.188000,...,262.568000,221.508000,241.523000,272.261000,318.578000,259.579000,214.394000,229.171000,228.643000,268.37600
3,9,21.052900,17.224900,23.17590,29.798900,32.930300,17.792200,19.792600,14.575700,22.630400,...,14.656300,19.280500,28.135700,30.838600,20.077700,21.713000,21.028700,25.872800,42.194600,31.93040
4,10,407.872000,440.123000,575.91200,529.650000,429.908000,448.612000,523.805000,461.944000,414.368000,...,341.764000,347.183000,373.825000,282.363000,453.135000,496.336000,469.028000,312.561000,338.998000,420.38800
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22184,9782_724102,387.845767,323.705378,372.86470,378.346222,388.222644,353.046067,321.302133,439.674533,388.980556,...,398.991011,388.934267,401.449178,515.068267,384.432144,418.533156,414.651011,349.302867,365.548433,408.81490
22185,9802_102723983,149.326000,140.780000,103.66700,131.889000,148.371000,151.653000,158.630000,113.481000,121.283000,...,163.351000,154.583000,121.048000,108.229000,120.331000,137.484000,155.571000,130.019000,124.339000,181.90600
22186,984_728642,216.705800,213.984600,243.92468,197.127320,221.762000,218.261000,236.563740,229.475440,183.065660,...,177.160400,206.369120,195.576960,198.763680,161.247640,178.884720,178.980720,189.599720,207.863180,191.59170
22187,9859_645455,275.596833,203.708900,262.47700,297.684467,260.123500,240.226833,233.253267,234.444200,250.036133,...,261.375667,266.938733,297.041300,306.454600,271.508100,342.144033,273.487167,195.987100,255.311600,302.02240


Checking for spurious gene IDs (if any) ...
Small Additional Crosscheck ...
(array([], dtype=int64),)


Unnamed: 0,ID_REF,GSM201542,GSM201543,GSM201544,GSM201545,GSM201829,GSM201830,GSM201831,GSM201832,GSM201833,...,GSM201835,GSM201836,GSM201837,GSM201838,GSM201839,GSM201840,GSM201841,GSM201842,GSM201843,GSM201844
0,1,157.416000,151.416000,158.66000,142.300000,135.083000,166.192000,170.987000,158.883000,127.677000,...,109.808000,120.370000,116.185000,119.462000,112.974000,92.575800,98.849000,132.406000,116.623000,99.45250
1,2,619.794500,528.307000,498.66675,1067.830800,734.189000,841.273350,621.137000,525.799500,760.987600,...,799.801600,798.051200,619.069900,1300.369800,798.340400,962.552950,787.223450,676.794100,579.744050,832.92965
2,3,259.838000,282.330000,288.07900,309.033000,265.400000,302.537000,266.648000,185.299000,232.188000,...,262.568000,221.508000,241.523000,272.261000,318.578000,259.579000,214.394000,229.171000,228.643000,268.37600
3,9,21.052900,17.224900,23.17590,29.798900,32.930300,17.792200,19.792600,14.575700,22.630400,...,14.656300,19.280500,28.135700,30.838600,20.077700,21.713000,21.028700,25.872800,42.194600,31.93040
4,10,407.872000,440.123000,575.91200,529.650000,429.908000,448.612000,523.805000,461.944000,414.368000,...,341.764000,347.183000,373.825000,282.363000,453.135000,496.336000,469.028000,312.561000,338.998000,420.38800
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22184,9782_724102,387.845767,323.705378,372.86470,378.346222,388.222644,353.046067,321.302133,439.674533,388.980556,...,398.991011,388.934267,401.449178,515.068267,384.432144,418.533156,414.651011,349.302867,365.548433,408.81490
22185,9802_102723983,149.326000,140.780000,103.66700,131.889000,148.371000,151.653000,158.630000,113.481000,121.283000,...,163.351000,154.583000,121.048000,108.229000,120.331000,137.484000,155.571000,130.019000,124.339000,181.90600
22186,984_728642,216.705800,213.984600,243.92468,197.127320,221.762000,218.261000,236.563740,229.475440,183.065660,...,177.160400,206.369120,195.576960,198.763680,161.247640,178.884720,178.980720,189.599720,207.863180,191.59170
22187,9859_645455,275.596833,203.708900,262.47700,297.684467,260.123500,240.226833,233.253267,234.444200,250.036133,...,261.375667,266.938733,297.041300,306.454600,271.508100,342.144033,273.487167,195.987100,255.311600,302.02240


Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM201542,GSM201543,GSM201544,GSM201545,GSM201829,GSM201830,GSM201831,GSM201832,GSM201833,...,GSM201835,GSM201836,GSM201837,GSM201838,GSM201839,GSM201840,GSM201841,GSM201842,GSM201843,GSM201844
0,1,157.416000,151.416000,158.66000,142.300000,135.083000,166.192000,170.987000,158.883000,127.677000,...,109.808000,120.370000,116.185000,119.462000,112.974000,92.575800,98.849000,132.406000,116.623000,99.45250
1,2,619.794500,528.307000,498.66675,1067.830800,734.189000,841.273350,621.137000,525.799500,760.987600,...,799.801600,798.051200,619.069900,1300.369800,798.340400,962.552950,787.223450,676.794100,579.744050,832.92965
2,3,259.838000,282.330000,288.07900,309.033000,265.400000,302.537000,266.648000,185.299000,232.188000,...,262.568000,221.508000,241.523000,272.261000,318.578000,259.579000,214.394000,229.171000,228.643000,268.37600
3,9,21.052900,17.224900,23.17590,29.798900,32.930300,17.792200,19.792600,14.575700,22.630400,...,14.656300,19.280500,28.135700,30.838600,20.077700,21.713000,21.028700,25.872800,42.194600,31.93040
4,10,407.872000,440.123000,575.91200,529.650000,429.908000,448.612000,523.805000,461.944000,414.368000,...,341.764000,347.183000,373.825000,282.363000,453.135000,496.336000,469.028000,312.561000,338.998000,420.38800
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22184,9782_724102,387.845767,323.705378,372.86470,378.346222,388.222644,353.046067,321.302133,439.674533,388.980556,...,398.991011,388.934267,401.449178,515.068267,384.432144,418.533156,414.651011,349.302867,365.548433,408.81490
22185,9802_102723983,149.326000,140.780000,103.66700,131.889000,148.371000,151.653000,158.630000,113.481000,121.283000,...,163.351000,154.583000,121.048000,108.229000,120.331000,137.484000,155.571000,130.019000,124.339000,181.90600
22186,984_728642,216.705800,213.984600,243.92468,197.127320,221.762000,218.261000,236.563740,229.475440,183.065660,...,177.160400,206.369120,195.576960,198.763680,161.247640,178.884720,178.980720,189.599720,207.863180,191.59170
22187,9859_645455,275.596833,203.708900,262.47700,297.684467,260.123500,240.226833,233.253267,234.444200,250.036133,...,261.375667,266.938733,297.041300,306.454600,271.508100,342.144033,273.487167,195.987100,255.311600,302.02240


Scanning for additional columns to add ...


100%|██████████| 25180/25180 [00:07<00:00, 3453.45it/s]


ID_REF,1,2,3,131076,9,10,12,13,14,15,...,393046,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM201542,157.416,619.794,259.838,92.6128,21.0529,407.872,225.69,122.944,328.8,99.1529,...,,198.349,112.244,23.4929,93.2298,258.505,,125.26,61.0159,1
GSM201543,151.416,528.307,282.33,83.2014,17.2249,440.123,179.091,121.109,335.461,89.1627,...,,186.683,109.731,22.1188,67.7522,343.543,,141.425,59.5491,1
GSM201544,158.66,498.667,288.079,104.579,23.1759,575.912,221.884,112.59,369.503,99.7765,...,,198.174,164.234,23.1022,90.5344,414.094,,108.178,72.1817,1
GSM201545,142.3,1067.83,309.033,95.9926,29.7989,529.65,229.221,129.36,330.036,90.5101,...,,181.309,153.284,27.0541,118.429,295.869,,116.93,87.0371,1
GSM201829,135.083,734.189,265.4,140.819,32.9303,429.908,206.98,122.318,335.185,69.5371,...,,176.753,144.356,26.4946,98.0888,333.134,,130.044,71.0189,1
GSM201830,166.192,841.273,302.537,122.891,17.7922,448.612,240.301,129.327,351.465,117.028,...,,228.628,115.389,30.136,72.39,400.276,,132.464,73.5697,1
GSM201831,170.987,621.137,266.648,85.6091,19.7926,523.805,260.61,136.176,358.468,73.1073,...,,188.526,122.201,17.5226,90.2973,408.425,,140.818,80.8475,1
GSM201832,158.883,525.799,185.299,160.596,14.5757,461.944,240.374,109.897,303.971,96.1622,...,,212.953,127.337,23.7365,78.1862,387.553,,91.7474,50.9081,1
GSM201833,127.677,760.988,232.188,112.538,22.6304,414.368,316.914,117.896,312.467,67.6563,...,,177.369,148.968,16.3673,94.0525,397.993,,121.101,73.8363,1
GSM201834,144.03,717.427,310.909,110.484,24.8493,498.246,219.584,117.24,327.207,77.3874,...,,155.953,148.721,20.8102,91.812,249.084,,111.133,85.2232,1


GDS4133


Unnamed: 0,ID_REF,GSM201849,GSM201850,GSM201851,GSM201852,GSM201853,GSM201854,GSM201855,GSM201856,GSM201857,...,GSM201863,GSM201864,GSM201865,GSM201866,GSM201867,GSM201868,GSM201869,GSM201870,GSM201871,GSM201872
0,780_100616237,483.409,590.238,543.701,482.75,557.421,566.204,620.133,503.949,514.27,...,594.256,557.821,603.926,511.958,548.065,679.43,615.393,580.235,587.079,594.236
1,5982,66.9167,83.4537,78.4425,74.2555,104.467,79.028,84.5327,51.4189,61.9924,...,91.1624,74.4151,69.4337,81.8066,74.4412,119.824,107.552,81.2031,83.0794,58.2355
2,3310,219.597,210.304,248.096,176.755,159.189,185.612,201.865,212.898,162.053,...,177.66,167.325,154.766,205.902,221.009,167.39,173.965,196.353,228.719,203.703
3,7849,755.41,683.005,717.778,870.41,892.417,722.058,855.179,716.303,998.017,...,700.59,897.979,820.625,748.113,800.767,964.539,1013,815.482,797.76,888.794
4,2978,29.3928,26.7009,29.0265,26.9254,29.2165,35.0222,46.7473,22.1934,39.7672,...,27.3372,25.2721,37.2497,30.4781,29.1977,33.4887,42.3393,32.5193,31.9544,27.1104
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54670,,33.0276,50.0206,32.2976,66.637,52.7042,41.1143,56.3113,36.1342,45.0641,...,44.7051,40.5808,49.6982,40.404,34.5954,46.1147,56.9342,43.7577,37.9943,50.0639
54671,,13.6201,15.471,12.9002,11.0992,11.4834,14.4994,16.9573,10.9156,10.8589,...,15.9843,10.9018,12.0039,15.2765,12.594,16.743,15.1504,12.9059,14.0305,11.8148
54672,,14.3735,12.0054,15.6121,9.96302,10.6655,14.8697,13.789,14.0409,9.79406,...,13.6399,11.0706,11.7936,13.7013,14.0585,14.8552,16.2034,12.8904,12.7716,11.8375
54673,,17.3238,27.5912,26.1336,14.5015,23.0248,19.4359,23.6878,17.8598,12.4973,...,27.7578,21.6885,15.8557,36.4988,32.6399,25.5199,24.2106,26.0899,30.7728,20.0653


Removing all columns that have no Entrez mapping ...


Unnamed: 0,ID_REF,GSM201849,GSM201850,GSM201851,GSM201852,GSM201853,GSM201854,GSM201855,GSM201856,GSM201857,...,GSM201863,GSM201864,GSM201865,GSM201866,GSM201867,GSM201868,GSM201869,GSM201870,GSM201871,GSM201872
0,780_100616237,483.409,590.238,543.701,482.75,557.421,566.204,620.133,503.949,514.27,...,594.256,557.821,603.926,511.958,548.065,679.43,615.393,580.235,587.079,594.236
1,5982,66.9167,83.4537,78.4425,74.2555,104.467,79.028,84.5327,51.4189,61.9924,...,91.1624,74.4151,69.4337,81.8066,74.4412,119.824,107.552,81.2031,83.0794,58.2355
2,3310,219.597,210.304,248.096,176.755,159.189,185.612,201.865,212.898,162.053,...,177.66,167.325,154.766,205.902,221.009,167.39,173.965,196.353,228.719,203.703
3,7849,755.41,683.005,717.778,870.41,892.417,722.058,855.179,716.303,998.017,...,700.59,897.979,820.625,748.113,800.767,964.539,1013,815.482,797.76,888.794
4,2978,29.3928,26.7009,29.0265,26.9254,29.2165,35.0222,46.7473,22.1934,39.7672,...,27.3372,25.2721,37.2497,30.4781,29.1977,33.4887,42.3393,32.5193,31.9544,27.1104
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54630,2597,10931.3,11944.6,10389.4,14630.2,12539.5,11402.4,11945,12062.8,10063,...,11356.9,11752.4,11905.3,13955.5,11881.1,13475.3,16160.4,10983.4,10576.2,12642
54631,6772,300.917,431.095,283.026,259.817,389.257,380.868,304.337,293.26,365.258,...,321.986,380.648,346.425,379.612,234.65,304.965,315.949,356.088,338.524,328.358
54632,6772,25.8003,33.63,26.4349,21.1554,36.277,29.4034,39.045,22.1289,27.0487,...,33.3839,38.6913,30.8764,30.3744,32.264,37.3159,38.3286,31.4284,31.8472,21.8706
54633,6772,116.296,130.65,120.697,152.408,161.632,132.58,160.346,97.1815,139.866,...,129.63,167.968,163.537,136.047,131.602,161.189,163.236,140.697,131.494,138.6


Unique IDs: 22189 Actual Size: 45118
Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM201849,GSM201850,GSM201851,GSM201852,GSM201853,GSM201854,GSM201855,GSM201856,GSM201857,...,GSM201863,GSM201864,GSM201865,GSM201866,GSM201867,GSM201868,GSM201869,GSM201870,GSM201871,GSM201872
0,1,118.261000,131.127000,152.805000,102.078000,114.537000,122.578000,143.783000,115.312000,86.909200,...,148.849000,112.962000,128.383000,144.797000,143.236000,139.976000,128.075000,131.846000,159.476000,152.316000
1,2,666.033350,805.182000,642.768000,738.833550,585.731000,773.909200,608.409500,548.047500,769.732650,...,485.790500,709.860850,662.422000,585.397500,496.119000,469.721500,988.753000,678.197500,774.100650,573.426500
2,3,187.786000,201.122000,185.160000,350.734000,249.402000,238.207000,293.166000,172.000000,250.386000,...,167.945000,210.618000,281.855000,235.432000,255.851000,261.033000,280.239000,240.752000,274.268000,241.654000
3,9,17.116100,21.786400,18.458000,42.405400,34.989700,35.992700,37.413900,20.120400,37.183400,...,15.348300,27.330300,31.541600,22.871300,18.292900,25.450700,38.302400,31.995200,18.605300,20.824800
4,10,132.988000,88.068700,105.223000,143.751000,99.260200,121.867000,99.857600,72.784600,137.829000,...,84.947700,89.488200,108.520000,97.667900,113.057000,88.230600,107.511000,89.776200,110.656000,87.796500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22184,9782_724102,325.729100,379.271356,300.949144,240.359456,395.523878,313.869911,346.627544,333.952644,356.460222,...,398.086956,352.902256,328.196656,352.141011,292.843956,337.921689,343.214800,352.073489,319.393589,291.634533
22185,9802_102723983,158.782000,140.192000,171.272000,135.387000,108.797000,138.909000,139.585000,148.336000,142.798000,...,103.438000,109.576000,110.908000,136.564000,127.549000,94.221600,119.435000,134.537000,137.886000,144.104000
22186,984_728642,165.930720,192.639640,190.222620,165.662720,182.888440,160.505700,207.868140,197.457680,149.689440,...,205.984660,159.299820,165.120940,189.367220,184.698520,217.764200,176.506340,194.003940,191.565500,205.686260
22187,9859_645455,197.779733,208.775133,201.891833,207.170000,269.501400,207.712900,211.789733,202.688867,247.095733,...,214.218900,227.404367,253.115567,250.842667,185.600367,238.338467,269.594833,235.793533,218.544700,212.025033


Checking for spurious gene IDs (if any) ...
Small Additional Crosscheck ...
(array([], dtype=int64),)


Unnamed: 0,ID_REF,GSM201849,GSM201850,GSM201851,GSM201852,GSM201853,GSM201854,GSM201855,GSM201856,GSM201857,...,GSM201863,GSM201864,GSM201865,GSM201866,GSM201867,GSM201868,GSM201869,GSM201870,GSM201871,GSM201872
0,1,118.261000,131.127000,152.805000,102.078000,114.537000,122.578000,143.783000,115.312000,86.909200,...,148.849000,112.962000,128.383000,144.797000,143.236000,139.976000,128.075000,131.846000,159.476000,152.316000
1,2,666.033350,805.182000,642.768000,738.833550,585.731000,773.909200,608.409500,548.047500,769.732650,...,485.790500,709.860850,662.422000,585.397500,496.119000,469.721500,988.753000,678.197500,774.100650,573.426500
2,3,187.786000,201.122000,185.160000,350.734000,249.402000,238.207000,293.166000,172.000000,250.386000,...,167.945000,210.618000,281.855000,235.432000,255.851000,261.033000,280.239000,240.752000,274.268000,241.654000
3,9,17.116100,21.786400,18.458000,42.405400,34.989700,35.992700,37.413900,20.120400,37.183400,...,15.348300,27.330300,31.541600,22.871300,18.292900,25.450700,38.302400,31.995200,18.605300,20.824800
4,10,132.988000,88.068700,105.223000,143.751000,99.260200,121.867000,99.857600,72.784600,137.829000,...,84.947700,89.488200,108.520000,97.667900,113.057000,88.230600,107.511000,89.776200,110.656000,87.796500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22184,9782_724102,325.729100,379.271356,300.949144,240.359456,395.523878,313.869911,346.627544,333.952644,356.460222,...,398.086956,352.902256,328.196656,352.141011,292.843956,337.921689,343.214800,352.073489,319.393589,291.634533
22185,9802_102723983,158.782000,140.192000,171.272000,135.387000,108.797000,138.909000,139.585000,148.336000,142.798000,...,103.438000,109.576000,110.908000,136.564000,127.549000,94.221600,119.435000,134.537000,137.886000,144.104000
22186,984_728642,165.930720,192.639640,190.222620,165.662720,182.888440,160.505700,207.868140,197.457680,149.689440,...,205.984660,159.299820,165.120940,189.367220,184.698520,217.764200,176.506340,194.003940,191.565500,205.686260
22187,9859_645455,197.779733,208.775133,201.891833,207.170000,269.501400,207.712900,211.789733,202.688867,247.095733,...,214.218900,227.404367,253.115567,250.842667,185.600367,238.338467,269.594833,235.793533,218.544700,212.025033


Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM201849,GSM201850,GSM201851,GSM201852,GSM201853,GSM201854,GSM201855,GSM201856,GSM201857,...,GSM201863,GSM201864,GSM201865,GSM201866,GSM201867,GSM201868,GSM201869,GSM201870,GSM201871,GSM201872
0,1,118.261000,131.127000,152.805000,102.078000,114.537000,122.578000,143.783000,115.312000,86.909200,...,148.849000,112.962000,128.383000,144.797000,143.236000,139.976000,128.075000,131.846000,159.476000,152.316000
1,2,666.033350,805.182000,642.768000,738.833550,585.731000,773.909200,608.409500,548.047500,769.732650,...,485.790500,709.860850,662.422000,585.397500,496.119000,469.721500,988.753000,678.197500,774.100650,573.426500
2,3,187.786000,201.122000,185.160000,350.734000,249.402000,238.207000,293.166000,172.000000,250.386000,...,167.945000,210.618000,281.855000,235.432000,255.851000,261.033000,280.239000,240.752000,274.268000,241.654000
3,9,17.116100,21.786400,18.458000,42.405400,34.989700,35.992700,37.413900,20.120400,37.183400,...,15.348300,27.330300,31.541600,22.871300,18.292900,25.450700,38.302400,31.995200,18.605300,20.824800
4,10,132.988000,88.068700,105.223000,143.751000,99.260200,121.867000,99.857600,72.784600,137.829000,...,84.947700,89.488200,108.520000,97.667900,113.057000,88.230600,107.511000,89.776200,110.656000,87.796500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22184,9782_724102,325.729100,379.271356,300.949144,240.359456,395.523878,313.869911,346.627544,333.952644,356.460222,...,398.086956,352.902256,328.196656,352.141011,292.843956,337.921689,343.214800,352.073489,319.393589,291.634533
22185,9802_102723983,158.782000,140.192000,171.272000,135.387000,108.797000,138.909000,139.585000,148.336000,142.798000,...,103.438000,109.576000,110.908000,136.564000,127.549000,94.221600,119.435000,134.537000,137.886000,144.104000
22186,984_728642,165.930720,192.639640,190.222620,165.662720,182.888440,160.505700,207.868140,197.457680,149.689440,...,205.984660,159.299820,165.120940,189.367220,184.698520,217.764200,176.506340,194.003940,191.565500,205.686260
22187,9859_645455,197.779733,208.775133,201.891833,207.170000,269.501400,207.712900,211.789733,202.688867,247.095733,...,214.218900,227.404367,253.115567,250.842667,185.600367,238.338467,269.594833,235.793533,218.544700,212.025033


Scanning for additional columns to add ...


100%|██████████| 25180/25180 [00:07<00:00, 3520.84it/s]


ID_REF,1,2,3,131076,9,10,12,13,14,15,...,393046,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM201849,118.261,666.033,187.786,121.851,17.1161,132.988,157.22,123.814,279.946,64.6772,...,,189.303,104.089,30.5327,78.0891,249.876,,141.572,53.8534,0
GSM201850,131.127,805.182,201.122,106.731,21.7864,88.0687,167.941,124.759,311.144,78.3015,...,,196.058,124.006,37.079,97.8779,353.884,,136.211,65.314,0
GSM201851,152.805,642.768,185.16,121.705,18.458,105.223,147.754,128.911,321.726,80.5674,...,,194.151,98.7149,42.2795,86.238,291.438,,149.325,54.6427,0
GSM201852,102.078,738.834,350.734,90.2302,42.4054,143.751,163.69,130.23,245.525,53.4923,...,,134.504,160.423,46.8522,130.012,200.438,,151.576,111.44,0
GSM201853,114.537,585.731,249.402,113.644,34.9897,99.2602,134.141,101.814,250.197,63.516,...,,178.583,124.822,42.2096,86.8652,364.671,,109.15,65.7925,0
GSM201854,122.578,773.909,238.207,174.595,35.9927,121.867,155.063,115.91,265.093,86.2221,...,,192.915,146.566,31.0015,85.2481,191.103,,117.128,69.5504,0
GSM201855,143.783,608.409,293.166,92.129,37.4139,99.8576,158.265,118.466,305.104,80.4,...,,169.49,160.959,60.6654,113.112,298.752,,132.02,67.1144,0
GSM201856,115.312,548.048,172.0,106.785,20.1204,72.7846,124.386,115.75,247.468,69.2041,...,,197.946,91.0354,31.4984,77.5805,234.099,,120.831,48.0524,0
GSM201857,86.9092,769.733,250.386,116.826,37.1834,137.829,153.967,132.119,240.991,34.9418,...,,163.558,138.505,46.4424,116.656,318.738,,142.338,98.5721,0
GSM201858,83.3162,808.799,261.367,148.113,41.9982,105.871,142.384,109.291,264.478,34.3241,...,,205.932,132.19,37.9261,98.5696,219.264,,114.881,77.6905,0


GDS4399


Unnamed: 0,ID_REF,GSM850527,GSM850528,GSM850529,GSM850530,GSM850531,GSM850532,GSM850533,GSM850534,GSM850535,GSM850536
0,780_100616237,1827.7,2735.92,3528.27,2697.7,1474.9,3103.04,2102.58,1300.83,1397.22,1240.26
1,5982,98.7153,361.32,308.85,266.351,446.131,319.036,306.355,393.741,299.85,428.016
2,3310,53.1415,552.695,505.498,693.408,1321.53,1241.07,1801.42,1219.17,2204.95,2293.98
3,7849,510.73,476.999,535.266,631.601,459.353,663.871,469.711,444.47,287.089,356.295
4,2978,95.8522,75.366,92.8731,44.9358,36.2481,47.1329,9.52201,71.4039,111.063,73.081
...,...,...,...,...,...,...,...,...,...,...,...
54670,,24.4004,7.4676,3.27977,8.09418,5.01694,18.6029,3.37706,110.558,12.3915,8.04602
54671,,4.4367,38.5328,41.5537,52.556,12.3979,92.9243,50.0808,154.255,63.2097,20.2206
54672,,4.70751,1.85965,33.1628,3.50057,23.0304,45.2287,24.1926,21.0493,49.2679,4.10429
54673,,14.5444,9.62345,5.66041,6.05977,14.1844,11.5334,4.60563,32.9977,20.4848,102.923


Removing all columns that have no Entrez mapping ...


Unnamed: 0,ID_REF,GSM850527,GSM850528,GSM850529,GSM850530,GSM850531,GSM850532,GSM850533,GSM850534,GSM850535,GSM850536
0,780_100616237,1827.7,2735.92,3528.27,2697.7,1474.9,3103.04,2102.58,1300.83,1397.22,1240.26
1,5982,98.7153,361.32,308.85,266.351,446.131,319.036,306.355,393.741,299.85,428.016
2,3310,53.1415,552.695,505.498,693.408,1321.53,1241.07,1801.42,1219.17,2204.95,2293.98
3,7849,510.73,476.999,535.266,631.601,459.353,663.871,469.711,444.47,287.089,356.295
4,2978,95.8522,75.366,92.8731,44.9358,36.2481,47.1329,9.52201,71.4039,111.063,73.081
...,...,...,...,...,...,...,...,...,...,...,...
54630,2597,1627.6,17004.2,20088.9,10510.9,15705.9,23031.5,19088.1,21885.1,57351,63593.6
54631,6772,5529.21,5652.08,5465.79,13647,5038.43,6774.43,4937.98,4140.93,5112.45,4593.94
54632,6772,12.7202,32.7053,33.2779,8.7357,46.9238,104.041,33.8974,391.018,336.225,565.338
54633,6772,26.2936,227.907,204.714,229.164,232.818,486.962,286.408,689.011,2423.19,1880.77


Unique IDs: 22189 Actual Size: 45118
Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM850527,GSM850528,GSM850529,GSM850530,GSM850531,GSM850532,GSM850533,GSM850534,GSM850535,GSM850536
0,1,132.221000,140.994000,161.59300,151.210000,119.443000,50.037800,97.543000,239.208000,184.561000,113.501000
1,2,281.916000,385.726750,401.46250,632.388500,785.675150,839.423500,755.760850,1277.367000,1035.490500,1115.151100
2,3,426.536000,6.587680,5.74087,181.694000,8.892130,94.126900,51.708800,55.982300,46.721900,38.774100
3,9,1136.280000,1218.550000,1161.19000,1102.360000,1172.940000,1059.070000,951.852000,1414.090000,596.137000,502.494000
4,10,14.449500,6.179300,4.22469,17.548700,3.457470,6.843330,3.256820,19.417200,41.202200,10.998100
...,...,...,...,...,...,...,...,...,...,...,...
22184,9782_724102,4707.685429,2664.354499,2598.86729,3472.969937,2502.114063,2328.790522,2419.955956,2265.942256,2104.061011,2025.929111
22185,9802_102723983,199.751000,312.537000,295.57900,87.772000,305.804000,180.149000,312.169000,124.344000,229.813000,114.225000
22186,984_728642,189.905816,752.159720,670.14316,458.168020,836.861800,607.245780,866.390040,890.084200,1127.377000,1101.743400
22187,9859_645455,1370.101433,621.645800,565.56730,2342.174033,898.507833,979.913000,921.531667,811.969000,521.616667,655.471000


Checking for spurious gene IDs (if any) ...
Small Additional Crosscheck ...
(array([], dtype=int64),)


Unnamed: 0,ID_REF,GSM850527,GSM850528,GSM850529,GSM850530,GSM850531,GSM850532,GSM850533,GSM850534,GSM850535,GSM850536
0,1,132.221000,140.994000,161.59300,151.210000,119.443000,50.037800,97.543000,239.208000,184.561000,113.501000
1,2,281.916000,385.726750,401.46250,632.388500,785.675150,839.423500,755.760850,1277.367000,1035.490500,1115.151100
2,3,426.536000,6.587680,5.74087,181.694000,8.892130,94.126900,51.708800,55.982300,46.721900,38.774100
3,9,1136.280000,1218.550000,1161.19000,1102.360000,1172.940000,1059.070000,951.852000,1414.090000,596.137000,502.494000
4,10,14.449500,6.179300,4.22469,17.548700,3.457470,6.843330,3.256820,19.417200,41.202200,10.998100
...,...,...,...,...,...,...,...,...,...,...,...
22184,9782_724102,4707.685429,2664.354499,2598.86729,3472.969937,2502.114063,2328.790522,2419.955956,2265.942256,2104.061011,2025.929111
22185,9802_102723983,199.751000,312.537000,295.57900,87.772000,305.804000,180.149000,312.169000,124.344000,229.813000,114.225000
22186,984_728642,189.905816,752.159720,670.14316,458.168020,836.861800,607.245780,866.390040,890.084200,1127.377000,1101.743400
22187,9859_645455,1370.101433,621.645800,565.56730,2342.174033,898.507833,979.913000,921.531667,811.969000,521.616667,655.471000


Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM850527,GSM850528,GSM850529,GSM850530,GSM850531,GSM850532,GSM850533,GSM850534,GSM850535,GSM850536
0,1,132.221000,140.994000,161.59300,151.210000,119.443000,50.037800,97.543000,239.208000,184.561000,113.501000
1,2,281.916000,385.726750,401.46250,632.388500,785.675150,839.423500,755.760850,1277.367000,1035.490500,1115.151100
2,3,426.536000,6.587680,5.74087,181.694000,8.892130,94.126900,51.708800,55.982300,46.721900,38.774100
3,9,1136.280000,1218.550000,1161.19000,1102.360000,1172.940000,1059.070000,951.852000,1414.090000,596.137000,502.494000
4,10,14.449500,6.179300,4.22469,17.548700,3.457470,6.843330,3.256820,19.417200,41.202200,10.998100
...,...,...,...,...,...,...,...,...,...,...,...
22184,9782_724102,4707.685429,2664.354499,2598.86729,3472.969937,2502.114063,2328.790522,2419.955956,2265.942256,2104.061011,2025.929111
22185,9802_102723983,199.751000,312.537000,295.57900,87.772000,305.804000,180.149000,312.169000,124.344000,229.813000,114.225000
22186,984_728642,189.905816,752.159720,670.14316,458.168020,836.861800,607.245780,866.390040,890.084200,1127.377000,1101.743400
22187,9859_645455,1370.101433,621.645800,565.56730,2342.174033,898.507833,979.913000,921.531667,811.969000,521.616667,655.471000


Scanning for additional columns to add ...


100%|██████████| 25180/25180 [00:07<00:00, 3483.60it/s]


ID_REF,1,2,3,131076,9,10,12,13,14,15,...,393046,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM850527,132.221,281.916,426.536,1763.09,1136.28,14.4495,424.969,5980.19,207.0,30.0271,...,,339.691,882.606,39.1957,23.6702,39.6391,,686.118,9.83481,0
GSM850528,140.994,385.727,6.58768,507.824,1218.55,6.1793,2083.93,4655.95,726.953,4.10135,...,,390.308,1429.08,32.3426,17.1339,122.616,,101.477,25.6234,0
GSM850529,161.593,401.462,5.74087,508.041,1161.19,4.22469,2106.79,3971.98,831.601,12.5056,...,,450.68,1377.76,29.8261,22.0154,124.557,,90.9056,53.3753,0
GSM850530,151.21,632.388,181.694,566.418,1102.36,17.5487,381.129,4070.33,524.135,6.66029,...,,410.836,1007.56,48.6136,14.8529,148.93,,599.075,50.2188,1
GSM850531,119.443,785.675,8.89213,313.484,1172.94,3.45747,3814.33,3507.39,583.993,6.99695,...,,547.116,1046.84,13.3062,11.8592,94.2125,,106.583,44.621,1
GSM850532,50.0378,839.423,94.1269,267.403,1059.07,6.84333,1094.49,3137.21,755.025,8.11312,...,,431.022,1046.08,51.8442,9.78078,197.151,,250.131,9.26072,1
GSM850533,97.543,755.761,51.7088,231.748,951.852,3.25682,3407.94,4263.96,777.368,4.07855,...,,442.393,918.314,43.9717,14.199,134.297,,101.852,27.4257,1
GSM850534,239.208,1277.37,55.9823,169.632,1414.09,19.4172,2191.2,4676.63,1168.21,15.5555,...,,362.291,767.772,81.9558,47.5295,99.134,,207.782,25.3495,1
GSM850535,184.561,1035.49,46.7219,112.697,596.137,41.2022,1704.02,4426.95,1406.57,7.80749,...,,366.262,827.665,74.1792,18.0873,830.739,,182.028,7.07118,1
GSM850536,113.501,1115.15,38.7741,8.48357,502.494,10.9981,2006.8,3342.9,1054.13,6.69671,...,,278.384,1335.57,44.6502,81.4475,653.635,,370.197,30.5586,1


GDS4987


Unnamed: 0,ID_REF,GSM1174425,GSM1174429,GSM1174436,GSM1174427,GSM1174430,GSM1174432,GSM1174435,GSM1174424,GSM1174428,...,GSM1174416,GSM1174419,GSM1174408,GSM1174413,GSM1174417,GSM1174420,GSM1174410,GSM1174411,GSM1174415,GSM1174422
0,,0.426693,0.0471764,0.0611334,0.417477,-0.0449924,-0.0921006,0.179974,0.081224,0.178698,...,0,0,0.272785,0.0554972,-0.223779,-0.425988,-0.129871,-0.128146,0.280311,-0.162884
1,,0.0790515,0.101923,0.273007,0.197721,0.211543,0.17741,0.029892,0.0103183,-0.00491857,...,-0.0281668,-0.0564113,-0.0489554,-0.0073061,0.018373,-0.0497928,-0.0893683,0.0530882,0.125371,-0.0116062
2,26682_81099_79501,-0.0371127,0.0815725,0.151132,0.455904,0.362855,0.0477085,0.347839,0.0799193,-0.064373,...,-0.0862403,0.00981712,0.0148506,-0.0373483,0.076395,-0.0118432,-0.140297,-0.0650997,0,-0.0770555
3,346288_140849_55251_100134822,0.102249,1.48231,-1.08528,0.665998,0.987918,0.765499,0.319453,0.451426,-0.78304,...,0.0837283,0.171076,-1.36677,0.0630341,0.00424862,-0.468315,-0.86887,0.496053,-0.283835,-0.0697675
4,26683_81399_441308_729759,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33292,,,,,,,,,,,...,,,,,,,,,,
33293,,,,,,,,,,,...,,,,,,,,,,
33294,,,,,,,,,,,...,,,,,,,,,,
33295,,,,,,,,,,,...,,,,,,,,,,


Removing all columns that have no Entrez mapping ...


Unnamed: 0,ID_REF,GSM1174425,GSM1174429,GSM1174436,GSM1174427,GSM1174430,GSM1174432,GSM1174435,GSM1174424,GSM1174428,...,GSM1174416,GSM1174419,GSM1174408,GSM1174413,GSM1174417,GSM1174420,GSM1174410,GSM1174411,GSM1174415,GSM1174422
2,26682_81099_79501,-0.0371127,0.0815725,0.151132,0.455904,0.362855,0.0477085,0.347839,0.0799193,-0.064373,...,-0.0862403,0.00981712,0.0148506,-0.0373483,0.076395,-0.0118432,-0.140297,-0.0650997,0,-0.0770555
3,346288_140849_55251_100134822,0.102249,1.48231,-1.08528,0.665998,0.987918,0.765499,0.319453,0.451426,-0.78304,...,0.0837283,0.171076,-1.36677,0.0630341,0.00424862,-0.468315,-0.86887,0.496053,-0.283835,-0.0697675
4,26683_81399_441308_729759,,,,,,,,,,...,,,,,,,,,,
9,100287497_100287934,-0.113919,0.134601,1.03149,-0.0326066,-0.0490508,0.0685182,0.290254,0.312844,-0.232195,...,-0.173526,0.510928,-0.0930266,-0.101963,0.0557885,0.0422387,-0.124155,0.130256,0.126083,0.0176716
10,400728_157693,0.957484,-0.158868,0.0432911,-0.0503192,-0.0509591,-0.458104,-0.102297,0.221353,0.0018363,...,-0.0975132,-0.156271,0.0412216,-0.0279093,0.0909681,-0.0470104,0.160393,0.0612626,0.129874,-0.204071
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10843,5598,0.389947,0.0569696,-0.0965199,-0.0867476,0.0481977,0.25568,-0.111089,0.169343,0.152913,...,-0.129497,-0.325287,0.029871,0,-0.101555,-0.137409,-0.096529,-0.0165434,-0.105213,-0.187737
10844,7732,0.279941,0.201456,0.299782,-0.0242777,-0.033123,-0.0972929,-0.0211053,-0.0224576,0.0501523,...,0.00330496,0,-0.012414,0.0279431,-0.0305076,-0.114369,-0.0721316,0.291912,-0.0340691,0.11974
10846,55244,-0.676515,-0.497063,-0.0282907,1.64466,2.01116,0.903198,2.19948,-0.220469,-0.170602,...,3.58522,0.365348,-0.596739,0.0324707,0.0842419,-0.215921,-0.0631962,0.34679,0.797729,-0.180359
10847,677882_677885,-0.453826,-0.841139,0.0315576,0.751601,1.73382,0.99511,1.47645,-0.275502,-0.609707,...,2.17916,1.04203,-0.161461,0.0472693,0.562274,0.242929,-0.37302,-0.158728,0.0796447,0.289137


Unique IDs: 7932 Actual Size: 8383
Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM1174425,GSM1174429,GSM1174436,GSM1174427,GSM1174430,GSM1174432,GSM1174435,GSM1174424,GSM1174428,...,GSM1174416,GSM1174419,GSM1174408,GSM1174413,GSM1174417,GSM1174420,GSM1174410,GSM1174411,GSM1174415,GSM1174422
0,2,2.818150,2.887690,2.980650,-3.525380,-3.521330,-3.566470,-3.429890,1.554540,2.080120,...,-1.869070,-2.228090,1.845970,2.282530,2.043430,1.916010,-1.745030,-1.050880,-0.415323,-0.368802
1,12,0.000000,0.014427,-0.116639,2.072260,1.418320,3.007250,0.717401,-0.133122,0.086389,...,3.846320,3.223370,-0.480368,-0.354922,-0.213374,0.183758,-0.405042,-0.405701,-0.063835,-0.410328
2,16,0.208924,0.735067,-0.115965,-0.761099,-0.280028,-1.158230,-0.522668,-0.435685,0.411367,...,-0.202639,-0.188616,0.430524,0.629131,0.447808,0.475255,0.334290,-0.439426,0.258777,-0.202018
3,18,-0.180470,0.129314,0.064477,0.009513,0.048908,-0.039592,0.004148,0.125017,0.300837,...,0.143869,-0.043517,0.051072,-0.024912,-0.023784,-0.020250,-0.023128,-0.024912,0.129123,-0.197569
4,21,0.318855,0.174927,0.222412,-0.049681,0.050637,-0.074867,0.204762,0.009822,-0.100112,...,0.000000,-0.194595,-0.008076,-0.180850,0.174923,-0.022442,0.111007,0.031285,-0.275916,-0.092824
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7927,9556,-0.618012,-0.598186,-0.453303,0.346010,0.159875,0.367265,0.069210,-0.202255,-0.249895,...,-0.006843,0.308805,0.006623,-0.119037,0.169888,0.095421,0.507918,0.514419,0.215421,0.604333
7928,9659_100996724,0.023486,-0.112501,-0.270994,0.023322,0.000000,0.093695,0.357036,-0.047477,0.358214,...,-0.261129,0.141325,-0.030933,0.031506,-0.014572,-0.222528,0.090470,0.092452,0.038302,-0.171990
7929,9698,-0.079279,-0.144554,-0.251414,-0.425434,-0.298251,-0.884743,0.028409,0.066935,0.393334,...,-0.015659,0.200603,-0.030984,0.152534,0.132085,0.182817,-0.326638,-0.909085,-0.344563,-0.424698
7930,9742,-0.035834,-0.160748,0.003976,-0.243402,0.014436,-0.608540,0.057287,-0.110240,0.154281,...,0.195221,0.276109,0.074017,0.000000,0.083637,-0.069797,0.177304,-0.235866,-0.273764,-0.337288


Checking for spurious gene IDs (if any) ...
100129884
10294
10328
10329
113246
118672
121227
121599
121665
128240
128338
140691
196968
2188
219902
220929
22978
23254
23478
25911
25912
2662
28232
283537
2950
29880
29928
317772
349196
374354
3799
4259
440068
440104
440200
440279
4914
5033
51108
51234
51643
51706
5176
54939
54996
55657
56339
57146
5757
57611
57661
5826
5916
5920
60626
6171
6232
6302
63967
642559
643332
646029
64801
650669
656
6886
7253
79038
79050
80150
80975
84243
84304
84441
84817
84838
84879
85414
8711
9045
9445
9556
9698
9742
9789
Small Additional Crosscheck ...
(array([], dtype=int64),)


Unnamed: 0,ID_REF,GSM1174425,GSM1174429,GSM1174436,GSM1174427,GSM1174430,GSM1174432,GSM1174435,GSM1174424,GSM1174428,...,GSM1174416,GSM1174419,GSM1174408,GSM1174413,GSM1174417,GSM1174420,GSM1174410,GSM1174411,GSM1174415,GSM1174422
0,2,2.818150,2.887690,2.980650,-3.525380,-3.521330,-3.566470,-3.429890,1.554540,2.080120,...,-1.869070,-2.228090,1.845970,2.282530,2.043430,1.916010,-1.745030,-1.050880,-0.415323,-0.368802
1,12,0.000000,0.014427,-0.116639,2.072260,1.418320,3.007250,0.717401,-0.133122,0.086389,...,3.846320,3.223370,-0.480368,-0.354922,-0.213374,0.183758,-0.405042,-0.405701,-0.063835,-0.410328
2,16,0.208924,0.735067,-0.115965,-0.761099,-0.280028,-1.158230,-0.522668,-0.435685,0.411367,...,-0.202639,-0.188616,0.430524,0.629131,0.447808,0.475255,0.334290,-0.439426,0.258777,-0.202018
3,18,-0.180470,0.129314,0.064477,0.009513,0.048908,-0.039592,0.004148,0.125017,0.300837,...,0.143869,-0.043517,0.051072,-0.024912,-0.023784,-0.020250,-0.023128,-0.024912,0.129123,-0.197569
4,21,0.318855,0.174927,0.222412,-0.049681,0.050637,-0.074867,0.204762,0.009822,-0.100112,...,0.000000,-0.194595,-0.008076,-0.180850,0.174923,-0.022442,0.111007,0.031285,-0.275916,-0.092824
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7927,9556,-0.618012,-0.598186,-0.453303,0.346010,0.159875,0.367265,0.069210,-0.202255,-0.249895,...,-0.006843,0.308805,0.006623,-0.119037,0.169888,0.095421,0.507918,0.514419,0.215421,0.604333
7928,9659_100996724,0.023486,-0.112501,-0.270994,0.023322,0.000000,0.093695,0.357036,-0.047477,0.358214,...,-0.261129,0.141325,-0.030933,0.031506,-0.014572,-0.222528,0.090470,0.092452,0.038302,-0.171990
7929,9698,-0.079279,-0.144554,-0.251414,-0.425434,-0.298251,-0.884743,0.028409,0.066935,0.393334,...,-0.015659,0.200603,-0.030984,0.152534,0.132085,0.182817,-0.326638,-0.909085,-0.344563,-0.424698
7930,9742,-0.035834,-0.160748,0.003976,-0.243402,0.014436,-0.608540,0.057287,-0.110240,0.154281,...,0.195221,0.276109,0.074017,0.000000,0.083637,-0.069797,0.177304,-0.235866,-0.273764,-0.337288


Eliminating all duplicates, replacing them with averages ...


Unnamed: 0,ID_REF,GSM1174425,GSM1174429,GSM1174436,GSM1174427,GSM1174430,GSM1174432,GSM1174435,GSM1174424,GSM1174428,...,GSM1174416,GSM1174419,GSM1174408,GSM1174413,GSM1174417,GSM1174420,GSM1174410,GSM1174411,GSM1174415,GSM1174422
0,2,2.818150,2.887690,2.980650,-3.525380,-3.521330,-3.566470,-3.429890,1.554540,2.080120,...,-1.869070,-2.228090,1.845970,2.282530,2.043430,1.916010,-1.745030,-1.050880,-0.415323,-0.368802
1,12,0.000000,0.014427,-0.116639,2.072260,1.418320,3.007250,0.717401,-0.133122,0.086389,...,3.846320,3.223370,-0.480368,-0.354922,-0.213374,0.183758,-0.405042,-0.405701,-0.063835,-0.410328
2,16,0.208924,0.735067,-0.115965,-0.761099,-0.280028,-1.158230,-0.522668,-0.435685,0.411367,...,-0.202639,-0.188616,0.430524,0.629131,0.447808,0.475255,0.334290,-0.439426,0.258777,-0.202018
3,18,-0.180470,0.129314,0.064477,0.009513,0.048908,-0.039592,0.004148,0.125017,0.300837,...,0.143869,-0.043517,0.051072,-0.024912,-0.023784,-0.020250,-0.023128,-0.024912,0.129123,-0.197569
4,21,0.318855,0.174927,0.222412,-0.049681,0.050637,-0.074867,0.204762,0.009822,-0.100112,...,0.000000,-0.194595,-0.008076,-0.180850,0.174923,-0.022442,0.111007,0.031285,-0.275916,-0.092824
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7927,9556,-0.618012,-0.598186,-0.453303,0.346010,0.159875,0.367265,0.069210,-0.202255,-0.249895,...,-0.006843,0.308805,0.006623,-0.119037,0.169888,0.095421,0.507918,0.514419,0.215421,0.604333
7928,9659_100996724,0.023486,-0.112501,-0.270994,0.023322,0.000000,0.093695,0.357036,-0.047477,0.358214,...,-0.261129,0.141325,-0.030933,0.031506,-0.014572,-0.222528,0.090470,0.092452,0.038302,-0.171990
7929,9698,-0.079279,-0.144554,-0.251414,-0.425434,-0.298251,-0.884743,0.028409,0.066935,0.393334,...,-0.015659,0.200603,-0.030984,0.152534,0.132085,0.182817,-0.326638,-0.909085,-0.344563,-0.424698
7930,9742,-0.035834,-0.160748,0.003976,-0.243402,0.014436,-0.608540,0.057287,-0.110240,0.154281,...,0.195221,0.276109,0.074017,0.000000,0.083637,-0.069797,0.177304,-0.235866,-0.273764,-0.337288


Scanning for additional columns to add ...


100%|██████████| 25180/25180 [00:27<00:00, 917.68it/s] 


ID_REF,1,2,3,131076,9,10,12,13,14,15,...,393046,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM1174425,,2.81815,,,,,0.0,,,,...,,,,,,,,,,1
GSM1174429,,2.88769,,,,,0.0144272,,,,...,,,,,,,,,,1
GSM1174436,,2.98065,,,,,-0.116639,,,,...,,,,,,,,,,1
GSM1174427,,-3.52538,,,,,2.07226,,,,...,,,,,,,,,,1
GSM1174430,,-3.52133,,,,,1.41832,,,,...,,,,,,,,,,1
GSM1174432,,-3.56647,,,,,3.00725,,,,...,,,,,,,,,,1
GSM1174435,,-3.42989,,,,,0.717401,,,,...,,,,,,,,,,1
GSM1174424,,1.55454,,,,,-0.133122,,,,...,,,,,,,,,,1
GSM1174428,,2.08012,,,,,0.0863886,,,,...,,,,,,,,,,1
GSM1174433,,1.8024,,,,,-0.384707,,,,...,,,,,,,,,,1




In [5]:
df_normalised_list = []
for df,i in zip(df_list, file_names):
    df_new = df[unique_geneids]
    df_new = (df_new - df_new.min())/(df_new.max()-df_new.min())
    df_new["PCOS"] = df["PCOS"]
    display(df_new)
    
    df_new.to_csv(i + "_normalized.csv")
    df_normalised_list.append(df_new)

ID_REF,1,2,3,131076,9,10,12,13,14,15,...,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM27536,,0.356383,,,0.0,0.384615,0.305418,0.0209581,1.0,0.607143,...,0.309761,,,,1.0,,0.41055,0.41055,,1
GSM27537,,0.597518,,,0.969251,0.48951,0.919428,0.0419162,0.0173238,0.392857,...,0.447211,,,,0.126031,,0.0389908,0.0389908,,1
GSM27538,,0.365248,,,0.260695,0.0,1.0,0.0,0.0,0.178571,...,0.699203,,,,0.62662,,0.963303,0.963303,,1
GSM27540,,0.150709,,,1.0,0.241259,0.0,0.0568862,0.100358,0.142857,...,0.445219,,,,0.246172,,1.0,1.0,,1
GSM27541,,0.0,,,0.628342,0.234266,0.115201,0.113772,0.146953,0.357143,...,0.61255,,,,0.23086,,0.497706,0.497706,,1
GSM27531,,0.602837,,,0.455882,0.143357,0.62304,0.0329341,0.666069,0.142857,...,0.462151,,,,0.47821,,0.0688073,0.0688073,,0
GSM27532,,0.223404,,,0.22861,0.374126,0.278428,0.0598802,0.233572,0.410714,...,0.511952,,,,0.240283,,0.518349,0.518349,,0
GSM27533,,0.624113,,,0.165775,0.0804196,0.636833,0.110778,0.673238,0.0,...,0.378486,,,,0.213192,,0.192661,0.192661,,0
GSM27534,,0.445035,,,0.229947,0.241259,0.518158,0.00898204,0.502389,0.339286,...,0.437251,,,,0.0,,0.133028,0.133028,,0
GSM27543,,1.0,,,0.395722,0.192308,0.659555,0.0299401,0.535245,0.535714,...,0.492032,,,,0.590106,,0.360092,0.360092,,0


ID_REF,1,2,3,131076,9,10,12,13,14,15,...,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM29645,0.359551,,0.694396,0.0182983,,,,,,,...,0.542205,1.0,0.0,0.302326,,,,,0.0896899,1
GSM29646,0.579243,,0.421144,0.912168,,,,,,,...,1.0,0.788192,1.0,0.17759,,,,,0.927913,1
GSM29647,0.418096,,0.736568,0.209515,,,,,,,...,0.612196,0.0819188,0.0672043,0.133192,,,,,0.2171,1
GSM29648,0.176227,,0.320624,0.531565,,,,,,,...,0.428958,0.826568,0.865591,0.139535,,,,,0.590947,1
GSM29649,0.0,,0.0,1.0,,,,,,,...,0.541904,0.831734,0.142473,0.0295983,,,,,0.0620285,1
GSM29537,0.466884,,0.186597,0.589204,,,,,,,...,0.505257,0.548339,0.104839,0.192389,,,,,0.0,0
GSM29638,0.235068,,0.493356,0.0375114,,,,,,,...,0.248123,0.115867,0.0349462,0.477801,,,,,0.380553,0
GSM29643,0.338853,,0.556904,0.348582,,,,,,,...,0.3845,0.648708,0.107527,1.0,,,,,0.323554,0
GSM29644,0.735068,,0.126516,0.130833,,,,,,,...,0.0,0.0,0.0833333,0.0,,,,,0.29254,0
GSM29650,0.873448,,1.0,0.0,,,,,,,...,0.874136,0.928413,0.892473,0.926004,,,,,1.0,0


ID_REF,1,2,3,131076,9,10,12,13,14,15,...,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM114841,,1.0,,,1.0,0.367089,0.143934,0.563865,1.0,0.169811,...,0.788337,,,,0.278626,,0.0487805,0.0487805,,0
GSM114844,,0.208973,,,0.414013,0.227848,0.216277,0.146288,0.100128,0.245283,...,1.0,,,,0.100509,,0.207317,0.207317,,0
GSM114845,,0.125693,,,0.61465,0.822785,0.342879,0.00927948,0.708601,1.0,...,0.354212,,,,0.00508906,,0.695122,0.695122,,0
GSM114849,,0.423746,,,0.302548,0.291139,0.169555,0.510917,0.0218228,0.0660377,...,0.585313,,,,1.0,,0.817073,0.817073,,0
GSM114851,,0.226505,,,0.270701,0.0,0.320271,0.125546,0.603338,0.132075,...,0.652268,,,,0.388041,,0.097561,0.097561,,0
GSM114854,,0.386747,,,0.0923567,0.658228,0.157498,0.477074,0.501926,0.0471698,...,0.732181,,,,0.259542,,0.414634,0.414634,,0
GSM114855,,0.438701,,,0.525478,0.164557,1.0,0.00218341,0.0410783,0.0943396,...,0.490281,,,,0.28117,,0.0853659,0.0853659,,0
GSM114834,,0.0286193,,,0.684713,0.0886076,0.0,0.762828,0.165597,0.0,...,0.0,,,,0.689567,,1.0,1.0,,1
GSM114842,,0.805466,,,0.210191,0.607595,0.10324,0.186681,0.296534,0.179245,...,0.766739,,,,0.0,,0.280488,0.280488,,1
GSM114843,,0.120407,,,0.72293,0.949367,0.434815,1.0,0.675225,0.245283,...,0.717063,,,,0.12341,,0.47561,0.47561,,1


ID_REF,1,2,3,131076,9,10,12,13,14,15,...,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM155631,0.653276,0.810938,0.168546,0.350665,0.272959,0.341838,0.730945,0.744826,0.736345,0.711208,...,0.682032,0.446259,0.375688,0.508493,0.455459,,0.734211,0.734211,0.30879,0
GSM155643,0.353426,0.61021,0.0900933,0.405875,0.126684,0.6732,0.149307,0.669677,0.297598,0.381989,...,0.565364,0.109711,0.0720985,0.201682,0.18087,,0.781312,0.781312,0.118327,0
GSM155644,0.846963,0.564647,0.0302528,0.366577,0.128925,0.385669,0.121022,0.678778,0.768503,0.590598,...,0.54798,0.0,0.398746,0.303901,0.290844,,0.818469,0.818469,0.111728,0
GSM155729,0.351596,0.558253,1.0,0.278087,1.0,1.0,0.235161,0.937086,0.0,0.420498,...,0.0,1.0,0.0758657,0.900452,0.043677,,1.0,1.0,1.0,0
GSM156170,0.472015,0.245801,0.46289,0.443142,0.668163,0.479191,0.296338,0.547432,0.120795,0.513063,...,0.571991,0.537614,0.503406,0.180798,0.48695,,0.468456,0.468456,0.376739,0
GSM156171,0.581927,0.738539,0.446367,0.998472,0.592186,0.656963,0.401715,0.688473,0.324304,0.785003,...,0.739396,0.799523,0.231473,0.310821,0.018535,,0.542001,0.542001,0.416388,0
GSM156176,0.756673,0.378694,0.77788,0.133928,0.615433,0.403687,0.295478,0.604801,0.63366,0.61417,...,0.254742,0.858968,1.0,0.62425,0.283495,,0.633578,0.633578,0.311295,0
GSM156177,0.395325,0.374407,0.0473389,0.327033,0.262276,0.19006,0.0,0.654151,0.0863477,0.510837,...,0.818101,0.0122186,0.126702,0.272363,0.169886,,0.59029,0.59029,0.078271,0
GSM156178,0.0803662,0.616452,0.442229,0.514069,0.838356,0.936374,0.21448,1.0,0.00450444,0.0341097,...,0.411011,0.742203,0.205075,0.759293,0.359592,,0.92339,0.92339,0.841863,0
GSM156179,0.0,0.678148,0.542633,0.838737,0.988495,0.629411,0.211614,0.737938,0.360416,0.0,...,1.0,0.715982,0.467433,0.419867,0.106939,,0.613759,0.613759,0.586468,0


ID_REF,1,2,3,131076,9,10,12,13,14,15,...,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM277438,1.0,1.0,1.0,0.322851,0.0,0.532115,0.479324,0.667352,0.593257,0.248661,...,0.827365,0.0,0.693762,0.718163,0.163969,,0.29779,0.29779,0.33255,0
GSM277439,0.653275,0.16758,0.165049,0.327351,0.34417,0.342526,0.449352,0.539547,0.540561,0.601389,...,0.740126,0.804617,0.266477,0.0,0.342748,,0.0407697,0.0407697,0.288548,0
GSM277440,0.523412,0.294368,0.457359,0.0,0.244294,0.252491,0.420944,0.770855,0.780176,0.175465,...,0.817903,0.959305,0.514504,0.093775,0.210836,,0.213538,0.213538,0.0477384,0
GSM277441,0.225761,0.432814,0.240948,0.362684,0.802241,0.153889,0.0,0.00163229,0.634459,0.237184,...,0.881571,0.646122,0.460852,0.0105169,0.539008,,0.350066,0.350066,0.267202,0
GSM277442,0.273412,0.139948,0.0,0.308425,0.810793,0.0,0.728834,0.106868,0.555612,0.653574,...,0.724989,0.536159,0.271925,0.0067937,0.387518,,0.459792,0.459792,0.0,0
GSM277443,0.0,0.105716,0.0513409,0.160863,0.773261,0.230467,0.270302,0.0,0.582594,0.422143,...,0.651493,0.770906,0.368172,0.144814,0.308082,,0.422993,0.422993,0.62529,0
GSM277444,0.260925,0.298196,0.195198,0.219102,0.824616,0.547188,0.588135,0.410149,0.690468,0.524464,...,0.545892,0.689307,0.403724,0.467909,0.280745,,0.741594,0.741594,0.335011,0
GSM277445,0.584686,0.60515,0.577372,0.498701,0.129667,0.477616,1.0,0.769483,0.42871,0.513,...,0.160656,0.151039,0.672704,0.556028,0.130192,,0.781478,0.781478,0.992892,0
GSM277446,0.61808,0.368432,0.372631,0.673303,0.5445,0.434083,0.407946,0.425484,0.406537,0.422143,...,0.621144,0.857435,0.520179,0.141905,0.286259,,0.408262,0.408262,0.581022,0
GSM277447,0.819981,0.526393,0.6252,0.471007,0.420078,1.0,0.874843,0.694507,0.858878,0.507879,...,0.637501,0.56548,0.198623,1.0,0.292257,,0.146019,0.146019,0.718815,0


ID_REF,1,2,3,131076,9,10,12,13,14,15,...,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM201542,0.826925,0.151088,0.55927,0.0975916,0.234521,0.427557,0.338108,0.342172,0.621765,0.778657,...,0.636389,0.0384321,0.553655,0.502746,0.210966,,0.565971,0.565971,0.14677,1
GSM201543,0.750406,0.0369716,0.728029,0.0,0.0959198,0.537423,0.0,0.294047,0.683663,0.654951,...,0.49629,0.0,0.483539,0.0,0.52838,,0.83897,0.83897,0.125473,1
GSM201544,0.84279,0.0,0.771164,0.221675,0.311388,1.0,0.310492,0.0706268,1.0,0.786379,...,0.634281,0.833532,0.533717,0.449558,0.791723,,0.277485,0.277485,0.308905,1
GSM201545,0.634147,0.709944,0.928383,0.132638,0.551188,0.842405,0.363727,0.510438,0.633251,0.671635,...,0.431754,0.66607,0.735387,1.0,0.350432,,0.425291,0.425291,0.524615,1
GSM201829,0.542106,0.293777,0.601002,0.597466,0.664567,0.502625,0.202354,0.325754,0.681098,0.411931,...,0.377039,0.529531,0.706835,0.598627,0.489529,,0.646764,0.646764,0.292021,1
GSM201830,0.938848,0.427349,0.879643,0.411562,0.11646,0.566342,0.44412,0.509573,0.832381,1.0,...,1.0,0.0865296,0.892659,0.0915167,0.740147,,0.687634,0.687634,0.329061,1
GSM201831,1.0,0.152763,0.610366,0.0249667,0.188889,0.822493,0.591476,0.689195,0.897457,0.45614,...,0.518426,0.190708,0.248985,0.444879,0.770564,,0.828719,0.828719,0.434739,1
GSM201832,0.845634,0.0338439,0.0,0.802544,0.0,0.611758,0.44465,0.0,0.39104,0.741624,...,0.811761,0.269254,0.566086,0.205892,0.692655,,0.0,0.0,0.0,1
GSM201833,0.447655,0.327205,0.351811,0.304206,0.291637,0.449686,1.0,0.209782,0.46999,0.388642,...,0.384436,0.600064,0.190032,0.518981,0.731624,,0.495732,0.495732,0.332932,1
GSM201834,0.65621,0.272869,0.942459,0.282907,0.371977,0.735424,0.293804,0.192578,0.606962,0.50914,...,0.127252,0.596287,0.41676,0.474768,0.175803,,0.32739,0.32739,0.498276,1


ID_REF,1,2,3,131076,9,10,12,13,14,15,...,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM201849,0.458835,0.378227,0.108546,0.464012,0.0653359,0.848337,0.795135,0.724952,0.480945,0.416413,...,0.75652,0.186684,0.0,0.00969933,0.165131,,0.853385,0.853385,0.139814,0
GSM201850,0.62777,0.64632,0.181504,0.310362,0.237945,0.215371,1.0,0.752945,0.782331,0.603324,...,0.849769,0.471523,0.217251,0.387122,0.457359,,0.774817,0.774817,0.311004,0
GSM201851,0.912408,0.333403,0.0941796,0.462528,0.114931,0.457095,0.614251,0.875939,0.884558,0.634409,...,0.82345,0.109827,0.389836,0.16512,0.281907,,0.96701,0.96701,0.151605,0
GSM201852,0.246348,0.518489,1.0,0.142679,1.0,1.0,0.918769,0.915012,0.148423,0.262967,...,0.0,0.992334,0.541589,1.0,0.0262282,,1.0,1.0,1.0,0
GSM201853,0.409938,0.223511,0.445634,0.380612,0.725924,0.373072,0.354124,0.0732485,0.193556,0.400482,...,0.60853,0.483193,0.387517,0.177082,0.487667,,0.378221,0.378221,0.318152,0
GSM201854,0.515519,0.586068,0.384389,1.0,0.762994,0.691629,0.753917,0.490812,0.337458,0.711986,...,0.806375,0.794161,0.0155579,0.14624,0.0,,0.495144,0.495144,0.374284,0
GSM201855,0.793946,0.267205,0.685058,0.161975,0.81552,0.38149,0.815104,0.566529,0.723982,0.632113,...,0.482999,1.0,1.0,0.677681,0.302457,,0.713395,0.713395,0.337897,0
GSM201856,0.420114,0.150908,0.022184,0.31091,0.176371,0.0,0.167718,0.486073,0.167193,0.478517,...,0.875842,0.0,0.0320483,0.0,0.120804,,0.549414,0.549414,0.0531618,0
GSM201857,0.0471771,0.578021,0.451017,0.412948,0.807001,0.916552,0.732974,0.97097,0.104623,0.00847419,...,0.401099,0.678878,0.527989,0.745275,0.358612,,0.864612,0.864612,0.80779,0
GSM201858,0.0,0.65329,0.511092,0.730888,0.98495,0.466226,0.511637,0.294739,0.331517,0.0,...,0.986086,0.588565,0.245362,0.400315,0.0791228,,0.462213,0.462213,0.495875,0


ID_REF,1,2,3,131076,9,10,12,13,14,15,...,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM850527,0.434441,0.0,1.0,1.0,0.695249,0.294968,0.0127694,1.0,0.0,1.0,...,0.228134,0.173647,0.377126,0.193806,0.0,,1.0,1.0,0.0596846,0
GSM850528,0.480817,0.104285,0.0020124,0.284588,0.785497,0.0770181,0.495981,0.534207,0.433449,0.000878662,...,0.416488,1.0,0.277298,0.102602,0.104889,,0.0177607,0.0177607,0.40066,0
GSM850529,0.589708,0.120093,0.0,0.284712,0.722574,0.0255069,0.502639,0.293625,0.520687,0.32476,...,0.641144,0.922396,0.240641,0.170715,0.107342,,0.0,0.0,1.0,0
GSM850530,0.534821,0.352074,0.418144,0.317983,0.658039,0.376643,0.0,0.328219,0.264374,0.0994946,...,0.492877,0.362597,0.514314,0.0707735,0.138151,,0.853761,0.853761,0.931832,1
GSM850531,0.366893,0.506061,0.00748882,0.173828,0.735464,0.00528786,1.0,0.130208,0.314273,0.112469,...,1.0,0.421994,0.0,0.0290011,0.0689843,,0.0263392,0.0263392,0.810939,1
GSM850532,0.0,0.560055,0.210045,0.147566,0.610551,0.0945177,0.207783,0.0,0.456851,0.155483,...,0.567993,0.420845,0.561373,0.0,0.199106,,0.26751,0.26751,0.0472862,1
GSM850533,0.251124,0.47601,0.109241,0.127245,0.492935,0.0,0.881629,0.396327,0.475477,0.0,...,0.610309,0.227643,0.446696,0.0616502,0.119653,,0.0183907,0.0183907,0.439585,1
GSM850534,1.0,1.0,0.119396,0.0918431,1.0,0.425885,0.527225,0.541481,0.801295,0.442296,...,0.312233,0.0,1.0,0.526725,0.0752053,,0.196361,0.196361,0.394745,1
GSM850535,0.711123,0.757018,0.0973895,0.0593942,0.102724,1.0,0.385323,0.453658,1.0,0.143705,...,0.327009,0.0905675,0.886721,0.115905,1.0,,0.153092,0.153092,0.0,1
GSM850536,0.335482,0.837043,0.0785019,0.0,0.0,0.204011,0.473515,0.0723501,0.706195,0.100898,...,0.0,0.858598,0.45658,1.0,0.776129,,0.46923,0.46923,0.507243,1


ID_REF,1,2,3,131076,9,10,12,13,14,15,...,130916,51130_100302652,130940,130951,401331_10156_100271927,10720_7364_7365_7366_7367,8345,8345,131034,PCOS
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM1174425,,0.955117,,,,,0.128648,,,,...,,,,,,,,,,1
GSM1174429,,0.96552,,,,,0.131916,,,,...,,,,,,,,,,1
GSM1174436,,0.979426,,,,,0.102224,,,,...,,,,,,,,,,1
GSM1174427,,0.00614692,,,,,0.598101,,,,...,,,,,,,,,,1
GSM1174430,,0.00675278,,,,,0.449956,,,,...,,,,,,,,,,1
GSM1174432,,0.0,,,,,0.809916,,,,...,,,,,,,,,,1
GSM1174435,,0.0204319,,,,,0.291169,,,,...,,,,,,,,,,1
GSM1174424,,0.766085,,,,,0.09849,,,,...,,,,,,,,,,1
GSM1174428,,0.84471,,,,,0.148218,,,,...,,,,,,,,,,1
GSM1174433,,0.803164,,,,,0.0414954,,,,...,,,,,,,,,,1
