In [24]:
import pandas as pd
df = pd.DataFrame({ 'gene':["1 // foo // blabla",
                                   "2 // bar // lalala",
                                   "3 // qux // trilil",
                                   "4 // woz // hohoho",
                                   "nopeynope"], 
                   'cnv':['a','b','c','d','e'],
                   'cell1':[5,9,1,7,9], 
                   'cell2':[12,90,13,87,9],
                   'location':['1', '13.4', '1.1-2', '1.2-p22', '43-q44'],
                  'np_t_w': ['1','13.4','1.1','2','3']})
df

Unnamed: 0,cell1,cell2,cnv,gene,location,np_t_w
0,5,12,a,1 // foo // blabla,1,1.0
1,9,90,b,2 // bar // lalala,13.4,13.4
2,1,13,c,3 // qux // trilil,1.1-2,1.1
3,7,87,d,4 // woz // hohoho,1.2-p22,2.0
4,9,9,e,nopeynope,43-q44,3.0


In [10]:
df['location'].str.extract('(-)', expand=True)

Unnamed: 0,0
0,
1,
2,-
3,-
4,-


In [7]:
df = pd.DataFrame([['a','b',1],
                  ['a','c',1.9],
                  ['a','d',45],
                  ['a','e',3.9],
                  ['b','c', 2.1],
                  ['b','d',-2],
                  ['b','e',4.1]],
                 columns = ['cnv','exp','np_t_w'])
df

Unnamed: 0,cnv,exp,np_t_w
0,a,b,1.0
1,a,c,1.9
2,a,d,45.0
3,a,e,3.9
4,b,c,2.1
5,b,d,-2.0
6,b,e,4.1


In [8]:
### OLD
from scipy.stats import pearsonr
def rolling_pearson(df, group='cnv', data='np_t_w'):
    """
    computes pearson between array of data for one gene partners and each successive chromosomal neighbor
    
    :param df: pd df, clove results, sorted in ascending order of chromosome locus
    :param group: str, df column label on which to form gene neigbors, default 'cnv'
    :param data: str, df column label of source data to populate arrays, default 'np_t_w' (clove t-stats)
    """
    
    unique_genes = df[group].unique()
    results = []
    for idx, g0 in enumerate(unique_genes):
        g0_arr = df[df[group] == g0][data].values
        if idx < len(unique_genes) - 1:
            for g1 in unique_genes[idx+1:]:
                g1_arr = df[df[group] == g1][data].values
                print(g1_arr)
                pearson, pval = pearsonr(g0_arr, g1_arr)
                results.append([g0, g1, pearson, pval])

    return pd.DataFrame(results, columns=[group, group+'_k', 'pearson', 'pval'])

In [95]:
### NEW
from scipy.stats import pearsonr
from scipy.spatial import distance
def rolling_similarity(df, group='cnv', data='np_t_w', how='pearson'):
    """
    computes similarity between array of data for one gene partners and each successive chromosomal neighbor
    
    :param df: pd df, clove results, sorted in ascending order of chromosome locus
    :param group: str, df column label on which to form gene neigbors, default 'cnv'
    :param data: str, df column label of source data to populate arrays, default 'np_t_w' (clove t-stats)
    :param how: str, choice of: {pearson, euclidian, cosine}
    """
    
    unique_genes = df[group].unique()
    results = []
    for idx, g0 in enumerate(unique_genes):
        if idx < len(unique_genes) - 1:
            g1 = unique_genes[idx+1]
            merged = pd.merge(df[df[group] == g0], df[df[group] == g1], how='inner', on='exp')
            merged.columns = ['cnv_g0','exp','clove_g0','cnv_g1','clove_g1']
            if how == 'pearson':
                cols = [g0, g1, how ,'pval']
                pear, pval = pearsonr(merged['clove_g0'], merged['clove_g1'])
                results.append([g0,g1,pear,pval])
            elif how == 'euclidian':
                cols = [g0, g1, how]
                results.append([g0, g1, distance.euclidean(merged['clove_g0'], merged['clove_g1'])])
            elif how == 'cosine':
                cols = [g0, g1, how]
                results.append([g0, g1, distance.cosine(merged['clove_g0'], merged['clove_g1'])])

    return pd.DataFrame(results, columns=cols)

In [94]:
rolling_similarity(df, group='cnv',data='np_t_w')

q


Unnamed: 0,a,b,pearson,pval
0,a,b,-0.932864,0.234603


In [27]:
df

Unnamed: 0,cell1,cell2,cnv,gene,location,np_t_w
0,5,12,a,1 // foo // blabla,1,1.0
1,9,90,b,2 // bar // lalala,13.4,13.4
2,1,13,c,3 // qux // trilil,1.1-2,1.1
3,7,87,d,4 // woz // hohoho,1.2-p22,2.0
4,9,9,e,nopeynope,43-q44,3.0


In [13]:
pearsonr?

In [22]:
import numpy as np
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
x[:2]

array([0, 1])

In [23]:
np.random.shuffle(x)


TypeError: 'NoneType' object is not subscriptable

develop explicit way to define certain combinations of genes on which to compute CLOvEs

In [87]:
import pandas as pd
import numpy as np
from scipy import stats
from cloveFunctions import *
import itertools

In [88]:
edf = pd.read_csv('data/breast_mrna.tab.gz', compression='gzip', sep='\t', index_col=0)
cdf = pd.read_csv('data/breast_CCLE_DEL_calls.csv.gz', compression='gzip', index_col=0)

In [89]:
e_samp = edf.index[1:3].values
c_samp = cdf.index[3:5].values

In [90]:
c_samp

array(['A2LD1', 'A2M'], dtype=object)

In [91]:
def explicitPairContextStat(expdf, cnvdf, exp_lis=False, cnv_lis=False, //
                            cat_df=False, min_n=2, min_var=0.2 nan_style='omit', permute=False):
    """
    takes exp and cnv genes (either all or explicitand returns pair summary statistics
    
    :param n_samp: int, number of random samples to take
    :param expdf: pandas dataframe, expression by sample (hopefully filtered, tissue specific, with matching samples in cnv)
    :param cnvdf: pandas dataframe, binarized mask 5(1=del, 0=nodel) deletion by sample (hopefully tissue specific, with matching samples in exp)
    :param exp_lis: list of str, HUGO gene names in expdf to restrict to, default is False (use all genes in expdf)
    :param exp_lis: list of str, HUGO gene names in cnvdf to restrict to, default is False (use all genes in cnvdf)
    :param in_df: pandas dataframe, previous calculations to concat new results to, used in while loop to get n_samp
    :param nan_style: str, how the stats.ttest_ind treats NANs, {‘propagate’, ‘raise’, ‘omit’}
    :param permute: bool, True will calculate pairs with randomly permuted expression matrix as null model
    
    returns df[['exp', 'cnv', 'cntxt_pos_mu', 'cntxt_neg_mu', 
                'cntxt_pos_var', 'cntxt_neg_var', 
                'cntxt_pos_n', 'cntxt_neg_n']]
    """
    
    cells = list(set(cnvdf.columns).intersection(expdf.columns))
    expdf = expdf[cells]
    cmask = cnvdf[cells] == 1
    
    if type(exp_lis) != bool:
        exp_samp = set(expdf.index).intersection(exp_lis)
        for gene in exp_lis:
            if gene not in expdf.index:
                print('{} not found in expdf.index.  Omitted'.format(gene))
    else:
        exp_samp = expdf.index
    
    if type(cnv_lis) != bool:
        cnv_samp = set(cnvdf.index).intersection(cnv_lis)
        for gene in cnv_lis:
            if gene not in cnvdf.index:
                print('{} not found in cnvdf.index.  Omitted'.format(gene))
    else:
        cnv_samp = cnvdf.index
        
    r={'exp':[],'cnv':[]}
    for pair in itertools.product(exp_samp, cnv_samp):
        r['exp'].append(pair[0])
        r['cnv'].append(pair[1])
    df = pd.DataFrame(r)
    
    pos_n, neg_n, = [], []
    pos_mu, neg_mu = [], []
    pos_var, neg_var = [], []
    cohens_d = []
    np_t_s, np_p_s = [], []
    np_t_w, np_p_w = [], []
    
    df
    
    for row in df.itertuples():
        # mask cnv contexts onto expression data
        pos = np.array(expdf.loc[row.exp][cmask.loc[row.cnv]])
        neg = np.array(expdf.loc[row.exp][~cmask.loc[row.cnv]])
        
        # calculate n
        pos_n.append(len(pos))
        neg_n.append(len(neg))
        
        print(pos_n, neg_n)
        
#         # calculate mu
#         pos_mu.append(pos.mean())
#         neg_mu.append(neg.mean())
        
#         # calculate var
#         pos_var.append(pos.var())
#         neg_var.append(neg.var())
        
#         # calculate cohen's d
#         cohens_d.append(cohenD(pos, neg))
        
#         # calculate t_stat, welch
#         t, p = stats.ttest_ind(pos, neg, nan_policy=nan_style, equal_var=True)
#         np_t_w.append(t)
#         np_p_w.append(p)
        
#         if permute:
#             pos = np.array(expdf.loc[row.exp][cmask_n.loc[row.cnv]])
#             neg = np.array(expdf.loc[row.exp][~cmask_n.loc[row.cnv]])
#             t, p = stats.ttest_ind(pos, neg, nan_policy=nan_style, equal_var=True)
#             np_t_w_null.append(t)
#             np_p_w_null.append(p)
            
#     df['pos_n'] = pos_n
#     df['neg_n'] = neg_n
#     df['pos_mu'] = pos_mu
#     df['neg_mu'] = neg_mu
#     df['pos_var'] = pos_var
#     df['neg_var'] = neg_var
#     df['cohens_d'] = cohens_d
#     df['np_t_w'] = np_t_w
#     df['np_p_w'] = np_p_w
    
#     if permute:
#         df['np_t_w_null'] = np_t_w_null
#         df['np_t_w_null'] = np_t_w_null
    
#     df.dropna(inplace=True)
#     df['t_shrnk_glob'] = np.vectorize(t_welch)(df['pos_n'], df['neg_n'], 
#                                                df['pos_mu'], df['neg_mu'], 
#                                                df['pos_var'], df['neg_var'], 
#                                                meanVar(expdf))
    
#     right = expdf.rename_axis('exp', axis=0) 
#     right['gene_var_exp'] = right.var(axis=1)
#     right = right.reset_index()
    
#     df = pd.merge(df, right[['exp','gene_var_exp']], on='exp')
    
#     if cat_df:
#         return pd.concat([cat_df, df])
        
    return df

SyntaxError: invalid syntax (<ipython-input-91-d71a3c9ad313>, line 1)

In [92]:
edf_f, cdf_f = mainFitler(edf, cnvdf_fh=cdf, var=0.2, n=2, amp_fh=False, dele_fh=False, mut_fh=False, save=False)
df = explicitPairContextStat(edf, cdf, exp_lis=e_samp, cnv_lis=c_samp)

TypeError: invalid file:                  AU565       BT20      BT474      BT483      BT549     CAL120  \
1/2-SBSRNA4        NaN        NaN   5.401017   5.717574   5.030948   6.448201   
A1BG               NaN        NaN        NaN   6.799552   7.599171   8.221060   
A1BG-AS1           NaN        NaN        NaN        NaN        NaN        NaN   
A2LD1              NaN        NaN   6.085782   6.441127   7.877578   5.075992   
A2M                NaN        NaN        NaN        NaN        NaN        NaN   
A2ML1         4.818356   4.184284        NaN        NaN        NaN        NaN   
A4GALT             NaN        NaN        NaN        NaN        NaN        NaN   
A4GNT              NaN        NaN        NaN        NaN        NaN        NaN   
AA06               NaN        NaN        NaN        NaN        NaN        NaN   
AAAS          9.229649   8.208900   7.934789   7.726963   8.577770   7.685151   
AACS          9.992298   9.378825   8.946910   9.248846   7.269798   8.656520   
AACSP1             NaN        NaN        NaN        NaN        NaN        NaN   
AADAC              NaN        NaN        NaN        NaN        NaN        NaN   
AADAT              NaN        NaN   6.254454   6.627580        NaN   6.339025   
AAGAB         7.578082   8.650473   7.721881   8.325902        NaN   7.472990   
AAK1          4.891353   5.217003   4.451226   4.487502   4.646080   4.716834   
AAMP          9.679279   9.168416   9.153500   8.451337   8.564627   9.017830   
AARS          9.550180  11.171280  11.074530  10.441280  10.388020  10.370440   
AARS2         7.150403   7.563896   6.649362   6.591405        NaN   6.694557   
AARSD1             NaN   4.470384        NaN   4.516075        NaN   4.399421   
AASDH         8.159118   7.676645   8.296540   8.234043   7.339728   8.152327   
AASDHPPT      9.759328   9.995366  10.193500   9.561549  10.028500  10.214240   
AASS               NaN   4.397417   4.109508        NaN        NaN   6.690876   
AATF         10.261070  11.319320  11.287300  10.396740  10.578690  10.697130   
AATK               NaN        NaN        NaN        NaN        NaN   5.174114   
ABAT          5.374396   8.057765   8.954705  10.049300        NaN        NaN   
ABCA1         5.286819   5.555085   4.658617   4.936168   4.453195   9.932750   
ABCA11P       4.601520   6.700054   8.166716   7.462878   5.762992   6.696349   
ABCA12        7.178539   6.416960   4.805860   6.445141        NaN        NaN   
ABCA13        4.620888        NaN        NaN        NaN        NaN        NaN   
...                ...        ...        ...        ...        ...        ...   
ZSCAN1             NaN        NaN        NaN        NaN        NaN        NaN   
ZSCAN12       4.190345   4.194638   4.115327        NaN        NaN   4.331519   
ZSCAN16       6.956788   7.698805   8.370701   7.018322   5.990665   7.110038   
ZSCAN18            NaN   7.872182   9.017076   9.843330   8.991950  10.368650   
ZSCAN2             NaN        NaN   5.739653   5.351689   5.731398   5.178578   
ZSCAN20            NaN        NaN   4.069165        NaN        NaN   4.178654   
ZSCAN21       8.725225   7.617147   8.382343   8.320641   8.263767   8.114889   
ZSCAN22       5.958741   6.416857   6.329459   6.188894   5.960686   6.800494   
ZSCAN23            NaN        NaN        NaN        NaN        NaN        NaN   
ZSCAN29       8.480873   8.242375   8.339835   8.031781   7.390029   7.335942   
ZSCAN30            NaN        NaN   6.543900   6.585787   5.893547   6.664070   
ZSCAN4             NaN        NaN        NaN        NaN        NaN        NaN   
ZSCAN5A       6.712712   5.092116        NaN        NaN        NaN   6.824927   
ZSWIM1        7.730974   7.696359   7.032717   7.227403   7.065143   6.829857   
ZSWIM3             NaN        NaN        NaN        NaN        NaN   5.708715   
ZSWIM5             NaN        NaN   5.012450   6.331182        NaN        NaN   
ZSWIM6        5.059113   7.811076   8.034542   8.475565   8.731724  10.211730   
ZSWIM7        6.776535   7.377791   7.177923   6.118944   6.935172   6.290571   
ZUFSP         7.656923   8.983916   8.818097   7.226705   7.693307   7.817564   
ZW10          9.862230  10.276770   9.692453   9.672103  10.281400  10.042920   
ZWILCH       10.114430  10.816400  10.108540   9.005857  10.355110   9.697626   
ZWINT        12.306900  12.070270  12.305410  11.790140  12.285510  12.080140   
ZXDA          4.208478   5.618784   5.817537   5.668045   4.776957   5.682596   
ZXDB          5.257425   5.399877   6.162034   5.551925   4.737779   5.453927   
ZXDC          6.268052   6.764904   6.845357   6.767751   5.776595   6.020465   
ZYG11A             NaN   7.563141        NaN   8.407823   7.605774   8.026693   
ZYG11B        8.575357   8.340189   8.753212   8.545071   8.353551   9.014222   
ZYX          10.261830   9.331145   7.264385   7.635322  10.084580   8.492724   
ZZEF1         6.471422   6.338926   6.240221   5.743932   6.728235   5.838322   
ZZZ3          8.962359   9.938775   9.785010   8.855000   9.749450  10.382230   

                CAL148      CAL51     CAL851      CAMA1    ...       MDAMB436  \
1/2-SBSRNA4   5.862162   6.607692   4.712810   5.230651    ...            NaN   
A1BG          6.371130   6.126693        NaN   7.742350    ...       6.824272   
A1BG-AS1           NaN        NaN        NaN        NaN    ...            NaN   
A2LD1         6.696254   7.141233   6.933365   6.505857    ...       6.193615   
A2M                NaN        NaN        NaN        NaN    ...            NaN   
A2ML1         6.730723        NaN   6.400949        NaN    ...            NaN   
A4GALT             NaN        NaN        NaN        NaN    ...            NaN   
A4GNT              NaN        NaN        NaN        NaN    ...            NaN   
AA06               NaN        NaN        NaN        NaN    ...            NaN   
AAAS          8.476448   8.478623        NaN        NaN    ...       7.993578   
AACS          8.605605   8.215746   9.288716   6.600951    ...       9.363530   
AACSP1             NaN        NaN        NaN        NaN    ...            NaN   
AADAC              NaN        NaN        NaN        NaN    ...            NaN   
AADAT              NaN   8.351869        NaN   7.170359    ...       6.382376   
AAGAB         8.366566   7.789415   7.635354   8.075536    ...       8.350380   
AAK1          4.413313   4.435174   4.885669   4.536779    ...       4.926903   
AAMP          9.028979   8.434494   8.914895   9.119010    ...       9.318343   
AARS          9.066320  10.399480  10.489330  10.294340    ...      10.367540   
AARS2         6.973182   7.171527   6.612123   7.724721    ...       6.197298   
AARSD1        4.428601   4.560750   4.575146   4.923492    ...       4.665651   
AASDH         8.041343   8.651756   7.204428   8.537359    ...       9.277981   
AASDHPPT      9.713813  10.673880   9.476205   9.759420    ...       9.762572   
AASS               NaN   6.376552        NaN        NaN    ...       4.272343   
AATF         11.371680  10.408230  10.917820  10.763880    ...      10.703100   
AATK               NaN   5.534264        NaN   6.215613    ...            NaN   
ABAT          5.800044   5.232220   6.096316  10.244410    ...       4.750387   
ABCA1              NaN   7.528114   6.487035   7.781025    ...      10.101480   
ABCA11P       6.799648   6.829173   4.648600   7.045971    ...       6.291848   
ABCA12        4.028794        NaN   4.837690        NaN    ...            NaN   
ABCA13             NaN        NaN        NaN   3.494250    ...            NaN   
...                ...        ...        ...        ...    ...            ...   
ZSCAN1             NaN        NaN        NaN        NaN    ...            NaN   
ZSCAN12       4.227465        NaN   4.351954   4.217842    ...       4.154917   
ZSCAN16       8.446662   7.308503   7.460758   8.801527    ...       5.440765   
ZSCAN18            NaN   8.606428   8.684165   7.870384    ...       8.709539   
ZSCAN2        6.371268   6.206897        NaN        NaN    ...            NaN   
ZSCAN20            NaN        NaN        NaN        NaN    ...            NaN   
ZSCAN21       8.941499   8.279793        NaN   8.523518    ...       8.549068   
ZSCAN22       7.152042   6.659347   5.962933   6.223348    ...       6.694190   
ZSCAN23            NaN        NaN        NaN        NaN    ...            NaN   
ZSCAN29       8.594872   8.384741   6.697005   9.059508    ...       8.215356   
ZSCAN30       6.697018   7.164075        NaN   6.342103    ...       6.306116   
ZSCAN4             NaN        NaN        NaN        NaN    ...            NaN   
ZSCAN5A       6.115686   6.103238   5.716368        NaN    ...       6.567625   
ZSWIM1        7.185990   6.794718        NaN   7.058635    ...       6.476053   
ZSWIM3             NaN        NaN        NaN        NaN    ...            NaN   
ZSWIM5        6.925499   4.805001        NaN        NaN    ...            NaN   
ZSWIM6        7.766529   8.657669   8.129996   7.321884    ...       8.384978   
ZSWIM7        7.828040   7.173896   7.493884   7.285599    ...       6.491092   
ZUFSP         8.142718   8.104419   7.153235   7.832600    ...       7.747949   
ZW10         10.107660   9.909033   9.802782   9.630733    ...       9.719796   
ZWILCH        9.510691  10.125400   9.636026  10.352660    ...      10.873210   
ZWINT        12.356410  12.411690  11.983480  12.213640    ...      12.292380   
ZXDA          5.986959   5.860412   4.821368   5.049491    ...       5.506668   
ZXDB          5.555560        NaN   5.149921   5.797910    ...       5.576138   
ZXDC          6.407387   6.753649   4.896261   6.718253    ...       6.016499   
ZYG11A        6.646481        NaN        NaN        NaN    ...       8.119946   
ZYG11B        9.348654   9.351545   7.514766   8.251154    ...       8.897943   
ZYX           7.275881   8.807075   9.281140   7.862486    ...       9.169788   
ZZEF1         6.080077   6.079540   6.044560   5.848208    ...       5.919734   
ZZZ3          9.137805   9.803556   9.328253   9.115391    ...      10.037540   

              MDAMB453   MDAMB468      SKBR3       T47D    UACC812    UACC893  \
1/2-SBSRNA4   5.178795   4.992288   5.285821        NaN   6.137365   5.673962   
A1BG          7.822566        NaN        NaN   6.638034   7.041119        NaN   
A1BG-AS1           NaN        NaN        NaN        NaN   5.584081        NaN   
A2LD1         4.966001   6.616900   6.069765   6.190813   4.239530   6.767208   
A2M                NaN   7.406219        NaN        NaN        NaN        NaN   
A2ML1              NaN        NaN   5.900330        NaN        NaN   4.164136   
A4GALT             NaN        NaN        NaN        NaN        NaN        NaN   
A4GNT              NaN        NaN        NaN        NaN        NaN        NaN   
AA06          4.168221        NaN        NaN        NaN        NaN        NaN   
AAAS          8.429693   8.486038   8.545306   8.629433   8.173907   8.252124   
AACS          9.605350   8.894754   9.517088   9.284242   9.208145   8.029938   
AACSP1             NaN        NaN        NaN        NaN        NaN        NaN   
AADAC              NaN        NaN        NaN        NaN        NaN        NaN   
AADAT              NaN        NaN        NaN        NaN   6.806400   8.915041   
AAGAB         8.782118   7.774441   7.653187   7.937500   8.186781   8.469779   
AAK1          4.585792   4.946612   4.767722        NaN   4.672083   4.502843   
AAMP          9.745142   9.474694   9.055880   8.777973   8.455665   9.095028   
AARS         10.403840  10.264270   9.561374  10.919070  10.219930  10.938930   
AARS2         7.058317   6.476547   6.760015   6.761700   8.058346   6.866194   
AARSD1        4.330661   4.614783        NaN        NaN        NaN   4.281547   
AASDH         8.025286   7.105355   8.415030   8.633640   7.448323   7.539519   
AASDHPPT      9.953846  10.181720   9.491801  10.491160   9.911799   9.462094   
AASS               NaN        NaN        NaN        NaN        NaN        NaN   
AATF         10.858490  10.342990  10.015320  10.394340  10.846720  10.184270   
AATK               NaN        NaN        NaN        NaN        NaN        NaN   
ABAT          7.287071   5.213668   4.664408   9.509522  10.432790   4.547066   
ABCA1              NaN        NaN   5.426454   5.051885   5.023736   6.888092   
ABCA11P       6.179356   4.396465   5.712473   7.136001   5.869844   5.677229   
ABCA12        8.513917   4.430781   8.381972   8.699961   6.859760   5.767731   
ABCA13             NaN        NaN        NaN        NaN        NaN        NaN   
...                ...        ...        ...        ...        ...        ...   
ZSCAN1             NaN        NaN        NaN        NaN        NaN        NaN   
ZSCAN12       4.492391   4.114480   4.452775        NaN        NaN        NaN   
ZSCAN16       7.446225   8.104886   7.002219   8.143650   7.796094   7.280588   
ZSCAN18            NaN  10.252400        NaN   9.335976   8.829829   7.318254   
ZSCAN2             NaN        NaN        NaN   5.389772   5.195696   5.903602   
ZSCAN20            NaN        NaN   4.325872        NaN        NaN        NaN   
ZSCAN21       8.393926   7.786510   8.731034   8.139547   7.311099   8.436025   
ZSCAN22       6.146975   6.305369   6.830027   6.141580   6.298183   5.922633   
ZSCAN23            NaN        NaN        NaN        NaN        NaN        NaN   
ZSCAN29       8.958674   7.289526   8.577037   8.130506   8.154562   8.458700   
ZSCAN30       6.044507   5.789701        NaN   6.335481   6.351107        NaN   
ZSCAN4             NaN   4.329499        NaN        NaN        NaN        NaN   
ZSCAN5A       6.752105   6.203394   6.496085        NaN   4.700283        NaN   
ZSWIM1        7.095438   6.789234   8.036248   7.428427   7.138898   7.368541   
ZSWIM3        6.288714        NaN        NaN        NaN        NaN        NaN   
ZSWIM5        4.988909   6.287229        NaN        NaN   6.083219        NaN   
ZSWIM6        7.274949   7.671754   6.137067   7.474859   9.404527   6.902036   
ZSWIM7        8.050698   7.078139   7.053482   7.225688   7.428633   6.549287   
ZUFSP         8.372729   7.924757   7.658010   7.543463   8.355267   9.643117   
ZW10          9.861009  10.213070   9.672779  10.759320   9.396257   9.133978   
ZWILCH       10.813400   9.701169  10.201880  10.210170   9.262472   9.986816   
ZWINT        12.773760  12.273890  12.882160  12.825140  12.284360  11.855430   
ZXDA          5.136422   6.028195   5.099157   5.395938   5.423396   5.727188   
ZXDB          5.245288   5.803195   5.105470   5.032590   5.048845   5.777585   
ZXDC          6.921565   6.344338   6.199790   6.574778   7.403573   6.842925   
ZYG11A        8.423384   8.146307        NaN   8.013076   7.919800        NaN   
ZYG11B        8.856997   9.023488   9.273552   8.684761   8.474782   7.813156   
ZYX           6.943276   8.704727   8.884001        NaN   7.386169   9.400195   
ZZEF1         6.107978   6.031191   6.155779   6.034748   6.628869   6.326017   
ZZZ3          9.292901  10.502180   9.181607   9.057616   8.273064   8.376453   

                  YMB1      ZR751     ZR7530  
1/2-SBSRNA4   5.461494        NaN   5.352480  
A1BG          8.521523   7.731496   6.945533  
A1BG-AS1           NaN        NaN        NaN  
A2LD1         6.553315   6.368450   6.027463  
A2M                NaN        NaN        NaN  
A2ML1              NaN        NaN        NaN  
A4GALT             NaN        NaN        NaN  
A4GNT              NaN   4.388632        NaN  
AA06               NaN        NaN        NaN  
AAAS          7.589940        NaN   7.935206  
AACS          9.604330   9.366894   9.054392  
AACSP1             NaN        NaN        NaN  
AADAC              NaN        NaN        NaN  
AADAT         8.033582   6.746711        NaN  
AAGAB         8.348037   8.309408   8.758870  
AAK1          4.563789   4.625151   4.706801  
AAMP          8.992434   8.710091   8.564691  
AARS         11.440810  10.206370   9.500120  
AARS2         6.706370        NaN   6.600794  
AARSD1             NaN        NaN        NaN  
AASDH         8.448802   8.685290   8.258714  
AASDHPPT      9.566819   9.993601   8.996769  
AASS               NaN        NaN        NaN  
AATF         10.505390   9.425864  12.487220  
AATK               NaN        NaN        NaN  
ABAT          8.745520   9.319051   9.866115  
ABCA1         8.325855   7.468163   6.674429  
ABCA11P       6.429127   5.904540   5.748567  
ABCA12        9.758344   8.097042        NaN  
ABCA13             NaN        NaN        NaN  
...                ...        ...        ...  
ZSCAN1             NaN        NaN        NaN  
ZSCAN12            NaN        NaN   4.117767  
ZSCAN16       7.528583   7.915255   7.242831  
ZSCAN18            NaN        NaN        NaN  
ZSCAN2        5.387585        NaN   5.646473  
ZSCAN20            NaN        NaN   4.289713  
ZSCAN21       8.149582   7.938695   7.710341  
ZSCAN22       6.013011   5.908308   6.403885  
ZSCAN23            NaN        NaN        NaN  
ZSCAN29       9.013274   8.303932   8.008774  
ZSCAN30       5.987273   6.343423   5.876367  
ZSCAN4             NaN        NaN        NaN  
ZSCAN5A            NaN        NaN        NaN  
ZSWIM1        6.945576   6.610644   6.999290  
ZSWIM3             NaN        NaN        NaN  
ZSWIM5             NaN   5.593190   5.537770  
ZSWIM6        5.972131   8.052469   9.109592  
ZSWIM7        7.209974   7.042388   5.585066  
ZUFSP         6.979407   7.274042   7.718757  
ZW10          9.017020   9.225289   9.012209  
ZWILCH       10.277460  10.398470   9.650302  
ZWINT        12.249660  12.621280  11.530060  
ZXDA          5.330254   5.008054   5.339243  
ZXDB          5.296610   5.069002   5.428846  
ZXDC          6.678727   6.836028   6.956828  
ZYG11A        7.691551   7.285720   7.280116  
ZYG11B        8.154350   8.469786   9.191729  
ZYX           6.802321   6.434814   8.194097  
ZZEF1         5.835767        NaN   5.870717  
ZZZ3          8.979249   9.373826   9.153612  

[16237 rows x 59 columns]

In [68]:
df['exp'].unique()

array(['A1BG'], dtype=object)

In [69]:
df

Unnamed: 0,cnv,exp
0,A2LD1,A1BG
1,A2M,A1BG
