In [3]:
# https://pandas.pydata.org/docs/user_guide/10min.html#min
import os
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# Build a Series from a plain list; no index is given, so pandas assigns
# the default integer index 0..5.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [3]:
# Show the Series: default integer index 0-5; the NaN entry makes the dtype float64.
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [6]:
# Ten consecutive calendar days starting 2013-01-01; this DatetimeIndex is
# used further down as the row index of a DataFrame.
dates = pd.date_range(start="20130101", periods=10, freq="D")

In [7]:
# Show the DatetimeIndex: ten daily timestamps, 2013-01-01 through 2013-01-10 (freq='D').
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06', '2013-01-07', '2013-01-08',
               '2013-01-09', '2013-01-10'],
              dtype='datetime64[ns]', freq='D')

In [11]:
# Build a 10x4 frame of standard-normal draws, indexed by `dates`, with the
# string column labels "1".."4".
# A seeded Generator replaces the unseeded np.random.randn call so that
# Restart & Run All reproduces the same numbers. No seed is visible for the
# values in the stored outputs, so they will differ after re-execution.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.standard_normal((10, 4)), index=dates, columns=list("1234"))

In [12]:
# Show the whole frame: 10 rows indexed by date, columns "1" through "4".
df

Unnamed: 0,1,2,3,4
2013-01-01,1.367263,-0.171257,0.42861,0.050025
2013-01-02,-1.8864,0.261755,0.033428,-2.107026
2013-01-03,0.085557,0.63621,1.441403,1.979614
2013-01-04,-1.615251,1.010016,0.377631,-1.631396
2013-01-05,0.951663,-0.78042,1.614135,-0.306678
2013-01-06,1.57336,0.909731,-1.145545,0.603921
2013-01-07,-1.009285,-0.242192,0.270666,-1.809228
2013-01-08,-1.095781,-0.92847,-0.474325,-0.760582
2013-01-09,-1.180316,-1.488034,1.177143,0.553916
2013-01-10,1.01186,-0.761266,-1.095542,-0.941848


In [16]:
# Build a DataFrame from a mapping of column name -> column spec.
# Scalar entries (A, B, F) are broadcast to the length of the array-like
# entries (C, D, E), and every column keeps its own dtype.
df2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp("20130102"),
        C=pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        D=np.full(4, 3, dtype="int32"),
        E=pd.Categorical(["test", "train"] * 2),
        F="foo",
    )
)

In [20]:
# Show df2: the scalar entries A, B and F are repeated on all four rows.
df2


Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [21]:
# Per-column dtypes: unlike a NumPy array, each DataFrame column has its own type.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [23]:
# df2.<TAB>  # attribute tab completion is an IPython feature; skipped here (not using IPython)

In [24]:
# First rows of the frame; head() with no argument returns five rows.
df.head()

Unnamed: 0,1,2,3,4
2013-01-01,1.367263,-0.171257,0.42861,0.050025
2013-01-02,-1.8864,0.261755,0.033428,-2.107026
2013-01-03,0.085557,0.63621,1.441403,1.979614
2013-01-04,-1.615251,1.010016,0.377631,-1.631396
2013-01-05,0.951663,-0.78042,1.614135,-0.306678


In [25]:
# Last rows of the frame; tail() with no argument returns five rows.
df.tail()

Unnamed: 0,1,2,3,4
2013-01-06,1.57336,0.909731,-1.145545,0.603921
2013-01-07,-1.009285,-0.242192,0.270666,-1.809228
2013-01-08,-1.095781,-0.92847,-0.474325,-0.760582
2013-01-09,-1.180316,-1.488034,1.177143,0.553916
2013-01-10,1.01186,-0.761266,-1.095542,-0.941848


In [26]:
# df2 has only four rows, so head() returns the entire frame.
df2.head()

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [27]:
# Row labels of df: the DatetimeIndex that was passed in as `index=dates`.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06', '2013-01-07', '2013-01-08',
               '2013-01-09', '2013-01-10'],
              dtype='datetime64[ns]', freq='D')

In [29]:
# Column labels of df: the strings '1'..'4' (object dtype), not integers.
df.columns

Index(['1', '2', '3', '4'], dtype='object')

In [30]:
# NumPy representation of the values only; index and column labels are not part of the array.
df.to_numpy()

array([[ 1.36726254, -0.17125722,  0.42860974,  0.050025  ],
       [-1.88640045,  0.26175511,  0.03342788, -2.10702568],
       [ 0.08555655,  0.6362095 ,  1.44140334,  1.97961414],
       [-1.61525137,  1.01001577,  0.37763067, -1.63139556],
       [ 0.95166254, -0.78041977,  1.61413471, -0.30667838],
       [ 1.57336017,  0.90973138, -1.14554499,  0.60392113],
       [-1.00928518, -0.24219227,  0.27066555, -1.80922849],
       [-1.09578078, -0.92846957, -0.47432482, -0.7605815 ],
       [-1.18031611, -1.48803418,  1.17714336,  0.55391589],
       [ 1.01185966, -0.76126599, -1.09554207, -0.94184849]])

In [31]:
# df2 mixes several column dtypes, so the combined array falls back to dtype=object.
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [32]:
df2.describe()  # summary statistics; the stored output covers only the numeric columns A, C and D

Unnamed: 0,A,C,D
count,4.0,4.0,4.0
mean,1.0,1.0,3.0
std,0.0,0.0,0.0
min,1.0,1.0,3.0
25%,1.0,1.0,3.0
50%,1.0,1.0,3.0
75%,1.0,1.0,3.0
max,1.0,1.0,3.0


In [34]:
# Transpose: the dates become the columns and '1'..'4' become the row labels.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06,2013-01-07,2013-01-08,2013-01-09,2013-01-10
1,1.367263,-1.8864,0.085557,-1.615251,0.951663,1.57336,-1.009285,-1.095781,-1.180316,1.01186
2,-0.171257,0.261755,0.63621,1.010016,-0.78042,0.909731,-0.242192,-0.92847,-1.488034,-0.761266
3,0.42861,0.033428,1.441403,0.377631,1.614135,-1.145545,0.270666,-0.474325,1.177143,-1.095542
4,0.050025,-2.107026,1.979614,-1.631396,-0.306678,0.603921,-1.809228,-0.760582,0.553916,-0.941848


In [7]:
# Load TCGA-LUAD-L4.csv into a DataFrame.
# The directory is read from the TCPA_DATA_DIR environment variable so the
# notebook is not tied to one machine; the original local folder remains the
# default, so behavior is unchanged when the variable is unset.
data_dir = Path(os.environ.get("TCPA_DATA_DIR", "C:\\Users\\Mercurial\\Downloads\\tmp"))
df3 = pd.read_csv(data_dir / "TCGA-LUAD-L4.csv")

In [8]:
# Show the loaded table (pandas truncates the display); the first four columns
# are Sample_ID, Cancer_Type, Sample_Type and SetID, followed by numeric columns.
df3

Unnamed: 0,Sample_ID,Cancer_Type,Sample_Type,SetID,X1433EPSILON,X4EBP1,X4EBP1_pS65,X4EBP1_pT37T46,X53BP1,ACC_pS79,...,NRF2,PARPAB3,THYMIDILATESYNTHASE,TTF1,CHROMOGRANINANTERM,CK5,NAPSINA,P63,RET_pY905,SYNAPTOPHYSIN
0,TCGA-44-2657-01A-21-2190-20,LUAD,Primary,51,-0.193310,-0.403080,-0.147000,0.316530,-0.532830,-0.980510,...,0.044750,0.199650,-0.22208,4.1317,0.141930,-0.29942,-0.003136,-0.495560,-0.031969,0.184620
1,TCGA-64-1676-01A-21-2190-20,LUAD,Primary,51,-0.127560,0.126140,-0.118130,-0.465630,-0.587760,-0.549750,...,-0.195990,-0.114490,-0.29645,4.6185,0.180240,-0.15951,0.088298,-0.065447,-0.144650,0.068915
2,TCGA-44-3919-01A-11-2190-20,LUAD,Primary,51,-0.338430,-0.617230,-0.211500,0.634580,-0.708460,0.182930,...,-0.272550,0.067162,-0.40472,4.9847,0.083862,-0.26624,0.232980,-0.217480,0.252600,0.668800
3,TCGA-55-1592-01A-21-2190-20,LUAD,Primary,51,-0.332780,-0.124810,-0.034797,0.893290,-0.315700,0.300790,...,-0.347790,0.040919,-0.44552,4.2575,0.068243,-0.40863,-0.122020,-0.680860,0.089811,1.035600
4,TCGA-38-4627-01A-21-2190-20,LUAD,Primary,51,-0.096924,-0.419540,-0.235340,-0.335730,-0.650190,-0.307990,...,0.023316,0.473940,-0.41877,2.7930,0.118550,-0.37251,0.374860,-1.231700,0.239610,0.465600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
357,TCGA-49-AARO-01A-21-A45B-20,LUAD,Primary,95,-0.042698,-0.549670,-0.127270,-0.145420,-0.596050,-0.087433,...,,,,,,,,,,
358,TCGA-49-AARR-01A-21-A45B-20,LUAD,Primary,95,0.110640,-0.589670,-0.281660,-0.095445,-0.853960,-0.257230,...,,,,,,,,,,
359,TCGA-4B-A93V-01A-21-A45B-20,LUAD,Primary,95,0.028928,-0.023529,0.016117,0.483560,-0.068947,-0.357420,...,,,,,,,,,,
360,TCGA-S2-AA1A-01A-21-A45B-20,LUAD,Primary,95,-0.212830,-0.468650,-0.166380,-0.509960,-0.070919,-0.215100,...,,,,,,,,,,


In [9]:
# Default RangeIndex; its `stop` value is the number of rows in df3.
df3.index

RangeIndex(start=0, stop=362, step=1)

In [17]:
# Keep every column after the four leading identifier columns
# (Sample_ID, Cancer_Type, Sample_Type, SetID).
# The slice is open-ended on purpose: the previous stop, df3.index.stop, is the
# ROW count and only selected all columns because df3 has more rows than
# columns; with fewer rows it would silently drop trailing columns.
tcpaData = df3.iloc[:, 4:]

In [18]:
# Show the sliced frame: the four leading identifier columns are gone and the
# columns now start at X1433EPSILON.
tcpaData

Unnamed: 0,X1433EPSILON,X4EBP1,X4EBP1_pS65,X4EBP1_pT37T46,X53BP1,ACC_pS79,ACC1,AKT,AKT_pS473,AKT_pT308,...,NRF2,PARPAB3,THYMIDILATESYNTHASE,TTF1,CHROMOGRANINANTERM,CK5,NAPSINA,P63,RET_pY905,SYNAPTOPHYSIN
0,-0.193310,-0.403080,-0.147000,0.316530,-0.532830,-0.980510,-1.087500,-0.260750,-0.097227,-0.074816,...,0.044750,0.199650,-0.22208,4.1317,0.141930,-0.29942,-0.003136,-0.495560,-0.031969,0.184620
1,-0.127560,0.126140,-0.118130,-0.465630,-0.587760,-0.549750,-0.342900,0.152870,-0.473530,-0.752860,...,-0.195990,-0.114490,-0.29645,4.6185,0.180240,-0.15951,0.088298,-0.065447,-0.144650,0.068915
2,-0.338430,-0.617230,-0.211500,0.634580,-0.708460,0.182930,0.188150,0.246630,0.439210,-0.197140,...,-0.272550,0.067162,-0.40472,4.9847,0.083862,-0.26624,0.232980,-0.217480,0.252600,0.668800
3,-0.332780,-0.124810,-0.034797,0.893290,-0.315700,0.300790,0.323950,0.131380,0.178060,-0.204200,...,-0.347790,0.040919,-0.44552,4.2575,0.068243,-0.40863,-0.122020,-0.680860,0.089811,1.035600
4,-0.096924,-0.419540,-0.235340,-0.335730,-0.650190,-0.307990,-0.188780,0.024076,-0.410910,-0.210690,...,0.023316,0.473940,-0.41877,2.7930,0.118550,-0.37251,0.374860,-1.231700,0.239610,0.465600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
357,-0.042698,-0.549670,-0.127270,-0.145420,-0.596050,-0.087433,0.037383,0.248870,-0.027798,-0.245210,...,,,,,,,,,,
358,0.110640,-0.589670,-0.281660,-0.095445,-0.853960,-0.257230,0.151140,-0.070331,-0.140600,0.066302,...,,,,,,,,,,
359,0.028928,-0.023529,0.016117,0.483560,-0.068947,-0.357420,-0.051070,-0.014914,-0.010301,-0.369890,...,,,,,,,,,,
360,-0.212830,-0.468650,-0.166380,-0.509960,-0.070919,-0.215100,-0.072850,0.014571,-0.362500,-0.445230,...,,,,,,,,,,


In [21]:
# Values of the first row (position 0) for every column after the four leading
# identifier columns. Despite the name, this is a Series of that row's values
# labelled by column name, not a list of names.
# The slice is open-ended: the previous stop, df3.index.stop, is the row count
# and only covered all columns because df3 has more rows than columns.
geneList1 = df3.iloc[0, 4:]

In [22]:
# Show row 0 as a Series labelled by column name (dtype object in the stored output).
geneList1

X1433EPSILON      -0.19331
X4EBP1            -0.40308
X4EBP1_pS65         -0.147
X4EBP1_pT37T46     0.31653
X53BP1            -0.53283
                    ...   
CK5               -0.29942
NAPSINA          -0.003136
P63               -0.49556
RET_pY905        -0.031969
SYNAPTOPHYSIN      0.18462
Name: 0, Length: 237, dtype: object

In [24]:
# All column labels of df3 as a pandas Index, including the leading
# identifier columns (Sample_ID, Cancer_Type, Sample_Type, SetID).
geneList2 = df3.columns

In [25]:
# Show the complete set of column labels, identifier columns included.
geneList2

Index(['Sample_ID', 'Cancer_Type', 'Sample_Type', 'SetID', 'X1433EPSILON',
       'X4EBP1', 'X4EBP1_pS65', 'X4EBP1_pT37T46', 'X53BP1', 'ACC_pS79',
       ...
       'NRF2', 'PARPAB3', 'THYMIDILATESYNTHASE', 'TTF1', 'CHROMOGRANINANTERM',
       'CK5', 'NAPSINA', 'P63', 'RET_pY905', 'SYNAPTOPHYSIN'],
      dtype='object', length=241)

In [26]:
# Column labels after the four leading identifier columns
# (Sample_ID, Cancer_Type, Sample_Type, SetID).
# The slice is open-ended: the previous stop, df3.index.stop, is the row count
# and only reached the last column because df3 has more rows than columns.
geneList3 = df3.columns[4:]

In [27]:
# Show the column labels that remain once the first four are skipped.
geneList3

Index(['X1433EPSILON', 'X4EBP1', 'X4EBP1_pS65', 'X4EBP1_pT37T46', 'X53BP1',
       'ACC_pS79', 'ACC1', 'AKT', 'AKT_pS473', 'AKT_pT308',
       ...
       'NRF2', 'PARPAB3', 'THYMIDILATESYNTHASE', 'TTF1', 'CHROMOGRANINANTERM',
       'CK5', 'NAPSINA', 'P63', 'RET_pY905', 'SYNAPTOPHYSIN'],
      dtype='object', length=237)

In [28]:
# Slicing df3.columns yields a pandas Index, not a Python list.
type(geneList3)

pandas.core.indexes.base.Index