# About Series and DataFrames

In [7]:
import pandas as pd
import numpy as np

### Series object

- an ordered, one-dimensional array of data with an index. 
- All the data in a Series is of the same data type. 
- Series arithmetic is vectorised after first aligning the Series index for each of the operands.

In [10]:
s = pd.Series(10)
s

0    10
dtype: int64

In [12]:
S = pd.Series(range(10))
S

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [14]:
s1 = pd.Series(range(4))
s2 = pd.Series(range(2,5))
s3 = s1 + s2
print("s1 :\n",s1,"s2 :\n",s2,"s1+s2 :\n", s3)

s1 :
 0    0
1    1
2    2
3    3
dtype: int64 s2 :
 0    2
1    3
2    4
dtype: int64 s1+s2 :
 0    2.0
1    4.0
2    6.0
3    NaN
dtype: float64


In [9]:
type(s3)

pandas.core.series.Series

### DataFrame object

- A DataFrame object is a 2D table of data with column and row indices.
- Columns are mades up of Series objects
- Structurally, a DataFrame can be thought of as a collection of Series objects with the same index.

<img src ="f1.png">

A DataFrame has two indexes:

- column index (__df.columns__) that might be:
    - a list of strings(variable names) or integers
- row index (__df.index__) that might be:
    - Integers : for row numbers
    - Strings  : for case names
    - DatetimeIndex or PeriodIndex - for time series
    

### Get Data into a DataFrame

In [16]:
# Instantiate an empty df
df = pd.DataFrame()

## Loading from csv

df = pd.read_csv("file.csv")

df = pd.read_csv("file.csv", header= 0, index_col= 0, quotechar='"', sep=':', na_values=['na','-','.',''])

##### header
Row number(s) to use as the column names, and the start of the data. 
Default behavior is to infer the column names: if no names are passed the behavior is identical to header=0 and column names are inferred from the first line of the file, if column names are passed explicitly then the behavior is identical to header=None.
Explicitly pass header=0 to be able to replace existing names. 
The header can be a list of integers that specify row locations for a multi-index on the columns e.g. [0,1,3]. Intervening rows that are not specified will be skipped (e.g. 2 in this example is skipped). Note that this parameter ignores commented lines and empty lines if skip_blank_lines=True, so header=0 denotes the first line of data rather than the first line of the file

##### names -> arraylike

List of column names to use. If file contains no header row, then you should explicitly pass header=None. Duplicates in this list will cause a UserWarning to be issued

##### index_col : int or sequence or False, default None

Column to use as the row labels of the DataFrame. If a sequence is given, a MultiIndex is used. If you have a malformed file with delimiters at the end of each line, you might consider index_col=False to force pandas to _not_ use the first column as the index (row names)

##### na_values : scalar, str, list-like, or dict, default None

Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: ‘’, ‘#N/A’, ‘#N/A N/A’, ‘#NA’, ‘-1.#IND’, ‘-1.#QNAN’, ‘-NaN’, ‘-nan’, ‘1.#IND’, ‘1.#QNAN’, ‘N/A’, ‘NA’, ‘NULL’, ‘NaN’, ‘n/a’, ‘nan’, ‘null’.

https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html

### Loading from Excel

### Load a DataFrame from a MYSQL database

### Load a DataFrame from Series

In [19]:
s4 = s1 * s2
s2

0    1
1    2
2    3
3    4
dtype: int64

In [20]:
s2.index = s2.index+ 2 # misalign index
s2

2    1
3    2
4    3
5    4
dtype: int64

In [22]:
s1

0    0
1    1
2    2
3    3
dtype: int64

In [21]:
df = pd.concat([s1,s2],axis=1) # has in integer column labels
df

Unnamed: 0,0,1
0,0.0,
1,1.0,
2,2.0,1.0
3,3.0,2.0
4,,3.0
5,,4.0


In [15]:
s3 = pd.Series({'Tom':1, 'Dick':2, 'Harry': 9})
s4 = pd.Series ({'Tom':5, 'Dick':6, 'Larry': 10})
df = pd.concat({'A':s3,'B':s4},axis =1) #DOES NOT GUARANTEE COL ORDER if indices are not aligned
df

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  app.launch_new_instance()


Unnamed: 0,A,B
Dick,2.0,6.0
Harry,9.0,
Larry,,10.0
Tom,1.0,5.0


### Get a  DataFrame from a Python dictionary

In [39]:
# Data is in columns

df = pd.DataFrame({
    'col0':[1.0,2.0,3.0],
    'col1':[2.0,3.0,8.0],
    'col2':[4.0,9.0,10.0]})

In [40]:
df

Unnamed: 0,col0,col1,col2
0,1.0,2.0,4.0
1,2.0,3.0,9.0
2,3.0,8.0,10.0


In [42]:
# Data is in rows

df =pd.DataFrame.from_dict({
    'row0': {'col0':0, 'col1': 1},
    'row1': {'col0':16, 'col1': 45}
}, orient ='index')

In [43]:
df

Unnamed: 0,col0,col1
row0,0,1
row1,16,45


In [46]:
df = pd.DataFrame.from_dict({
    'row0':[1, 1+1j, 2+0j],
    'row1':[ 3, 3+9j,9]
},orient='index')

In [47]:
df

Unnamed: 0,0,1,2
row0,1,(1+1j),(2+0j)
row1,3,(3+9j),(9+0j)


### Create fake data to test

In [54]:
df = pd.DataFrame(np.random.rand(20,10))

In [55]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.64,0.17,0.976,0.552,0.538,0.86,0.875,0.865,0.844,0.842
1,0.067,0.733,0.798,0.987,0.486,0.823,0.636,0.84,0.688,0.758
2,0.966,0.421,0.964,0.027,0.021,0.846,0.33,0.891,0.25,0.393
3,0.327,0.279,0.567,0.377,0.686,0.183,0.989,0.63,0.823,0.936
4,0.467,0.949,0.312,0.718,0.417,0.205,0.765,0.702,0.42,0.761
5,0.895,0.05,0.234,0.809,0.955,0.258,0.846,0.166,0.604,0.949
6,0.524,0.652,0.075,0.412,0.576,0.828,0.013,0.682,0.58,0.925
7,0.496,0.806,0.234,0.499,0.665,0.745,0.447,0.882,0.059,0.32
8,0.361,0.559,0.94,0.028,0.941,0.62,0.811,0.57,0.75,0.531
9,0.996,0.19,0.202,0.315,0.941,0.37,0.643,0.985,0.052,0.613


In [56]:
len(df)

20

In [53]:
### fake data with time-stamp row index
df = pd.DataFrame(np.random.rand(20,10))
df.index = pd.date_range('5/1/2018', periods=len(df), freq='M')
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
2018-05-31,0.453,0.265,0.158,0.755,0.433,0.77,0.964,0.702,0.536,0.631
2018-06-30,0.837,0.18,0.692,0.39,0.503,0.454,0.029,0.504,0.845,0.833
2018-07-31,0.7,0.915,0.665,0.673,0.55,0.19,0.069,0.455,0.143,0.718
2018-08-31,0.05,0.022,0.026,0.278,0.837,0.018,0.418,0.468,0.959,0.148
2018-09-30,0.327,0.606,0.486,0.273,0.985,0.903,0.6,0.754,0.826,0.377
2018-10-31,0.41,0.649,0.883,0.622,0.287,0.802,0.235,0.191,0.332,0.713
2018-11-30,0.864,0.175,0.172,0.279,0.993,0.756,0.214,0.187,0.254,0.779
2018-12-31,0.88,0.657,0.568,0.691,0.644,0.263,0.539,0.301,0.79,0.441
2019-01-31,0.642,0.415,0.724,0.37,0.506,0.566,0.223,0.998,0.285,0.407
2019-02-28,0.48,0.72,0.223,0.863,0.116,0.081,0.187,0.113,0.58,0.049


#### DF with alphabetic row and column indexes and a groupable variable

In [58]:
import string
import random
rows = 52
cols = 5
# min/max row count
assert(1 <= rows <= 52)
df = pd.DataFrame(np.random.randn(rows,cols),columns=['c'+str(i) for i in range(cols)],index=list((string.ascii_uppercase + string.ascii_lowercase)[0:rows]))
df['groupable']=[random.choice('abcde') for _ in range(rows)]
df

Unnamed: 0,c0,c1,c2,c3,c4,groupable
A,-1.255,-0.446,-2.399,0.193,-0.340,d
B,-0.086,-0.514,-0.107,-1.332,1.268,c
C,0.048,0.787,0.723,-0.287,-0.083,b
D,0.386,-0.052,-0.262,1.205,-1.304,b
E,0.442,0.164,-0.393,1.492,1.390,c
F,0.796,1.614,-0.496,0.945,0.116,a
G,0.452,1.684,1.654,2.269,-0.411,b
H,-1.611,-0.837,-0.213,-0.620,-0.416,c
I,-0.571,0.540,0.552,1.711,-1.820,b
J,-0.866,-0.478,-0.007,-0.405,-1.016,d


### Saving a DataFrame

#### Saving DataFrame to csv

In [61]:
df.to_csv("x1.csv",encoding='utf-8')

#### Saving DataFrame to excel workbook

In [62]:
from pandas import ExcelWriter

writer = ExcelWriter('f1.xlsx')
df.to_excel(writer,'Sheet1')
writer.save()

#### Saving DataFrame to MySQL

#### Saving tp Python Objects

In [63]:
d = df.to_dict() # to dictionary
str1 = df.to_string() # to string
m = df.as_matrix() # to numpy matrix

  app.launch_new_instance()


## Working with the Whole Dataframe

### Peek at the DataFrame contents

#### Get index and datatypes

In [70]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 52 entries, A to z
Data columns (total 6 columns):
c0           52 non-null float64
c1           52 non-null float64
c2           52 non-null float64
c3           52 non-null float64
c4           52 non-null float64
groupable    52 non-null object
dtypes: float64(5), object(1)
memory usage: 2.8+ KB


#### Get first i rows

In [71]:
df.head(2)

Unnamed: 0,c0,c1,c2,c3,c4,groupable
A,-1.255,-0.446,-2.399,0.193,-0.34,d
B,-0.086,-0.514,-0.107,-1.332,1.268,c


#### Get last i rows

In [73]:
df.tail(2)

Unnamed: 0,c0,c1,c2,c3,c4,groupable
y,0.528,-1.2,0.564,-0.567,-1.325,b
z,-1.02,-2.262,1.081,-0.608,0.1,b


#### Get summary stats cols

In [74]:
df.describe()

Unnamed: 0,c0,c1,c2,c3,c4
count,52.0,52.0,52.0,52.0,52.0
mean,0.015,-0.138,0.039,0.109,-0.221
std,1.133,0.989,0.969,1.094,1.047
min,-3.003,-2.262,-2.399,-3.227,-3.149
25%,-0.802,-0.857,-0.501,-0.577,-0.855
50%,0.114,-0.252,0.089,0.159,-0.186
75%,0.828,0.606,0.597,0.833,0.409
max,2.196,1.921,2.28,2.269,1.945


In [77]:
df.head(3)

Unnamed: 0,c0,c1,c2,c3,c4,groupable
A,-1.255,-0.446,-2.399,0.193,-0.34,d
B,-0.086,-0.514,-0.107,-1.332,1.268,c
C,0.048,0.787,0.723,-0.287,-0.083,b


In [75]:
top_lft_corner_df = df.iloc[:2,:2]

In [76]:
top_lft_corner_df

Unnamed: 0,c0,c1
A,-1.255,-0.446
B,-0.086,-0.514


### DataFrame non-indexing attributes

#### Transpose rows and columns

In [79]:
df = df.T

In [80]:
df

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z
c0,-1.26,-0.0861,0.0476,0.386,0.442,0.796,0.452,-1.61,-0.571,-0.866,0.787,-1.11,-0.211,-1.07,2.17,2.2,0.932,-1.52,-0.513,0.796,0.647,1.44,0.947,-0.796,-1.06,-0.758,-0.198,0.569,-0.228,-2.1,0.598,0.0146,0.518,-3,1.17,-0.504,1.38,1.45,0.951,1.01,-1.22,1.39,0.181,0.923,-2.25,1.47,-0.524,-0.694,0.568,-0.819,0.528,-1.02
c1,-0.446,-0.514,0.787,-0.0523,0.164,1.61,1.68,-0.837,0.54,-0.478,-1.49,-0.855,0.736,0.465,-1.46,0.603,-0.745,-1.27,1.89,1.1,0.225,-0.522,1.92,-1.68,-0.176,-0.0815,0.639,-0.0463,-1.41,0.133,-0.864,0.735,0.614,0.249,-0.712,1.73,0.734,-1.14,0.641,-0.999,0.186,0.597,-0.904,-0.853,-0.531,-0.59,-0.373,-0.329,-1.14,-1.23,-1.2,-2.26
c2,-2.4,-0.107,0.723,-0.262,-0.393,-0.496,1.65,-0.213,0.552,-0.00739,0.598,-0.0423,-0.119,0.205,-0.742,-0.548,0.0881,1.39,-1.76,-0.647,0.65,-0.578,1.06,-0.592,-0.518,0.474,0.253,-1.74,-0.771,-1.68,-0.394,2.1,0.0902,0.477,0.49,0.288,0.114,-1.98,0.269,0.775,0.298,0.935,0.596,-0.0221,2.28,-0.054,0.848,-0.0795,-1.66,0.996,0.564,1.08
c3,0.193,-1.33,-0.287,1.21,1.49,0.945,2.27,-0.62,1.71,-0.405,0.726,0.108,-0.0985,-0.232,-0.176,0.862,-0.317,0.116,-1.7,-1.64,-0.755,0.471,-2.47,1.52,-0.929,1.61,0.279,0.148,1.92,0.493,-0.713,0.17,0.445,-0.41,-1.08,-0.889,-0.212,-3.23,-0.907,0.766,0.716,1.2,0.835,0.0132,0.907,0.558,0.524,1.42,0.833,0.804,-0.567,-0.608
c4,-0.34,1.27,-0.0826,-1.3,1.39,0.116,-0.411,-0.416,-1.82,-1.02,0.923,-1.22,0.579,0.127,-0.221,-0.308,1.51,-1.43,-0.809,-0.258,0.802,0.365,1.12,1.13,0.134,0.226,-1.76,0.779,0.465,-0.507,0.391,-0.963,0.202,-1.28,0.823,-2.47,1.94,-0.532,-0.819,-0.15,-0.513,-0.58,0.271,0.949,-0.237,-1.48,0.0202,0.0396,-3.15,-1.78,-1.33,0.1
groupable,d,c,b,b,c,a,b,c,b,d,c,d,a,b,d,b,c,e,e,b,d,d,e,c,a,e,d,b,b,d,e,e,a,d,e,d,b,c,b,a,a,c,e,c,d,d,c,b,b,b,b,b


#### list row and column indexes

In [83]:
l =df.axes

In [84]:
l

[Index(['c0', 'c1', 'c2', 'c3', 'c4', 'groupable'], dtype='object'),
 Index(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
        'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b',
        'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
        'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'],
       dtype='object')]

In [85]:
(r_idx, c_idx) =df.axes

In [86]:
r_idx

Index(['c0', 'c1', 'c2', 'c3', 'c4', 'groupable'], dtype='object')

In [87]:
 c_idx

Index(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
       'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b',
       'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
       'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'],
      dtype='object')

##### Series column data types

In [91]:
 
s = df.dtypes 

In [92]:
s

A    object
B    object
C    object
D    object
E    object
F    object
G    object
H    object
I    object
J    object
K    object
L    object
      ...  
o    object
p    object
q    object
r    object
s    object
t    object
u    object
v    object
w    object
x    object
y    object
z    object
Length: 52, dtype: object

#### Check if dataframe is empty 

In [94]:
b = df.empty
b

False

#### check number of axes 

In [95]:
df.ndim

2

#### check shape

In [96]:
df.shape

(6, 52)

#### check size (row * column)


In [99]:
df.size

312

#### get numpy array for the df

In [101]:
df.values

array([[-1.255330975976439, -0.08605004118654468, 0.04755105522697168,
        0.3864977735521899, 0.44200633167842557, 0.7964478160377733,
        0.45194045955447215, -1.611270451465964, -0.5706691241816961,
        -0.8661263652581783, 0.7869979994715699, -1.1075920170377904,
        -0.21138827986298683, -1.0699431091004332, 2.1702139393194546,
        2.1957861983231597, 0.9318305926714793, -1.5237908730118168,
        -0.5129513571906763, 0.7958316152591067, 0.646757733682412,
        1.441108089450332, 0.9467017999377692, -0.7956502900910675,
        -1.0623428674869009, -0.7580757799484878, -0.19754393641756163,
        0.5689697071184057, -0.22822608388418528, -2.104404824680125,
        0.5981111765390439, 0.014587318911385426, 0.517934003615038,
        -3.002764359809732, 1.1724838776977644, -0.5043029541075144,
        1.3811934137349695, 1.4468855316747409, 0.9509878877874001,
        1.0057876071683658, -1.215633925276018, 1.3864086862563734,
        0.18140686449149812,

### Dataframe utility methods 

##### Copy a dataframe

In [102]:
df = df.copy() 

#### Rank each column

In [106]:
df.rank()

<bound method NDFrame.rank of                A       B       C       D      E      F      G      H      I  \
c0         -1.26 -0.0861  0.0476   0.386  0.442  0.796  0.452  -1.61 -0.571   
c1        -0.446  -0.514   0.787 -0.0523  0.164   1.61   1.68 -0.837   0.54   
c2          -2.4  -0.107   0.723  -0.262 -0.393 -0.496   1.65 -0.213  0.552   
c3         0.193   -1.33  -0.287    1.21   1.49  0.945   2.27  -0.62   1.71   
c4         -0.34    1.27 -0.0826    -1.3   1.39  0.116 -0.411 -0.416  -1.82   
groupable      d       c       b       b      c      a      b      c      b   

                 J      K       L       M      N      O      P       Q      R  \
c0          -0.866  0.787   -1.11  -0.211  -1.07   2.17    2.2   0.932  -1.52   
c1          -0.478  -1.49  -0.855   0.736  0.465  -1.46  0.603  -0.745  -1.27   
c2        -0.00739  0.598 -0.0423  -0.119  0.205 -0.742 -0.548  0.0881   1.39   
c3          -0.405  0.726   0.108 -0.0985 -0.232 -0.176  0.862  -0.317  0.116   
c4         

In [119]:
df = pd.DataFrame(np.random.randn(20,10))

In [120]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,-1.577,-1.641,-0.154,0.223,0.775,-0.704,-0.223,-1.304,0.873,1.323
1,0.53,-1.371,-0.495,0.762,0.052,0.675,-0.02,-0.998,-0.796,0.403
2,2.062,0.824,-0.448,0.52,-1.597,0.536,1.15,0.268,2.238,0.65
3,-0.956,0.16,1.466,2.001,1.498,0.051,-0.881,0.504,-1.601,-0.27
4,0.406,-0.911,-0.89,-1.53,1.208,0.014,-0.715,2.445,0.416,0.06
5,0.836,0.097,0.014,0.469,0.817,0.793,-1.484,-0.153,0.856,0.479
6,0.67,0.414,0.75,0.014,-0.626,1.552,0.235,0.876,-0.366,-0.527
7,0.223,-0.174,-0.476,0.816,1.785,0.952,-1.446,-0.872,0.776,-0.212
8,-0.718,1.123,0.354,-0.591,-0.026,1.179,-0.357,1.609,1.574,0.845
9,0.101,1.34,0.03,-1.431,-0.408,0.114,-0.757,-0.531,-0.775,1.437


In [121]:
df = df.rank() # Compute numerical data ranks (1 through n) along axis. Equal values are assigned a rank that is the average of the ranks of those values

In [122]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1.0,2.0,11.0,13.0,15.0,5.0,13.0,4.0,17.0,18.0
1,15.0,5.0,4.0,16.0,11.0,14.0,14.0,7.0,5.0,11.0
2,20.0,14.0,6.0,15.0,1.0,13.0,20.0,13.0,20.0,15.0
3,3.0,10.0,19.0,19.0,19.0,10.0,7.0,16.0,2.0,7.0
4,14.0,7.0,2.0,4.0,18.0,9.0,9.0,20.0,12.0,10.0
5,18.0,9.0,13.0,14.0,16.0,16.0,2.0,11.0,16.0,13.0
6,16.0,11.0,17.0,12.0,4.0,20.0,17.0,17.0,7.0,5.0
7,13.0,8.0,5.0,17.0,20.0,17.0,4.0,8.0,15.0,8.0
8,4.0,18.0,16.0,7.0,10.0,18.0,10.0,18.0,18.0,16.0
9,8.0,19.0,14.0,5.0,7.0,11.0,8.0,9.0,6.0,19.0


In [124]:
df = df.sort_values(by=0)

In [125]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1.0,2.0,11.0,13.0,15.0,5.0,13.0,4.0,17.0,18.0
19,2.0,12.0,8.0,9.0,9.0,19.0,5.0,3.0,14.0,20.0
3,3.0,10.0,19.0,19.0,19.0,10.0,7.0,16.0,2.0,7.0
8,4.0,18.0,16.0,7.0,10.0,18.0,10.0,18.0,18.0,16.0
14,5.0,3.0,20.0,6.0,8.0,1.0,15.0,12.0,13.0,12.0
10,6.0,20.0,15.0,2.0,13.0,15.0,1.0,10.0,10.0,3.0
11,7.0,15.0,12.0,3.0,12.0,4.0,6.0,6.0,9.0,14.0
9,8.0,19.0,14.0,5.0,7.0,11.0,8.0,9.0,6.0,19.0
16,9.0,4.0,18.0,18.0,14.0,7.0,16.0,15.0,8.0,1.0
15,10.0,16.0,10.0,20.0,17.0,3.0,12.0,2.0,19.0,6.0


In [126]:
df = df.sort_values(by =[0,1])

In [127]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1.0,2.0,11.0,13.0,15.0,5.0,13.0,4.0,17.0,18.0
19,2.0,12.0,8.0,9.0,9.0,19.0,5.0,3.0,14.0,20.0
3,3.0,10.0,19.0,19.0,19.0,10.0,7.0,16.0,2.0,7.0
8,4.0,18.0,16.0,7.0,10.0,18.0,10.0,18.0,18.0,16.0
14,5.0,3.0,20.0,6.0,8.0,1.0,15.0,12.0,13.0,12.0
10,6.0,20.0,15.0,2.0,13.0,15.0,1.0,10.0,10.0,3.0
11,7.0,15.0,12.0,3.0,12.0,4.0,6.0,6.0,9.0,14.0
9,8.0,19.0,14.0,5.0,7.0,11.0,8.0,9.0,6.0,19.0
16,9.0,4.0,18.0,18.0,14.0,7.0,16.0,15.0,8.0,1.0
15,10.0,16.0,10.0,20.0,17.0,3.0,12.0,2.0,19.0,6.0


In [128]:
df = df.sort_index()
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1.0,2.0,11.0,13.0,15.0,5.0,13.0,4.0,17.0,18.0
1,15.0,5.0,4.0,16.0,11.0,14.0,14.0,7.0,5.0,11.0
2,20.0,14.0,6.0,15.0,1.0,13.0,20.0,13.0,20.0,15.0
3,3.0,10.0,19.0,19.0,19.0,10.0,7.0,16.0,2.0,7.0
4,14.0,7.0,2.0,4.0,18.0,9.0,9.0,20.0,12.0,10.0
5,18.0,9.0,13.0,14.0,16.0,16.0,2.0,11.0,16.0,13.0
6,16.0,11.0,17.0,12.0,4.0,20.0,17.0,17.0,7.0,5.0
7,13.0,8.0,5.0,17.0,20.0,17.0,4.0,8.0,15.0,8.0
8,4.0,18.0,16.0,7.0,10.0,18.0,10.0,18.0,18.0,16.0
9,8.0,19.0,14.0,5.0,7.0,11.0,8.0,9.0,6.0,19.0


In [129]:
df = df.astype(int)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1,2,11,13,15,5,13,4,17,18
1,15,5,4,16,11,14,14,7,5,11
2,20,14,6,15,1,13,20,13,20,15
3,3,10,19,19,19,10,7,16,2,7
4,14,7,2,4,18,9,9,20,12,10
5,18,9,13,14,16,16,2,11,16,13
6,16,11,17,12,4,20,17,17,7,5
7,13,8,5,17,20,17,4,8,15,8
8,4,18,16,7,10,18,10,18,18,16
9,8,19,14,5,7,11,8,9,6,19


#### DataFrame Iteration methods

In [None]:
df.iteritems() # (col-index, Series) pairs)
df.iterrows() #row index, Series) pairs

#### df.iteritems() # (col-index, Series) pairs)

In [130]:
for name, series in df.iteritems():
    print('\n Col name: ' + str(name))
    print('1st val: ' + str(series.iat[0]))


 Col name: 0
1st val: 1

 Col name: 1
1st val: 2

 Col name: 2
1st val: 11

 Col name: 3
1st val: 13

 Col name: 4
1st val: 15

 Col name: 5
1st val: 5

 Col name: 6
1st val: 13

 Col name: 7
1st val: 4

 Col name: 8
1st val: 17

 Col name: 9
1st val: 18


#### df.iterrows() #row index, Series) pairs

In [131]:
for name, series in df.iterrows():
    print("\n Row name: "+ str(name))
    print("\n 1 st val: " + str(series.iat[0]))


 Row name: 0

 1 st val: 1

 Row name: 1

 1 st val: 15

 Row name: 2

 1 st val: 20

 Row name: 3

 1 st val: 3

 Row name: 4

 1 st val: 14

 Row name: 5

 1 st val: 18

 Row name: 6

 1 st val: 16

 Row name: 7

 1 st val: 13

 Row name: 8

 1 st val: 4

 Row name: 9

 1 st val: 8

 Row name: 10

 1 st val: 6

 Row name: 11

 1 st val: 7

 Row name: 12

 1 st val: 19

 Row name: 13

 1 st val: 17

 Row name: 14

 1 st val: 5

 Row name: 15

 1 st val: 10

 Row name: 16

 1 st val: 9

 Row name: 17

 1 st val: 12

 Row name: 18

 1 st val: 11

 Row name: 19

 1 st val: 2
