### Create a dataframe by reading a CSV file

In [50]:
import os
import numpy as np
import pandas as pd

# Directory holding the data files, relative to the notebook's working directory.
path = "./data/"
filename_read = os.path.join(path, "uci_heartDisease_changed.csv")

# Besides pandas' default NA markers, parse the strings 'NaN' and '?' as missing.
missing_markers = ["NaN", "?"]
df = pd.read_csv(filename_read, na_values=missing_markers)
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
0,63,1,1,145,233,1,2,150,0,2.3,3,0.0,6.0,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3.0,3.0,2
2,67,1,4,120,229,0,2,129,1,2.6,2,2.0,7.0,1
3,37,1,3,130,250,0,0,187,0,3.5,3,0.0,3.0,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0.0,3.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,45,1,1,110,264,0,0,132,0,1.2,2,0.0,7.0,1
299,68,1,4,144,193,1,0,141,0,3.4,2,2.0,7.0,2
300,57,1,4,130,131,0,0,115,1,1.2,2,1.0,7.0,3
301,57,0,2,130,236,0,2,174,0,0.0,2,1.0,3.0,1


### Shuffle the dataframe and save it

In [51]:

# Shuffle the rows with a fixed seed so that "Restart Kernel -> Run All"
# reproduces the same row order (and therefore the same saved CSV).
# sample(frac=1) returns every row exactly once, in random order, and each
# row keeps its original index label -- the same shape of result as
# reindexing on a random permutation of the index.
SHUFFLE_SEED = 42
df = df.sample(frac=1, random_state=SHUFFLE_SEED)
df


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
90,66,1,4,120,302,0,2,151,0,0.4,2,0.0,3.0,0
285,58,1,4,114,318,0,1,140,0,4.4,3,3.0,6.0,4
129,62,0,4,124,209,0,0,163,0,0.0,1,0.0,3.0,0
145,47,1,3,108,243,0,0,152,0,0.0,1,0.0,3.0,1
181,56,0,4,134,409,0,2,150,1,1.9,2,2.0,7.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
294,63,0,4,124,197,0,0,136,1,0.0,2,0.0,3.0,1
256,67,0,4,106,223,0,0,142,0,0.3,1,2.0,3.0,0
229,66,1,4,112,212,0,2,132,1,0.1,1,1.0,3.0,2
118,63,1,4,130,330,1,2,132,1,1.8,1,3.0,7.0,3


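Use `reset_index` to renumber the index from 0 while keeping the shuffled row order. Passing `drop=True` discards the old index labels instead of inserting them as a new column.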

In [52]:
# Renumber the rows 0..n-1 in their current order.
# drop=True throws the old index labels away instead of adding them as a column.
# The result is reassigned rather than mutated with inplace=True, which keeps
# the step chainable and follows the usual pandas idiom.
df = df.reset_index(drop=True)
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
0,66,1,4,120,302,0,2,151,0,0.4,2,0.0,3.0,0
1,58,1,4,114,318,0,1,140,0,4.4,3,3.0,6.0,4
2,62,0,4,124,209,0,0,163,0,0.0,1,0.0,3.0,0
3,47,1,3,108,243,0,0,152,0,0.0,1,0.0,3.0,1
4,56,0,4,134,409,0,2,150,1,1.9,2,2.0,7.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,63,0,4,124,197,0,0,136,1,0.0,2,0.0,3.0,1
299,67,0,4,106,223,0,0,142,0,0.3,1,2.0,3.0,0
300,66,1,4,112,212,0,2,132,1,0.1,1,1.0,3.0,2
301,63,1,4,130,330,1,2,132,1,1.8,1,3.0,7.0,3


In [53]:
# Write the current frame to the data directory.
# index=False keeps the row labels out of the CSV.
filename_write = os.path.join(path, "uci_heartDisease_changed-shuffled.csv")
df.to_csv(filename_write, index=False)

saved_message = "Saved file: {}".format(filename_write)
print(saved_message)

Saved file: ./data/uci_heartDisease_changed-shuffled.csv


### Sort the dataframe and save it

In [54]:
# Show the 'age' column in its current order.
print("Before sorting")
print(df["age"])

# sort_values sorts ascending by default; each row keeps its index label,
# so the labels end up out of numeric order after this step.
df = df.sort_values("age")

# Show the same column again, now ordered from youngest to oldest.
print("\nAfter sorting")
print(df["age"])

Before sorting
0      66
1      58
2      62
3      47
4      56
       ..
298    63
299    67
300    66
301    63
302    50
Name: age, Length: 303, dtype: int64

After sorting
227    29
274    34
243    34
280    35
137    35
       ..
190    71
103    71
133    74
285    76
172    77
Name: age, Length: 303, dtype: int64


In [55]:
# Position-based lookup: the first row of the frame in its current order.
first_by_position = df['age'].iloc[0]
print("df['age'].iloc[0] is: {}".format(first_by_position))

# Label-based lookup: the row whose index label is 0, wherever it now sits.
row_labelled_zero = df['age'].loc[0]
print("df['age'].loc[0] is: {}".format(row_labelled_zero))

df['age'].iloc[0] is: 29
df['age'].loc[0] is: 66


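`iloc` selects rows (or columns) by integer position, counting from 0, regardless of the index labels.  
`loc` selects rows (or columns) by their index labels.  
Sorting moves the rows but each row keeps its label, so after the sort `.loc[0]` still returns the row labelled 0 (age 66), while `.iloc[0]` returns the first row in sorted order. Therefore, to get the youngest age after sorting, use `.iloc[0]`.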

In [56]:
# Write the current frame to the data directory.
# index=False leaves the row labels out of the CSV.
filename_write = os.path.join(path, "uci_heartDisease_changed-sorted.csv")
df.to_csv(filename_write, index=False)

saved_message = "Saved file: {}".format(filename_write)
print(saved_message)

Saved file: ./data/uci_heartDisease_changed-sorted.csv
