For a detailed explanation of these examples, refer to [Pandas Shuffle DataFrame Rows Examples](https://sparkbyexamples.com/pandas/pandas-shuffle-DataFrame-rows-examples)


# Pandas Shuffle DataFrame Rows Examples

In [1]:
# Create a DataFrame from a dictionary of lists
    
import pandas as pd

# Column-oriented sample data: each key becomes a column name and each
# list supplies that column's values (all lists have seven entries).
technologies = {
    "Courses": ["Spark", "PySpark", "Hadoop", "Python", "pandas", "Oracle", "Java"],
    "Fee": [20000, 25000, 26000, 22000, 24000, 21000, 22000],
    "Duration": ["30day", "40days", "35days", "40days", "60days", "50days", "55days"],
    "Discount": [1000, 2300, 1500, 1200, 2500, 2100, 2000],
}

# Build the frame used by every example below; no index is given, so pandas
# assigns the default 0..6 row labels.
df = pd.DataFrame(data=technologies)
df


Unnamed: 0,Courses,Fee,Duration,Discount
0,Spark,20000,30day,1000
1,PySpark,25000,40days,2300
2,Hadoop,26000,35days,1500
3,Python,22000,40days,1200
4,pandas,24000,60days,2500
5,Oracle,21000,50days,2100
6,Java,22000,55days,2000


In [2]:
# Shuffle every row: frac=1 asks sample() for 100% of the rows, which come
# back in random order. The original index labels stay attached to their
# rows, so the labels appear out of order in the result.
df1 = df.sample(frac=1)
df1


   Courses    Fee Duration  Discount
3   Python  22000   40days      1200
4   pandas  24000   60days      2500
5   Oracle  21000   50days      2100
0    Spark  20000    30day      1000
6     Java  22000   55days      2000
1  PySpark  25000   40days      2300
2   Hadoop  26000   35days      1500


In [3]:
# Shuffle all rows, then rebuild a fresh index starting from zero.
# reset_index() without drop=True keeps the old labels as a new "index" column.
df1 = (
    df.sample(frac=1)
    .reset_index()
)
df1


Unnamed: 0,index,Courses,Fee,Duration,Discount
0,4,pandas,24000,60days,2500
1,6,Java,22000,55days,2000
2,3,Python,22000,40days,1200
3,5,Oracle,21000,50days,2100
4,1,PySpark,25000,40days,2300
5,2,Hadoop,26000,35days,1500
6,0,Spark,20000,30day,1000


In [4]:
# Shuffle all rows and discard the shuffled index labels (drop=True),
# leaving only a fresh index that starts at zero.
df1 = (
    df.sample(frac=1)
    .reset_index(drop=True)
)
df1


Unnamed: 0,Courses,Fee,Duration,Discount
0,Spark,20000,30day,1000
1,Java,22000,55days,2000
2,PySpark,25000,40days,2300
3,Python,22000,40days,1200
4,pandas,24000,60days,2500
5,Oracle,21000,50days,2100
6,Hadoop,26000,35days,1500


In [6]:
# Shuffle rows with NumPy: draw a random permutation of the row positions
# and use it to reorder the frame positionally.
import numpy as np

# permutation(len(df)) yields the positions 0..n-1 in random order, which is
# what iloc expects. Permuting df.index instead would hand index *labels* to
# a positional indexer, which only works when the labels happen to be exactly
# the integers 0..n-1.
df1 = df.iloc[np.random.permutation(len(df))].reset_index(drop=True)
df1


Unnamed: 0,Courses,Fee,Duration,Discount
0,Oracle,21000,50days,2100
1,Java,22000,55days,2000
2,pandas,24000,60days,2500
3,Python,22000,40days,1200
4,Hadoop,26000,35days,1500
5,Spark,20000,30day,1000
6,PySpark,25000,40days,2300


In [8]:
# Using sklearn to shuffle rows
# NOTE: this rebinds `df` itself to the shuffled frame, so every cell that
# runs after this one operates on the shuffled rows rather than the original
# order. The output below shows the original index labels staying with their rows.
from sklearn.utils import shuffle
df = shuffle(df)
df


Unnamed: 0,Courses,Fee,Duration,Discount
6,Java,22000,55days,2000
2,Hadoop,26000,35days,1500
3,Python,22000,40days,1200
4,pandas,24000,60days,2500
1,PySpark,25000,40days,2300
0,Spark,20000,30day,1000
5,Oracle,21000,50days,2100


In [9]:
# Shuffle the values *within* each row (this does not reorder the rows):
# np.random.permutation is applied to one row at a time, so the result is a
# Series holding one shuffled sequence of values per original row.
import numpy as np
df1 = df.apply(np.random.permutation, axis="columns")
df1


6       [55days, Java, 22000, 2000]
2     [35days, 26000, 1500, Hadoop]
3     [1200, Python, 22000, 40days]
4     [60days, 2500, 24000, pandas]
1    [PySpark, 25000, 2300, 40days]
0       [1000, Spark, 20000, 30day]
5     [Oracle, 21000, 50days, 2100]
dtype: object

In [10]:
# Shuffle each column independently: the lambda receives one column at a time
# and returns its values in random order. Every column gets its own
# permutation, so values from different original rows end up side by side
# and the original row associations are not preserved.
df2 = df.apply(lambda column: column.sample(frac=1).values)
df2


Unnamed: 0,Courses,Fee,Duration,Discount
6,Java,22000,50days,2100
2,Oracle,20000,40days,1500
3,pandas,24000,40days,2300
4,Spark,25000,55days,1000
1,PySpark,21000,35days,1200
0,Hadoop,22000,30day,2000
5,Python,26000,60days,2500


In [11]:
# Shuffle both axes: first the column order (axis=1), then the row order,
# and finally replace the shuffled row labels with a fresh zero-based index.
df2 = (
    df.sample(frac=1, axis=1)
    .sample(frac=1)
    .reset_index(drop=True)
)
df2


Unnamed: 0,Fee,Discount,Courses,Duration
0,22000,2000,Java,55days
1,22000,1200,Python,40days
2,21000,2100,Oracle,50days
3,20000,1000,Spark,30day
4,26000,1500,Hadoop,35days
5,25000,2300,PySpark,40days
6,24000,2500,pandas,60days
