In [55]:
import pandas as pd
import numpy as np

### 1 - Iterating a DataFrame

#### 1 - Let's generate the date series

In [83]:
# Number of consecutive calendar days to generate.
N = 20
# Daily DatetimeIndex holding N dates, beginning on 2023-06-01.
pd.date_range("2023-06-01", periods=N, freq="D")

DatetimeIndex(['2023-06-01', '2023-06-02', '2023-06-03', '2023-06-04',
               '2023-06-05', '2023-06-06', '2023-06-07', '2023-06-08',
               '2023-06-09', '2023-06-10', '2023-06-11', '2023-06-12',
               '2023-06-13', '2023-06-14', '2023-06-15', '2023-06-16',
               '2023-06-17', '2023-06-18', '2023-06-19', '2023-06-20'],
              dtype='datetime64[ns]', freq='D')

#### 2 - Let's generate random numbers

In [57]:
# N uniform random floats from the half-open interval [0, 1).
# No seed is set, so the values differ on every run.
np.random.rand(N)

array([0.41010846, 0.48343454, 0.47713678, 0.94829305, 0.84326575,
       0.41628444, 0.03806034, 0.22532764, 0.98012377, 0.04589431,
       0.48569782, 0.33357896, 0.25612149, 0.13831649, 0.04458034,
       0.67813386, 0.25577147, 0.71064674, 0.42188594, 0.87488371])

In [58]:
# N labels drawn with replacement from the three categories.
np.random.choice(['Low','Medium','High'],N)

array(['Medium', 'High', 'Medium', 'Low', 'High', 'Low', 'High', 'Low',
       'Low', 'Low', 'Medium', 'Medium', 'Low', 'Low', 'Medium', 'High',
       'High', 'Low', 'Low', 'High'], dtype='<U6')

In [59]:
# Draw N random samples from a normal (Gaussian) distribution
# with mean 100 and standard deviation 10.
np.random.normal(100, 10, size=(N))

array([ 88.58572949, 107.05062426, 104.4377111 , 104.2257495 ,
        75.90771859,  95.89090355,  95.66143164,  78.38000595,
       109.4311915 ,  89.5526743 , 111.60954781, 106.16380657,
        94.09510089, 100.42311984,  86.66038669, 100.00521684,
       104.37220188,  88.13089002, 106.22168976, 107.99294386])

#### 3 - Let's create a DataFrame

In [60]:
# Number of rows in the sample frame.
N = 20

# Sample frame with one row per day:
#   A - consecutive dates starting 2016-01-01
#   x - evenly spaced floats 0.0 .. N-1
#   y - uniform random floats in [0, 1)
#   C - random category label
#   D - normal draws (mean 100, std 10)
df = pd.DataFrame(
    dict(
        A=pd.date_range(start="2016-01-01", periods=N, freq="D"),
        x=np.linspace(0, N - 1, N),
        y=np.random.rand(N),
        C=np.random.choice(["Low", "Medium", "High"], N).tolist(),
        D=np.random.normal(100, 10, size=N).tolist(),
    )
)

In [61]:
# Display the frame built in the previous cell.
df

Unnamed: 0,A,x,y,C,D
0,2016-01-01,0.0,0.721161,High,108.85607
1,2016-01-02,1.0,0.500571,Low,99.052758
2,2016-01-03,2.0,0.507133,Medium,98.384136
3,2016-01-04,3.0,0.106148,Medium,91.879507
4,2016-01-05,4.0,0.355665,Low,110.080523
5,2016-01-06,5.0,0.891471,Low,105.483246
6,2016-01-07,6.0,0.148109,Medium,88.337556
7,2016-01-08,7.0,0.277458,Low,86.065513
8,2016-01-09,8.0,0.0999,Medium,100.486656
9,2016-01-10,9.0,0.776405,Low,107.222648


#### 4 - items() − to iterate over the (column label, Series) pairs (replaces iteritems(), which was removed in pandas 2.0)

In [85]:
# 4x3 frame of standard-normal draws used to demonstrate column-wise iteration.
df = pd.DataFrame(
    data=np.random.randn(4, 3),
    columns=["col1", "col2", "col3"],
)
df

Unnamed: 0,col1,col2,col3
0,-1.315915,-0.648712,2.667033
1,0.724735,-1.533665,-1.871811
2,0.372518,-1.290188,0.906363
3,1.035102,-1.46605,2.68358


In [86]:
# DataFrame.items() yields one (column label, column Series) pair per column.
# It replaces iteritems(), which was deprecated in pandas 1.5 and removed in
# pandas 2.0, where calling it raises AttributeError.
for key, value in df.items():
    # Only the label is shown here; `value` is the column's Series.
    print(key)

col1
col2
col3


In [64]:
# Walk the frame column by column with DataFrame.items(), which yields
# (column label, column Series) pairs. iteritems() was deprecated in
# pandas 1.5 and removed in pandas 2.0, so items() is used instead.
for key, value in df.items():
    print(f"Column is == >>{key}")
    # `value` is the whole column as a Series (index, values, name, dtype).
    print(value)
    print("\n")

Column is == >>col1
0   -1.328081
1    1.237390
2   -0.465800
3   -0.718663
Name: col1, dtype: float64


Column is == >>col2
0   -0.290021
1    0.135677
2    1.496161
3    0.259189
Name: col2, dtype: float64


Column is == >>col3
0    0.652622
1    0.216244
2   -0.632950
3   -0.314424
Name: col3, dtype: float64




#### 5 - iterrows() − iterate over the rows as (index,series) pairs

In [87]:
# Fresh 4x3 frame of standard-normal draws for the row-iteration example.
df = pd.DataFrame(
    np.random.randn(4, 3),
    columns=["col1", "col2", "col3"],
)
df

Unnamed: 0,col1,col2,col3
0,0.649502,-1.542447,0.480103
1,-0.31979,-0.732699,0.595337
2,-0.248663,0.223678,0.692423
3,1.75497,-0.105402,-1.1165


In [89]:
# iterrows() yields (index label, row Series) pairs, one per row.
for label, record in df.iterrows():
    print(f"Row = {label}")
    # Each row arrives as a Series keyed by the column names.
    print(record)
    print("\n")

Row = 0
col1    0.649502
col2   -1.542447
col3    0.480103
Name: 0, dtype: float64


Row = 1
col1   -0.319790
col2   -0.732699
col3    0.595337
Name: 1, dtype: float64


Row = 2
col1   -0.248663
col2    0.223678
col3    0.692423
Name: 2, dtype: float64


Row = 3
col1    1.754970
col2   -0.105402
col3   -1.116500
Name: 3, dtype: float64




#### 6 - itertuples() − iterate over the rows as namedtuples

In [90]:
# Fresh 4x3 frame of standard-normal draws for the namedtuple-iteration example.
column_names = ["col1", "col2", "col3"]
df = pd.DataFrame(np.random.randn(4, len(column_names)), columns=column_names)
del column_names  # keep the notebook namespace as it was
df

Unnamed: 0,col1,col2,col3
0,-0.446442,1.885162,-0.770706
1,0.238222,-0.240666,-1.985975
2,0.340084,-0.122764,-0.37443
3,-1.004851,1.546645,-0.393798


In [91]:
# itertuples() yields one namedtuple per row; the defaults are spelled out:
# the index is included as the first field and the tuple type is named "Pandas".
for record in df.itertuples(index=True, name="Pandas"):
    print(record)

Pandas(Index=0, col1=-0.4464417894081232, col2=1.8851617691929068, col3=-0.7707058960888813)
Pandas(Index=1, col1=0.2382218489307575, col2=-0.24066587311993493, col3=-1.9859745731567424)
Pandas(Index=2, col1=0.34008393585921975, col2=-0.12276430643381961, col3=-0.37442978827489815)
Pandas(Index=3, col1=-1.0048514869355554, col2=1.5466445804963118, col3=-0.3937983766189399)


### 2 - Sorting

#### 1 - First, let's create a DataFrame

In [92]:
# 10x2 frame of standard-normal draws whose row labels and column labels
# are both deliberately out of order, so sorting has a visible effect.
unsorted_df = pd.DataFrame(
    np.random.randn(10, 2),
    index=[1, 4, 6, 2, 3, 5, 9, 8, 0, 7],
    columns=["col2", "col1"],
)
unsorted_df

Unnamed: 0,col2,col1
1,0.350641,-0.989373
4,-1.48443,-1.862344
6,-0.462649,2.381045
2,1.142768,1.736563
3,0.803376,0.765247
5,0.773799,0.897591
9,0.164728,-0.202066
8,1.12116,-0.491281
0,-0.531161,2.804994
7,-0.66846,-0.878636


#### 2 - By Label
 - The sort_index() method sorts a DataFrame by its labels: the axis argument selects row labels or column labels, and the ascending argument sets the sort order. By default, sorting is done on row labels in ascending order.

In [70]:
# Sort by row labels in ascending order; these are sort_index()'s defaults,
# written out explicitly.
sorted_df = unsorted_df.sort_index(axis=0, ascending=True)
sorted_df

Unnamed: 0,col2,col1
0,0.190501,-0.853164
1,1.095443,-1.603677
2,-1.641873,1.170162
3,0.524917,-0.475396
4,1.641632,-0.025909
5,0.625172,1.089772
6,0.999326,-0.70056
7,0.461067,0.317367
8,-0.291665,0.652772
9,1.399464,-1.033214


In [71]:
# Sort by row labels, largest label first.
sorted_df = unsorted_df.sort_index(axis=0, ascending=False)

sorted_df

Unnamed: 0,col2,col1
9,1.399464,-1.033214
8,-0.291665,0.652772
7,0.461067,0.317367
6,0.999326,-0.70056
5,0.625172,1.089772
4,1.641632,-0.025909
3,0.524917,-0.475396
2,-1.641873,1.170162
1,1.095443,-1.603677
0,0.190501,-0.853164


In [72]:
# Sort by column labels instead of row labels; row order is left unchanged.
sorted_df = unsorted_df.sort_index(axis="columns")

sorted_df

Unnamed: 0,col1,col2
1,-1.603677,1.095443
4,-0.025909,1.641632
6,-0.70056,0.999326
2,1.170162,-1.641873
3,-0.475396,0.524917
5,1.089772,0.625172
9,-1.033214,1.399464
8,0.652772,-0.291665
0,-0.853164,0.190501
7,0.317367,0.461067


#### 3 - By Value
- Like index sorting, sort_values() is the method for sorting by values. Its 'by' argument takes the name of the column (or a list of column names) whose values determine the row order.

In [93]:
# Small fixed frame: col1 contains ties, so sorting on a second column
# changes the result.
unsorted_df = pd.DataFrame(
    dict(
        col1=[2, 1, 1, 1],
        col2=[1, 3, 2, 4],
    )
)
unsorted_df

Unnamed: 0,col1,col2
0,2,1
1,1,3
2,1,2
3,1,4


In [74]:
# Order the rows by the values in col1 (ascending).
sorted_df = unsorted_df.sort_values("col1")

sorted_df

Unnamed: 0,col1,col2
1,1,3
2,1,2
3,1,4
0,2,1


In [75]:
# Order the rows by col1 first; ties in col1 are then ordered by col2.
sorted_df = unsorted_df.sort_values(["col1", "col2"])

sorted_df

Unnamed: 0,col1,col2
2,1,2
1,1,3
3,1,4
0,2,1


### 3 - Reindexing

#### Reindexing changes the row labels and column labels of a DataFrame. 
- To reindex means to conform the data to match a given set of labels along a particular axis.
- Multiple operations can be accomplished through reindexing, such as:
  - Reorder the existing data to match a new set of labels.
  - Insert missing value (NA) markers in label locations where no data for the label existed.

In [94]:
# Number of rows in the sample frame.
N = 20

# Sample frame with one row per day:
#   A - consecutive dates starting 2016-01-01
#   x - evenly spaced floats 0.0 .. N-1
#   y - uniform random floats in [0, 1)
#   C - random category label
#   D - normal draws (mean 100, std 10)
df = pd.DataFrame(
    dict(
        A=pd.date_range(start="2016-01-01", periods=N, freq="D"),
        x=np.linspace(0, N - 1, N),
        y=np.random.rand(N),
        C=np.random.choice(["Low", "Medium", "High"], N).tolist(),
        D=np.random.normal(100, 10, size=N).tolist(),
    )
)

df

Unnamed: 0,A,x,y,C,D
0,2016-01-01,0.0,0.410779,Low,106.316822
1,2016-01-02,1.0,0.652152,Low,108.785737
2,2016-01-03,2.0,0.955189,Low,100.988719
3,2016-01-04,3.0,0.152216,Low,77.808933
4,2016-01-05,4.0,0.116874,Low,102.847377
5,2016-01-06,5.0,0.947267,Medium,102.037109
6,2016-01-07,6.0,0.915405,Medium,100.236146
7,2016-01-08,7.0,0.618723,High,104.625993
8,2016-01-09,8.0,0.146507,Medium,105.565293
9,2016-01-10,9.0,0.671219,Medium,98.753105


In [77]:
# Conform df to a new set of labels: keep rows 0, 2 and 5 and columns A and C.
# 'B' is not an existing column, so reindex adds it filled with missing values.
df_reindexed = df.reindex(
    index=[0, 2, 5],
    columns=["A", "C", "B"],
)

df_reindexed

Unnamed: 0,A,C,B
0,2016-01-01,Low,
2,2016-01-03,Medium,
5,2016-01-06,Low,


#### 1 - Reindex to Align with Other Objects

In [78]:
# df1: 10 rows on the default 0..9 index.
df1 = pd.DataFrame(
    np.random.randn(10, 3),
    columns=["col1", "col2", "col3"],
)
# df2: 7 rows on a custom index; labels 10 and 11 do not exist in df1.
df2 = pd.DataFrame(
    np.random.randn(7, 3),
    columns=["col1", "col2", "col3"],
    index=[1, 2, 10, 11, 3, 5, 7],
)

In [79]:
# Display df1 (default integer index) for comparison with df2.
df1

Unnamed: 0,col1,col2,col3
0,-0.776025,-0.946141,1.056522
1,-0.638992,0.150211,1.680077
2,-0.473994,-1.580906,1.050541
3,0.37363,1.020251,-0.899896
4,0.61073,0.994221,-0.448258
5,0.892144,0.531701,-1.367893
6,0.06035,0.761378,0.943671
7,-0.027962,0.607262,1.213213
8,-0.357279,-0.140804,0.781624
9,-2.500428,0.501197,0.219582


In [80]:
# Display df2 (custom, unordered index) for comparison with df1.
df2

Unnamed: 0,col1,col2,col3
1,0.265849,0.160103,-1.298191
2,-0.223738,0.494333,-1.431662
10,1.585311,2.615029,-0.586682
11,0.633769,-0.202903,1.285321
3,-1.830041,-1.579847,-1.084855
5,1.26957,0.330183,0.067105
7,-0.341825,-1.378992,0.750389


In [81]:
# Return df1's data arranged on df2's row and column labels.
# Labels that df2 has but df1 lacks (10 and 11) come back as NaN rows.
df1.reindex_like(df2)

Unnamed: 0,col1,col2,col3
1,-0.638992,0.150211,1.680077
2,-0.473994,-1.580906,1.050541
10,,,
11,,,
3,0.37363,1.020251,-0.899896
5,0.892144,0.531701,-1.367893
7,-0.027962,0.607262,1.213213
