<a href="https://colab.research.google.com/github/mrzResearchArena/TF-2.x/blob/master/pandas-ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy  as np
import matplotlib.pyplot as plt

#### [1]. Data Frame:

In [4]:
# Without Column Name(s)

D = pd.DataFrame(
    data=[[10, 20, 30], [5, 7, 9]]    ### Core Array Type
)

D

Unnamed: 0,0,1,2
0,10,20,30
1,5,7,9


In [5]:
# With Column Name(s)

D = pd.DataFrame(
    data=[[10, 20, 30], [5, 7, 9]],   ### Core Array Type
    columns=['column-1', 'column-2', 'column-3'],
)

D

Unnamed: 0,column-1,column-2,column-3
0,10,20,30
1,5,7,9


In [6]:
# With Column Name(s)

D = pd.DataFrame(
    data={'column-1':[10, 20, 30], 'column-2':[5, 7, 9],}   ### Core Dictionary Type
)

D

Unnamed: 0,column-1,column-2
0,10,5
1,20,7
2,30,9


In [7]:
# With Column Name(s)

D = pd.DataFrame(
    data=np.random.randn(10, 3),   ### Core Array Type
    columns=['column-1', 'column-2', 'column-3'],
)

D

Unnamed: 0,column-1,column-2,column-3
0,0.707626,0.71296,1.063078
1,0.801393,-0.100102,0.023788
2,-1.297472,-0.016645,-0.176722
3,-0.458644,0.353607,1.462609
4,-1.091739,-0.757744,-0.875968
5,-0.870676,0.37763,-0.437225
6,-2.019472,-1.107761,-0.669451
7,-0.956785,-1.028983,-1.35331
8,-0.390531,0.393524,-1.048029
9,-0.44795,0.387032,-1.546123


#### [2]. Slicing:

In [8]:
D['column-1']             ### Column-wise
# Note: D[['column-1']]   ### Column-wise; it is the same.

0    0.707626
1    0.801393
2   -1.297472
3   -0.458644
4   -1.091739
5   -0.870676
6   -2.019472
7   -0.956785
8   -0.390531
9   -0.447950
Name: column-1, dtype: float64

In [9]:
# D['column-1', 'column-3']   ### Column-wise; Error!

D[['column-1', 'column-3']]   ### Column-wise; Fixed the error.

Unnamed: 0,column-1,column-3
0,0.707626,1.063078
1,0.801393,0.023788
2,-1.297472,-0.176722
3,-0.458644,1.462609
4,-1.091739,-0.875968
5,-0.870676,-0.437225
6,-2.019472,-0.669451
7,-0.956785,-1.35331
8,-0.390531,-1.048029
9,-0.44795,-1.546123


In [10]:
D[0:5]                    ### Row-wise or, Sample-wise
# Note: D[0:5, 0:1]       ### Error! (Errors will disappear, if the column names are numerics.)

Unnamed: 0,column-1,column-2,column-3
0,0.707626,0.71296,1.063078
1,0.801393,-0.100102,0.023788
2,-1.297472,-0.016645,-0.176722
3,-0.458644,0.353607,1.462609
4,-1.091739,-0.757744,-0.875968


In [11]:
D.loc[0:5, ['column-1','column-2']]   ### Both row-wised and column-wised

Unnamed: 0,column-1,column-2
0,0.707626,0.71296
1,0.801393,-0.100102
2,-1.297472,-0.016645
3,-0.458644,0.353607
4,-1.091739,-0.757744
5,-0.870676,0.37763


In [12]:
### Note-1: loc  --> MATLAB like indexing, column name should be exact.
### Note-2: iloc --> Python like indexing, column name convert into 0, 1, 2, and so on.

In [13]:
# D.loc[0:5, 0:1]   ### Error!
D.iloc[0:5, 0:2]    ### Both row-wised and column-wised

Unnamed: 0,column-1,column-2
0,0.707626,0.71296
1,0.801393,-0.100102
2,-1.297472,-0.016645
3,-0.458644,0.353607
4,-1.091739,-0.757744


#### [3]. Load Dataset:

##### Exercise-1:

In [26]:
location = 'https://raw.githubusercontent.com/mrzResearchArena/TF-2.x/master/sample-1.csv'

In [27]:
D = pd.read_csv(filepath_or_buffer=location)
D

Unnamed: 0,x1,x2,x3,x4,class
0,1,11,13,41,A
1,2,22,23,42,A
2,3,33,33,43,B
3,4,44,43,44,B
4,5,55,53,45,A
5,6,66,63,46,A
6,7,77,73,47,B
7,8,88,83,48,A
8,9,99,93,49,A
9,10,110,103,410,B


In [28]:
# Note: "skiprows=1" is required if the header is exists.

D = pd.read_csv(filepath_or_buffer=location, header=None, skiprows=1) # Perfectly removed the "header"
D

Unnamed: 0,0,1,2,3,4
0,1,11,13,41,A
1,2,22,23,42,A
2,3,33,33,43,B
3,4,44,43,44,B
4,5,55,53,45,A
5,6,66,63,46,A
6,7,77,73,47,B
7,8,88,83,48,A
8,9,99,93,49,A
9,10,110,103,410,B


##### Exercise-2:

In [29]:
location = 'https://raw.githubusercontent.com/mrzResearchArena/TF-2.x/master/sample-2.csv'

In [30]:
D = pd.read_csv(filepath_or_buffer=location)
D

Unnamed: 0,1,11,13,41,A
0,2,22,23,42,A
1,3,33,33,43,B
2,4,44,43,44,B
3,5,55,53,45,A
4,6,66,63,46,A
5,7,77,73,47,B
6,8,88,83,48,A
7,9,99,93,49,A
8,10,110,103,410,B


In [31]:
# Note: "skiprows=1" is NOT required if the header is exists.

D = pd.read_csv(filepath_or_buffer=location, header=None) # Perfectly removed the "header"
D

Unnamed: 0,0,1,2,3,4
0,1,11,13,41,A
1,2,22,23,42,A
2,3,33,33,43,B
3,4,44,43,44,B
4,5,55,53,45,A
5,6,66,63,46,A
6,7,77,73,47,B
7,8,88,83,48,A
8,9,99,93,49,A
9,10,110,103,410,B
