## pandas.DataFrame
### class pandas.DataFrame(data=None, index=None, columns=None, dtype=None, copy=None)
- Two-dimensional, size-mutable, potentially heterogeneous tabular data.

In [1]:
import pandas as pd;
import numpy as np;

In [5]:
d = np.array([[42,23,53],[63,12,53],[74,16,83]]);
df = pd.DataFrame(data=d);
df

Unnamed: 0,0,1,2
0,42,23,53
1,63,12,53
2,74,16,83


In [6]:
df = pd.DataFrame(data=d, index=['A','B','C']);
df

Unnamed: 0,0,1,2
A,42,23,53
B,63,12,53
C,74,16,83


In [7]:
df = pd.DataFrame(data=d, index=range(1,4), columns=['A','B','C']);
df

Unnamed: 0,A,B,C
1,42,23,53
2,63,12,53
3,74,16,83


In [16]:
df = pd.DataFrame(data=d, index=range(1,4), columns=['A','B','C'], dtype=np.float16);
df

Unnamed: 0,A,B,C
1,42.0,23.0,53.0
2,63.0,12.0,53.0
3,74.0,16.0,83.0


In [17]:
df.dtypes

A    float16
B    float16
C    float16
dtype: object

In [18]:
df = pd.DataFrame(data=d);
df

Unnamed: 0,0,1,2
0,42,23,53
1,63,12,53
2,74,16,83


In [19]:
df.dtypes

0    int32
1    int32
2    int32
dtype: object

In [28]:
df1 = pd.DataFrame(pd.Series([5,4,3,2], index=range(1,5)), columns=['X']);
print(df1);
print(id(df1));

print(id(pd.DataFrame(df1, copy=False)));

   X
1  5
2  4
3  3
4  2
2211203013056
2211203010704


In [30]:
df1 = pd.DataFrame([52,63,12,63], columns=['X']);
print(df1);
print(id(df1));

print(id(pd.DataFrame(df1, copy=True)));

    X
0  52
1  63
2  12
3  63
2211203016752
2211203012624


In [42]:
# The Copy Parameter

data = {'A':[23,53,23,53], 'B':[63.3,73.2,63,12], 'C':[55,2,63,23]};
print(id(data));

df1 = pd.DataFrame(data);
print(df);
print(id(df));

df2 = pd.DataFrame(df1, copy=False);
print(df);
print(id(df));

2211207281792
    A     B   C   D
0  23  63.3  55  34
1  53  73.2   2  53
2  23  63.0  63  23
3  53  12.0  23  45
2211203018096
    A     B   C   D
0  23  63.3  55  34
1  53  73.2   2  53
2  23  63.0  63  23
3  53  12.0  23  45
2211203018096


In [43]:
# Constructing DataFrame from a dictionary including Series:

d = {'col1':[43,23,53,23], 'col2':pd.Series([63,23,87,12])};
df = pd.DataFrame(d);
df

Unnamed: 0,col1,col2
0,43,63
1,23,23
2,53,87
3,23,12


In [50]:
d = {'col1':[53,23,53,23], 'col2':pd.Series([63,23], index=[1,2])}
df = pd.DataFrame(d, index=[0,1,2,3]);
df

Unnamed: 0,col1,col2
0,53,
1,23,63.0
2,53,23.0
3,23,


In [64]:
# Creating DataFrame from structure ndarray

d = np.array([(23,52),(64,23),(74,23),(63,12)], dtype=[('A','i4'), ('B','f2')]);
df = pd.DataFrame(d);
print(df);

df = pd.DataFrame(d, columns=['A']);
df

    A     B
0  23  52.0
1  64  23.0
2  74  23.0
3  63  12.0


Unnamed: 0,A
0,23
1,64
2,74
3,63


### Attributes

In [65]:
d = np.array([[23,53,23,52],[563,23,23,63]]);
df = pd.DataFrame(d);
df

Unnamed: 0,0,1,2,3
0,23,53,23,52
1,563,23,23,63


In [66]:
df.T

Unnamed: 0,0,1
0,23,563
1,53,23
2,23,23
3,52,63


In [68]:
df = pd.read_csv('csv_like_file.txt', sep='\t');
df.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
P_id,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0
Age,22.0,38.0,26.0,35.0,35.0,28.0,54.0,2.0,27.0,14.0
Fare,7.25,71.2833,7.925,53.1,8.05,8.4583,51.8625,21.075,11.1333,30.0708
Sex,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0
sibsp,1.0,1.0,0.0,1.0,0.0,0.0,0.0,3.0,0.0,1.0
zero,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zero.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zero.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zero.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zero.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### at

- Access a single value for a row/column label pair.

In [69]:
df.at[0,'Sex']

0

In [70]:
df.at[1,'P_id']

2

In [71]:
df.at[2,'Age']

26

In [75]:
df = pd.DataFrame({'A':[34,35,23,643], 'B':[63,23,52,23]}, index=range(11,15), columns=['A','B']);
df

Unnamed: 0,A,B
11,34,63
12,35,23
13,23,52
14,643,23


In [79]:
df.at[11,'A']

34

In [80]:
df.at[13,'B']

52

In [88]:
df.style

Unnamed: 0,A,B
11,34,63
12,35,23
13,23,52
14,643,23


#### axes

- Return a list representing the axes of the DataFrame.

In [92]:
df = pd.read_csv('csv_like_file.txt', sep='\t');
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [93]:
df.axes

[RangeIndex(start=0, stop=10, step=1),
 Index(['P_id', 'Age', 'Fare', 'Sex', 'sibsp', 'zero', 'zero.1', 'zero.2',
        'zero.3', 'zero.4', 'zero.5'],
       dtype='object')]

In [94]:
df.index

RangeIndex(start=0, stop=10, step=1)

In [96]:
df.columns

Index(['P_id', 'Age', 'Fare', 'Sex', 'sibsp', 'zero', 'zero.1', 'zero.2',
       'zero.3', 'zero.4', 'zero.5'],
      dtype='object')

In [98]:
df.index[0:4]

RangeIndex(start=0, stop=4, step=1)

In [101]:
df.columns[0]

'P_id'

In [104]:
df.attrs

{}

In [105]:
df.empty

False

In [106]:
df.dtypes

P_id        int64
Age         int64
Fare      float64
Sex         int64
sibsp       int64
zero        int64
zero.1      int64
zero.2      int64
zero.3      int64
zero.4      int64
zero.5      int64
dtype: object

In [107]:
df.flags

<Flags(allows_duplicate_labels=True)>

In [108]:
df.values

array([[ 1.    , 22.    ,  7.25  ,  0.    ,  1.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ],
       [ 2.    , 38.    , 71.2833,  1.    ,  1.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ],
       [ 3.    , 26.    ,  7.925 ,  1.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ],
       [ 4.    , 35.    , 53.1   ,  1.    ,  1.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ],
       [ 5.    , 35.    ,  8.05  ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ],
       [ 6.    , 28.    ,  8.4583,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ],
       [ 7.    , 54.    , 51.8625,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ],
       [ 8.    ,  2.    , 21.075 ,  0.    ,  3.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ],
       [ 9.    , 27.    , 11.1333,  1.    ,  0.    ,  0.    ,  0.    ,
 

In [109]:
df.ndim

2

In [110]:
df.shape

(10, 11)

In [111]:
df.size

110

In [112]:
df.dtypes

P_id        int64
Age         int64
Fare      float64
Sex         int64
sibsp       int64
zero        int64
zero.1      int64
zero.2      int64
zero.3      int64
zero.4      int64
zero.5      int64
dtype: object

In [113]:
df.index

RangeIndex(start=0, stop=10, step=1)

In [114]:
df.columns

Index(['P_id', 'Age', 'Fare', 'Sex', 'sibsp', 'zero', 'zero.1', 'zero.2',
       'zero.3', 'zero.4', 'zero.5'],
      dtype='object')

In [115]:
df.flags

<Flags(allows_duplicate_labels=True)>

In [None]:
df.d

### Methods

In [2]:
df = pd.DataFrame([24,42,12,31]);
df

Unnamed: 0,0
0,24
1,42
2,12
3,31


In [8]:
df = pd.DataFrame([[23,-52,-42,62],[53,-24,63,-23],[35,23,-63,34]], index=range(1,4), columns=['A','B','C','D']);
df

Unnamed: 0,A,B,C,D
1,23,-52,-42,62
2,53,-24,63,-23
3,35,23,-63,34


In [9]:
df.abs()

Unnamed: 0,A,B,C,D
1,23,52,42,62
2,53,24,63,23
3,35,23,63,34


In [12]:
df.add([23,53,23,53])

Unnamed: 0,A,B,C,D
1,46,1,-19,115
2,76,29,86,30
3,58,76,-40,87


In [15]:
df.add([23,42,23], axis=0)

Unnamed: 0,A,B,C,D
1,46,-29,-19,85
2,95,18,105,19
3,58,46,-40,57


In [21]:
df.add([53,23,63], axis=0, fill_value=0)

Unnamed: 0,A,B,C,D
1,76,1,11,115
2,76,-1,86,0
3,98,86,0,97


In [22]:
df

Unnamed: 0,A,B,C,D
1,23,-52,-42,62
2,53,-24,63,-23
3,35,23,-63,34


In [23]:
df.add_prefix('col_')

Unnamed: 0,col_A,col_B,col_C,col_D
1,23,-52,-42,62
2,53,-24,63,-23
3,35,23,-63,34


In [28]:
df.add_prefix('index#')

Unnamed: 0,index#A,index#B,index#C,index#D
1,23,-52,-42,62
2,53,-24,63,-23
3,35,23,-63,34


In [30]:
pd.DataFrame([23,42,23,42], index=range(1,5), columns=['A']).add_prefix('Col_')

Unnamed: 0,Col_A
1,23
2,42
3,23
4,42


In [33]:
df.add_suffix('-Col')

Unnamed: 0,A-Col,B-Col,C-Col,D-Col
1,23,-52,-42,62
2,53,-24,63,-23
3,35,23,-63,34


In [34]:
df.add_suffix('--Column')

Unnamed: 0,A--Column,B--Column,C--Column,D--Column
1,23,-52,-42,62
2,53,-24,63,-23
3,35,23,-63,34


In [35]:
df

Unnamed: 0,A,B,C,D
1,23,-52,-42,62
2,53,-24,63,-23
3,35,23,-63,34


In [42]:
df.agg(['sum','min',np.max,np.min])

Unnamed: 0,A,B,C,D
sum,111,-53,-42,73
min,23,-52,-63,-23
amax,53,23,63,62
amin,23,-52,-63,-23


In [46]:
df.aggregate(['sum','min'])

Unnamed: 0,A,B,C,D
sum,111,-53,-42,73
min,23,-52,-63,-23


In [47]:
type(df.aggregate(['sum', 'min']))

pandas.core.frame.DataFrame

In [44]:
df.aggregate('sum')

A    111
B    -53
C    -42
D     73
dtype: int64

In [45]:
type(df.aggregate('sum'))

pandas.core.series.Series

In [48]:
df = pd.read_csv('csv_like_file.txt', sep='\t');
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [50]:
df['Fare'].add(20)

0    27.2500
1    91.2833
2    27.9250
3    73.1000
4    28.0500
5    28.4583
6    71.8625
7    41.0750
8    31.1333
9    50.0708
Name: Fare, dtype: float64

In [52]:
type(df['Fare'].add(30))

pandas.core.series.Series

In [53]:
df.agg(sum)

P_id       55.0000
Age       281.0000
Fare      270.2082
Sex         5.0000
sibsp       7.0000
zero        0.0000
zero.1      0.0000
zero.2      0.0000
zero.3      0.0000
zero.4      0.0000
zero.5      0.0000
dtype: float64

In [67]:
df.aggregate([np.sum])

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
sum,55,281,270.2082,5,7,0,0,0,0,0,0


In [68]:
df.all()

P_id       True
Age        True
Fare       True
Sex       False
sibsp     False
zero      False
zero.1    False
zero.2    False
zero.3    False
zero.4    False
zero.5    False
dtype: bool

In [69]:
df.all(axis=0)

P_id       True
Age        True
Fare       True
Sex       False
sibsp     False
zero      False
zero.1    False
zero.2    False
zero.3    False
zero.4    False
zero.5    False
dtype: bool

In [70]:
df.all(axis=1)

0    False
1    False
2    False
3    False
4    False
5    False
6    False
7    False
8    False
9    False
dtype: bool

In [71]:
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [72]:
df.any()

P_id       True
Age        True
Fare       True
Sex        True
sibsp      True
zero      False
zero.1    False
zero.2    False
zero.3    False
zero.4    False
zero.5    False
dtype: bool

In [73]:
df.any(axis=0)

P_id       True
Age        True
Fare       True
Sex        True
sibsp      True
zero      False
zero.1    False
zero.2    False
zero.3    False
zero.4    False
zero.5    False
dtype: bool

In [74]:
df.any(axis=1)

0    True
1    True
2    True
3    True
4    True
5    True
6    True
7    True
8    True
9    True
dtype: bool

In [75]:
df = pd.DataFrame([[34,53,23,52],[53,23,52,53]]);
df

Unnamed: 0,0,1,2,3
0,34,53,23,52
1,53,23,52,53


In [76]:
df.dtypes

0    int64
1    int64
2    int64
3    int64
dtype: object

In [77]:
df.astype(np.float16)

Unnamed: 0,0,1,2,3
0,34.0,53.0,23.0,52.0
1,53.0,23.0,52.0,53.0


In [79]:
df

Unnamed: 0,0,1,2,3
0,34,53,23,52
1,53,23,52,53


In [80]:
df = pd.read_csv('csv_like_file.txt', sep='\t');
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [81]:
print(id(df));

df.copy()

2559839143392


Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [82]:
print(id(df))

2559839143392


In [84]:
df2 = df.copy()
df2

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [86]:
print(id(df2))
print(id(df))

2559839143008
2559839143392


In [87]:
df1 = df

In [88]:
df1

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [89]:
print(id(df))
print(id(df1))

2559839143392
2559839143392


In [93]:
df.cumsum(axis=0)

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,3,60,78.5333,1,2,0,0,0,0,0,0
2,6,86,86.4583,2,2,0,0,0,0,0,0
3,10,121,139.5583,3,3,0,0,0,0,0,0
4,15,156,147.6083,3,3,0,0,0,0,0,0
5,21,184,156.0666,3,3,0,0,0,0,0,0
6,28,238,207.9291,3,3,0,0,0,0,0,0
7,36,240,229.0041,3,6,0,0,0,0,0,0
8,45,267,240.1374,4,6,0,0,0,0,0,0
9,55,281,270.2082,5,7,0,0,0,0,0,0


In [94]:
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [95]:
df.cumsum(axis=1)

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1.0,23.0,30.25,30.25,31.25,31.25,31.25,31.25,31.25,31.25,31.25
1,2.0,40.0,111.2833,112.2833,113.2833,113.2833,113.2833,113.2833,113.2833,113.2833,113.2833
2,3.0,29.0,36.925,37.925,37.925,37.925,37.925,37.925,37.925,37.925,37.925
3,4.0,39.0,92.1,93.1,94.1,94.1,94.1,94.1,94.1,94.1,94.1
4,5.0,40.0,48.05,48.05,48.05,48.05,48.05,48.05,48.05,48.05,48.05
5,6.0,34.0,42.4583,42.4583,42.4583,42.4583,42.4583,42.4583,42.4583,42.4583,42.4583
6,7.0,61.0,112.8625,112.8625,112.8625,112.8625,112.8625,112.8625,112.8625,112.8625,112.8625
7,8.0,10.0,31.075,31.075,34.075,34.075,34.075,34.075,34.075,34.075,34.075
8,9.0,36.0,47.1333,48.1333,48.1333,48.1333,48.1333,48.1333,48.1333,48.1333,48.1333
9,10.0,24.0,54.0708,55.0708,56.0708,56.0708,56.0708,56.0708,56.0708,56.0708,56.0708


In [97]:
df.cummin(axis=1)

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2.0,2.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3.0,3.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4.0,4.0,4.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,6.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,7.0,7.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,8.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,9.0,9.0,9.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,10.0,10.0,10.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [98]:
df.cummax(axis=1)

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0
1,2.0,38.0,71.2833,71.2833,71.2833,71.2833,71.2833,71.2833,71.2833,71.2833,71.2833
2,3.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0
3,4.0,35.0,53.1,53.1,53.1,53.1,53.1,53.1,53.1,53.1,53.1
4,5.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0
5,6.0,28.0,28.0,28.0,28.0,28.0,28.0,28.0,28.0,28.0,28.0
6,7.0,54.0,54.0,54.0,54.0,54.0,54.0,54.0,54.0,54.0,54.0
7,8.0,8.0,21.075,21.075,21.075,21.075,21.075,21.075,21.075,21.075,21.075
8,9.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0
9,10.0,14.0,30.0708,30.0708,30.0708,30.0708,30.0708,30.0708,30.0708,30.0708,30.0708


In [100]:
df.cummax(axis=0)

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,38,71.2833,1,1,0,0,0,0,0,0
3,4,38,71.2833,1,1,0,0,0,0,0,0
4,5,38,71.2833,1,1,0,0,0,0,0,0
5,6,38,71.2833,1,1,0,0,0,0,0,0
6,7,54,71.2833,1,1,0,0,0,0,0,0
7,8,54,71.2833,1,3,0,0,0,0,0,0
8,9,54,71.2833,1,3,0,0,0,0,0,0
9,10,54,71.2833,1,3,0,0,0,0,0,0


### DataFrame.describe(percentiles=None, include=None, exclude=None)
- Generate descriptive statistics.

In [30]:
df = pd.read_csv("csv_like_file.txt", sep='\t');
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [3]:
df.describe()

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
count,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,5.5,28.1,27.02082,0.5,0.7,0.0,0.0,0.0,0.0,0.0,0.0
std,3.02765,14.09058,23.601938,0.527046,0.948683,0.0,0.0,0.0,0.0,0.0,0.0
min,1.0,2.0,7.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,3.25,23.0,8.152075,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,5.5,27.5,16.10415,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0
75%,7.75,35.0,46.414575,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
max,10.0,54.0,71.2833,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
df = pd.read_csv("Employee.csv", nrows=20);
df

Unnamed: 0,Education,JoiningYear,City,PaymentTier,Age,Gender,EverBenched,ExperienceInCurrentDomain,LeaveOrNot
0,Bachelors,2017,",",3,34,Male,No,0,0.0
1,Bachelors,2013,Pune,1,28,Female,,3,1.0
2,Null,2014,,3,38,,No,2,0.0
3,Masters,2016,Bangalore,3,27,Male,No,5,1.0
4,Masters,2017,Pune,3,24,,Yes,2,1.0
5,Bachelors,2016,Bangalore,3,22,Male,No,0,
6,Bachelors,2015,,3,38,Male,No,0,0.0
7,,2016,Bangalore,3,34,Female,No,2,1.0
8,Bachelors,2016,Pune,3,23,,No,1,0.0
9,Masters,2017,New Delhi,2,37,Male,No,2,0.0


In [8]:
df.describe()

Unnamed: 0,JoiningYear,PaymentTier,Age,ExperienceInCurrentDomain,LeaveOrNot
count,20.0,20.0,20.0,20.0,18.0
mean,2015.35,2.65,31.15,2.25,0.444444
std,1.694418,0.67082,5.62209,1.743409,0.51131
min,2012.0,1.0,22.0,0.0,0.0
25%,2014.0,2.75,27.0,0.75,0.0
50%,2016.0,3.0,33.0,2.0,0.0
75%,2016.25,3.0,34.75,3.25,1.0
max,2018.0,3.0,39.0,5.0,1.0


In [10]:
df.describe(percentiles=[0.1,0.2,0.3])

Unnamed: 0,JoiningYear,PaymentTier,Age,ExperienceInCurrentDomain,LeaveOrNot
count,20.0,20.0,20.0,20.0,18.0
mean,2015.35,2.65,31.15,2.25,0.444444
std,1.694418,0.67082,5.62209,1.743409,0.51131
min,2012.0,1.0,22.0,0.0,0.0
10%,2012.9,1.9,22.9,0.0,0.0
20%,2014.0,2.0,26.4,0.0,0.0
30%,2014.7,3.0,27.7,1.7,0.0
50%,2016.0,3.0,33.0,2.0,0.0
max,2018.0,3.0,39.0,5.0,1.0


In [12]:
df.describe(include=np.number)

Unnamed: 0,JoiningYear,PaymentTier,Age,ExperienceInCurrentDomain,LeaveOrNot
count,20.0,20.0,20.0,20.0,18.0
mean,2015.35,2.65,31.15,2.25,0.444444
std,1.694418,0.67082,5.62209,1.743409,0.51131
min,2012.0,1.0,22.0,0.0,0.0
25%,2014.0,2.75,27.0,0.75,0.0
50%,2016.0,3.0,33.0,2.0,0.0
75%,2016.25,3.0,34.75,3.25,1.0
max,2018.0,3.0,39.0,5.0,1.0


In [16]:
df.describe(include=object)

Unnamed: 0,Education,City,Gender,EverBenched
count,19,17,17,19
unique,3,4,2,2
top,Bachelors,Pune,Male,No
freq,14,7,12,17


In [15]:
df.describe(exclude=object)

Unnamed: 0,JoiningYear,PaymentTier,Age,ExperienceInCurrentDomain,LeaveOrNot
count,20.0,20.0,20.0,20.0,18.0
mean,2015.35,2.65,31.15,2.25,0.444444
std,1.694418,0.67082,5.62209,1.743409,0.51131
min,2012.0,1.0,22.0,0.0,0.0
25%,2014.0,2.75,27.0,0.75,0.0
50%,2016.0,3.0,33.0,2.0,0.0
75%,2016.25,3.0,34.75,3.25,1.0
max,2018.0,3.0,39.0,5.0,1.0


In [17]:
df.dtypes

Education                     object
JoiningYear                    int64
City                          object
PaymentTier                    int64
Age                            int64
Gender                        object
EverBenched                   object
ExperienceInCurrentDomain      int64
LeaveOrNot                   float64
dtype: object

In [18]:
df.describe(include=np.floating)

Unnamed: 0,LeaveOrNot
count,18.0
mean,0.444444
std,0.51131
min,0.0
25%,0.0
50%,0.0
75%,1.0
max,1.0


In [19]:
df.describe(exclude=np.floating)

Unnamed: 0,Education,JoiningYear,City,PaymentTier,Age,Gender,EverBenched,ExperienceInCurrentDomain
count,19,20.0,17,20.0,20.0,17,19,20.0
unique,3,,4,,,2,2,
top,Bachelors,,Pune,,,Male,No,
freq,14,,7,,,12,17,
mean,,2015.35,,2.65,31.15,,,2.25
std,,1.694418,,0.67082,5.62209,,,1.743409
min,,2012.0,,1.0,22.0,,,0.0
25%,,2014.0,,2.75,27.0,,,0.75
50%,,2016.0,,3.0,33.0,,,2.0
75%,,2016.25,,3.0,34.75,,,3.25


In [20]:
df.describe(include=np.integer)

Unnamed: 0,JoiningYear,PaymentTier,Age,ExperienceInCurrentDomain
count,20.0,20.0,20.0,20.0
mean,2015.35,2.65,31.15,2.25
std,1.694418,0.67082,5.62209,1.743409
min,2012.0,1.0,22.0,0.0
25%,2014.0,2.75,27.0,0.75
50%,2016.0,3.0,33.0,2.0
75%,2016.25,3.0,34.75,3.25
max,2018.0,3.0,39.0,5.0


In [31]:
df.describe(percentiles=[0.2,0.4,0.6,0.8])

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
count,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,5.5,28.1,27.02082,0.5,0.7,0.0,0.0,0.0,0.0,0.0,0.0
std,3.02765,14.09058,23.601938,0.527046,0.948683,0.0,0.0,0.0,0.0,0.0,0.0
min,1.0,2.0,7.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20%,2.8,20.4,8.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
40%,4.6,26.6,10.0633,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,5.5,27.5,16.10415,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0
60%,6.4,30.8,24.67332,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
80%,8.2,35.6,52.11,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
max,10.0,54.0,71.2833,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0


### DataFrame.diff(periods=1, axis=0)
- First discrete difference of element.

In [23]:
df = pd.DataFrame([[1,2,3,4,5,6],[12,13,14,15,16,17],[27,28,29,30,31,32]], index=range(1,4), columns=['A','B','C','D','E','F']);
df

Unnamed: 0,A,B,C,D,E,F
1,1,2,3,4,5,6
2,12,13,14,15,16,17
3,27,28,29,30,31,32


In [24]:
df.diff()                        # row(i)-row(i-1) element wise

Unnamed: 0,A,B,C,D,E,F
1,,,,,,
2,11.0,11.0,11.0,11.0,11.0,11.0
3,15.0,15.0,15.0,15.0,15.0,15.0


In [25]:
df.diff(axis=1)                      # column(i)-column(i-1)

Unnamed: 0,A,B,C,D,E,F
1,,1,1,1,1,1
2,,1,1,1,1,1
3,,1,1,1,1,1


In [27]:
df.diff(periods=2)                      # row(i)-row(i-2)

Unnamed: 0,A,B,C,D,E,F
1,,,,,,
2,,,,,,
3,26.0,26.0,26.0,26.0,26.0,26.0


In [28]:
df.diff(periods=2, axis=1)              # column(i)-column(i-2)

Unnamed: 0,A,B,C,D,E,F
1,,,2,2,2,2
2,,,2,2,2,2
3,,,2,2,2,2


In [29]:
df.diff(periods=-1)                     # row(i)-row(i+1)

Unnamed: 0,A,B,C,D,E,F
1,-11.0,-11.0,-11.0,-11.0,-11.0,-11.0
2,-15.0,-15.0,-15.0,-15.0,-15.0,-15.0
3,,,,,,


### DataFrame.div(other, axis='columns', level=None, fill_value=None)
- Get Floating division of dataframe and other, element-wise (binary operator truediv).

In [32]:
df = pd.DataFrame({'angles': [0, 3, 4],
                   'degrees': [360, 180, 360]},
                  index=['circle', 'triangle', 'rectangle']);
df

Unnamed: 0,angles,degrees
circle,0,360
triangle,3,180
rectangle,4,360


In [35]:
df.add(1)

Unnamed: 0,angles,degrees
circle,1,361
triangle,4,181
rectangle,5,361


In [36]:
df+1

Unnamed: 0,angles,degrees
circle,1,361
triangle,4,181
rectangle,5,361


In [37]:
df.add([3,4])

Unnamed: 0,angles,degrees
circle,3,364
triangle,6,184
rectangle,7,364


In [38]:
df.add([3,4], fill_value=0)

Unnamed: 0,angles,degrees
circle,3,364
triangle,6,184
rectangle,7,364


In [39]:
df.add([3,4], axis='columns')

Unnamed: 0,angles,degrees
circle,3,364
triangle,6,184
rectangle,7,364


In [41]:
df

Unnamed: 0,angles,degrees
circle,0,360
triangle,3,180
rectangle,4,360


In [40]:
df.add([6,2,5],axis='rows')

Unnamed: 0,angles,degrees
circle,6,366
triangle,5,182
rectangle,9,365


In [42]:
df.add([34,53,23],axis='index')

Unnamed: 0,angles,degrees
circle,34,394
triangle,56,233
rectangle,27,383


In [98]:
df.add(pd.Series([3,4,5], index=['circle','rectangle','triangle']),axis='index')

Unnamed: 0,angles,degrees
circle,3,363
rectangle,8,364
triangle,8,185


In [47]:
df

Unnamed: 0,angles,degrees
circle,0,360
triangle,3,180
rectangle,4,360


In [46]:
df.sub(3)

Unnamed: 0,angles,degrees
circle,-3,357
triangle,0,177
rectangle,1,357


In [48]:
df.div(4)

Unnamed: 0,angles,degrees
circle,0.0,90.0
triangle,0.75,45.0
rectangle,1.0,90.0


In [49]:
df.divide(4)

Unnamed: 0,angles,degrees
circle,0.0,90.0
triangle,0.75,45.0
rectangle,1.0,90.0


In [50]:
df.divide(4,fill_value=0)

Unnamed: 0,angles,degrees
circle,0.0,90.0
triangle,0.75,45.0
rectangle,1.0,90.0


In [55]:
df.mul(pd.Series([4,3], index=['angles','degrees']))

Unnamed: 0,angles,degrees
circle,0,1080
triangle,12,540
rectangle,16,1080


In [56]:
df

Unnamed: 0,angles,degrees
circle,0,360
triangle,3,180
rectangle,4,360


In [59]:
df.mul({'circle':3, 'triangle':4, 'rectangle':5}, axis='index')

Unnamed: 0,angles,degrees
circle,0,1080
triangle,12,720
rectangle,20,1800


In [66]:
df.div({'angles':1, 'degrees':4}, axis='columns')

Unnamed: 0,angles,degrees
circle,0.0,90.0
triangle,3.0,45.0
rectangle,4.0,90.0


In [78]:
other = pd.DataFrame({'angles':[3,4,5]}, index=['circle','triangle','rectangle']);
other

Unnamed: 0,angles
circle,3
triangle,4
rectangle,5


In [79]:
df.div(other)

Unnamed: 0,angles,degrees
circle,0.0,
triangle,0.75,
rectangle,0.8,


In [80]:
df.mul(other)

Unnamed: 0,angles,degrees
circle,0,
triangle,12,
rectangle,20,


In [85]:
df.div(other, fill_value=1)

Unnamed: 0,angles,degrees
circle,0.0,360.0
triangle,0.75,180.0
rectangle,0.8,360.0


In [86]:
df.mul(other, fill_value=0)

Unnamed: 0,angles,degrees
circle,0,0.0
triangle,12,0.0
rectangle,20,0.0


In [95]:
df.mul(pd.DataFrame({'degrees':[3,4,4]}, index=['circle','triangle','rectangle']), axis=1)

Unnamed: 0,angles,degrees
circle,,1080
triangle,,720
rectangle,,1440


In [96]:
df.mul(pd.DataFrame({'degrees':[3,4,4]}, index=['circle','triangle','rectangle']), axis=1, fill_value=0)

Unnamed: 0,angles,degrees
circle,0.0,1080
triangle,0.0,720
rectangle,0.0,1440


In [99]:
df_multindex = pd.DataFrame({'angles': [0, 3, 4, 4, 5, 6],
                             'degrees': [360, 180, 360, 360, 540, 720]},
                            index=[['A', 'A', 'A', 'B', 'B', 'B'],
                                   ['circle', 'triangle', 'rectangle',
                                    'square', 'pentagon', 'hexagon']]);
df_multindex

Unnamed: 0,Unnamed: 1,angles,degrees
A,circle,0,360
A,triangle,3,180
A,rectangle,4,360
B,square,4,360
B,pentagon,5,540
B,hexagon,6,720


In [108]:
df.add(df_multindex, level=1, fill_value=0.0)

Unnamed: 0,Unnamed: 1,angles,degrees
A,circle,0.0,720.0
A,triangle,6.0,360.0
A,rectangle,8.0,720.0
B,square,4.0,360.0
B,pentagon,5.0,540.0
B,hexagon,6.0,720.0


In [110]:
df.rdiv(3)                #   Here the Dividend is 3 and divisor is df.   i.e.   3/df

Unnamed: 0,angles,degrees
circle,inf,0.008333
triangle,1.0,0.016667
rectangle,0.75,0.008333


In [111]:
df.keys()

Index(['angles', 'degrees'], dtype='object')

In [3]:
df = pd.DataFrame([True]);
df

Unnamed: 0,0
0,True


In [4]:
df.bool()

True

In [5]:
df= pd.DataFrame([1,1]);
df

Unnamed: 0,0
0,1
1,1


In [9]:
df.any().bool()

True

In [10]:
df.all().bool()

True

In [11]:
df.all()

0    True
dtype: bool

In [12]:
df = pd.DataFrame([3,0,1,2,3,0,0], index=range(1,8), columns=['A']);
df

Unnamed: 0,A
1,3
2,0
3,1
4,2
5,3
6,0
7,0


In [13]:
df.any().bool()

True

In [14]:
df.all().bool()

False

In [15]:
df.all()

A    False
dtype: bool

In [16]:
df.any()

A    True
dtype: bool

In [17]:
df==True

Unnamed: 0,A
1,False
2,False
3,True
4,False
5,False
6,False
7,False


In [18]:
# DataFrame.head()

df = pd.read_csv("csv_like_file.txt", sep='\t');
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [19]:
df.head()

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0


In [20]:
df.head(6)

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0


In [21]:
df.head(-2)

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0


In [22]:
df.head(0)

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5


In [23]:
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


### Indexing and Slicding

In [24]:
df.at[0,'P_id']     # ValueError If row/column label pair is not a tuple or if any label from

1

In [26]:
df.at[9,'Age']

14

In [28]:
df.loc[0,'P_id']

1

In [29]:
df.loc[0]

P_id       1.00
Age       22.00
Fare       7.25
Sex        0.00
sibsp      1.00
zero       0.00
zero.1     0.00
zero.2     0.00
zero.3     0.00
zero.4     0.00
zero.5     0.00
Name: 0, dtype: float64

In [31]:
df.iloc[0,0]

1

In [32]:
df.iloc[0]

P_id       1.00
Age       22.00
Fare       7.25
Sex        0.00
sibsp      1.00
zero       0.00
zero.1     0.00
zero.2     0.00
zero.3     0.00
zero.4     0.00
zero.5     0.00
Name: 0, dtype: float64

In [34]:
type(df.iloc[3])

pandas.core.series.Series

In [35]:
type(df.loc[0])

pandas.core.series.Series

In [36]:
type(df.at[0,'Age'])

numpy.int64

In [37]:
type(df.iloc[0,0])

numpy.int64

In [41]:
df.iat[0,0]             # you must give it two arguments i.e. row and col,   it is used for single element

1

In [42]:
df.iat[3,4]

1

In [44]:
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [46]:
df.loc[1]

P_id       2.0000
Age       38.0000
Fare      71.2833
Sex        1.0000
sibsp      1.0000
zero       0.0000
zero.1     0.0000
zero.2     0.0000
zero.3     0.0000
zero.4     0.0000
zero.5     0.0000
Name: 1, dtype: float64

In [47]:
df.loc[[0,1,2]]

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0


In [49]:
df.loc[[0,2,3]]

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0


In [50]:
type(df.loc[[2,3,3]])

pandas.core.frame.DataFrame

In [51]:
df.loc[0,'P_id']

1

In [53]:
df.loc[df['P_id']<4]

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0


In [54]:
df['P_id']

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
Name: P_id, dtype: int64

In [59]:
df[0:8]

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0


In [61]:
df.loc[(df['P_id']>2) & (df['P_id']<7)]

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0


In [65]:
df.loc[np.bool_(np.zeros(10))]

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5


In [67]:
df.loc[np.bool_(np.ones(10))]

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [78]:
df = pd.DataFrame(pd.read_csv('csv_like_file.txt', sep='\t', index_col=False), index=[0,1,2,3,4,5,6,7,8,9]);

In [79]:
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [80]:
df = pd.DataFrame(pd.read_csv('csv_like_file.txt', sep='\t', index_col=False), index=['r1','r2','r3','r4','r5','r6','r7','r8','r9','r10']);

In [81]:
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
r1,,,,,,,,,,,
r2,,,,,,,,,,,
r3,,,,,,,,,,,
r4,,,,,,,,,,,
r5,,,,,,,,,,,
r6,,,,,,,,,,,
r7,,,,,,,,,,,
r8,,,,,,,,,,,
r9,,,,,,,,,,,
r10,,,,,,,,,,,


In [117]:
df = pd.read_csv('csv_like_file.txt', sep='\t', index_col=None)
df['']=['r1','r2','r3','r4','r5','r6','r7','r8','r9','r10'];
df.set_index('', inplace=True)
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
,,,,,,,,,,,
r1,1.0,22.0,7.25,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
r2,2.0,38.0,71.2833,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
r3,3.0,26.0,7.925,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
r4,4.0,35.0,53.1,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
r5,5.0,35.0,8.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
r6,6.0,28.0,8.4583,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
r7,7.0,54.0,51.8625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
r8,8.0,2.0,21.075,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
r9,9.0,27.0,11.1333,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [118]:
df.loc['r1','P_id']

1

In [121]:
df.loc[pd.Series([True,False,False,True,True,False,True,True,False,False], index=['r1','r2','r3','r4','r5','r6','r7','r8','r9','r10'])]

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
,,,,,,,,,,,
r1,1.0,22.0,7.25,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
r4,4.0,35.0,53.1,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
r5,5.0,35.0,8.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
r7,7.0,54.0,51.8625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
r8,8.0,2.0,21.075,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0


In [122]:
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
,,,,,,,,,,,
r1,1.0,22.0,7.25,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
r2,2.0,38.0,71.2833,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
r3,3.0,26.0,7.925,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
r4,4.0,35.0,53.1,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
r5,5.0,35.0,8.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
r6,6.0,28.0,8.4583,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
r7,7.0,54.0,51.8625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
r8,8.0,2.0,21.075,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
r9,9.0,27.0,11.1333,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [127]:
df.loc[lambda x: df['Fare']<40]

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
,,,,,,,,,,,
r1,1.0,22.0,7.25,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
r3,3.0,26.0,7.925,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
r5,5.0,35.0,8.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
r6,6.0,28.0,8.4583,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
r8,8.0,2.0,21.075,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
r9,9.0,27.0,11.1333,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
r10,10.0,14.0,30.0708,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [128]:
df['Fare'].loc[lambda x: df['Fare']<40]


r1      7.2500
r3      7.9250
r5      8.0500
r6      8.4583
r8     21.0750
r9     11.1333
r10    30.0708
Name: Fare, dtype: float64

In [129]:
df.loc['r1':'r5', :]

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
,,,,,,,,,,,
r1,1.0,22.0,7.25,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
r2,2.0,38.0,71.2833,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
r3,3.0,26.0,7.925,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
r4,4.0,35.0,53.1,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
r5,5.0,35.0,8.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [130]:
df.loc[:, 'P_id':'Sex']

Unnamed: 0,P_id,Age,Fare,Sex
,,,,
r1,1.0,22.0,7.25,0.0
r2,2.0,38.0,71.2833,1.0
r3,3.0,26.0,7.925,1.0
r4,4.0,35.0,53.1,1.0
r5,5.0,35.0,8.05,0.0
r6,6.0,28.0,8.4583,0.0
r7,7.0,54.0,51.8625,0.0
r8,8.0,2.0,21.075,0.0
r9,9.0,27.0,11.1333,1.0


In [135]:
# MultiIndex

tuples = [('level1','row1'),('level1','row2'),('level1','row3'),('level2','row1'),('level2','row2'),('level2','row3')];
index = pd.MultiIndex.from_tuples(tuples);
columns = ['Name','Age','GPA',];
data = [['Kareem',22,3.4],['Saleem',19,3.5],['Usman',22,3.4],['Akram',23,3.6],['Waleed',24,3.5],['Asad',21,3.8]];

df = pd.DataFrame(data, index=index, columns=columns);
df

Unnamed: 0,Unnamed: 1,Name,Age,GPA
level1,row1,Kareem,22,3.4
level1,row2,Saleem,19,3.5
level1,row3,Usman,22,3.4
level2,row1,Akram,23,3.6
level2,row2,Waleed,24,3.5
level2,row3,Asad,21,3.8


In [137]:
df.loc['level1']

Unnamed: 0,Name,Age,GPA
row1,Kareem,22,3.4
row2,Saleem,19,3.5
row3,Usman,22,3.4


In [138]:
df.loc['level1','row1']

Name    Kareem
Age         22
GPA        3.4
Name: (level1, row1), dtype: object

In [139]:
df.loc[('level2','row2')]

Name    Waleed
Age         24
GPA        3.5
Name: (level2, row2), dtype: object

In [140]:
df.loc['level1':'level2']

Unnamed: 0,Unnamed: 1,Name,Age,GPA
level1,row1,Kareem,22,3.4
level1,row2,Saleem,19,3.5
level1,row3,Usman,22,3.4
level2,row1,Akram,23,3.6
level2,row2,Waleed,24,3.5
level2,row3,Asad,21,3.8


In [142]:
df.loc[['level1','level2']]

Unnamed: 0,Unnamed: 1,Name,Age,GPA
level1,row1,Kareem,22,3.4
level1,row2,Saleem,19,3.5
level1,row3,Usman,22,3.4
level2,row1,Akram,23,3.6
level2,row2,Waleed,24,3.5
level2,row3,Asad,21,3.8


In [143]:
df.loc[[('level1','row2')]]   # Data Frame

Unnamed: 0,Unnamed: 1,Name,Age,GPA
level1,row2,Saleem,19,3.5


In [149]:
df.loc[[('level1','row1')],'Name']

level1  row1    Kareem
Name: Name, dtype: object

In [153]:
df.loc['level1','row1']['Name']

'Kareem'

In [154]:
df.loc[('level1','row2'):('level2','row2')]

Unnamed: 0,Unnamed: 1,Name,Age,GPA
level1,row2,Saleem,19,3.5
level1,row3,Usman,22,3.4
level2,row1,Akram,23,3.6
level2,row2,Waleed,24,3.5


In [155]:
df.loc[('level1','row1'):('level1','row3')]

Unnamed: 0,Unnamed: 1,Name,Age,GPA
level1,row1,Kareem,22,3.4
level1,row2,Saleem,19,3.5
level1,row3,Usman,22,3.4


In [158]:
df = pd.read_csv('csv_like_file.txt', sep='\t');
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [159]:
df.info

<bound method DataFrame.info of    P_id  Age     Fare  Sex  sibsp  zero  zero.1  zero.2  zero.3  zero.4  \
0     1   22   7.2500    0      1     0       0       0       0       0   
1     2   38  71.2833    1      1     0       0       0       0       0   
2     3   26   7.9250    1      0     0       0       0       0       0   
3     4   35  53.1000    1      1     0       0       0       0       0   
4     5   35   8.0500    0      0     0       0       0       0       0   
5     6   28   8.4583    0      0     0       0       0       0       0   
6     7   54  51.8625    0      0     0       0       0       0       0   
7     8    2  21.0750    0      3     0       0       0       0       0   
8     9   27  11.1333    1      0     0       0       0       0       0   
9    10   14  30.0708    1      1     0       0       0       0       0   

   zero.5  
0       0  
1       0  
2       0  
3       0  
4       0  
5       0  
6       0  
7       0  
8       0  
9       0  >

In [160]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   P_id    10 non-null     int64  
 1   Age     10 non-null     int64  
 2   Fare    10 non-null     float64
 3   Sex     10 non-null     int64  
 4   sibsp   10 non-null     int64  
 5   zero    10 non-null     int64  
 6   zero.1  10 non-null     int64  
 7   zero.2  10 non-null     int64  
 8   zero.3  10 non-null     int64  
 9   zero.4  10 non-null     int64  
 10  zero.5  10 non-null     int64  
dtypes: float64(1), int64(10)
memory usage: 1008.0 bytes


In [161]:
df.info(show_counts=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   P_id    10 non-null     int64  
 1   Age     10 non-null     int64  
 2   Fare    10 non-null     float64
 3   Sex     10 non-null     int64  
 4   sibsp   10 non-null     int64  
 5   zero    10 non-null     int64  
 6   zero.1  10 non-null     int64  
 7   zero.2  10 non-null     int64  
 8   zero.3  10 non-null     int64  
 9   zero.4  10 non-null     int64  
 10  zero.5  10 non-null     int64  
dtypes: float64(1), int64(10)
memory usage: 1008.0 bytes


In [162]:
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [164]:
df.select_dtypes(include=np.floating)

Unnamed: 0,Fare
0,7.25
1,71.2833
2,7.925
3,53.1
4,8.05
5,8.4583
6,51.8625
7,21.075
8,11.1333
9,30.0708


In [167]:
df.select_dtypes(exclude=np.floating)

Unnamed: 0,P_id,Age,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,0,1,0,0,0,0,0,0
1,2,38,1,1,0,0,0,0,0,0
2,3,26,1,0,0,0,0,0,0,0
3,4,35,1,1,0,0,0,0,0,0
4,5,35,0,0,0,0,0,0,0,0
5,6,28,0,0,0,0,0,0,0,0
6,7,54,0,0,0,0,0,0,0,0
7,8,2,0,3,0,0,0,0,0,0
8,9,27,1,0,0,0,0,0,0,0
9,10,14,1,1,0,0,0,0,0,0


In [168]:
df.select_dtypes(include=np.integer)

Unnamed: 0,P_id,Age,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,0,1,0,0,0,0,0,0
1,2,38,1,1,0,0,0,0,0,0
2,3,26,1,0,0,0,0,0,0,0
3,4,35,1,1,0,0,0,0,0,0
4,5,35,0,0,0,0,0,0,0,0
5,6,28,0,0,0,0,0,0,0,0
6,7,54,0,0,0,0,0,0,0,0
7,8,2,0,3,0,0,0,0,0,0
8,9,27,1,0,0,0,0,0,0,0
9,10,14,1,1,0,0,0,0,0,0


In [171]:
df.values

array([[ 1.    , 22.    ,  7.25  ,  0.    ,  1.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ],
       [ 2.    , 38.    , 71.2833,  1.    ,  1.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ],
       [ 3.    , 26.    ,  7.925 ,  1.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ],
       [ 4.    , 35.    , 53.1   ,  1.    ,  1.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ],
       [ 5.    , 35.    ,  8.05  ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ],
       [ 6.    , 28.    ,  8.4583,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ],
       [ 7.    , 54.    , 51.8625,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ],
       [ 8.    ,  2.    , 21.075 ,  0.    ,  3.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ],
       [ 9.    , 27.    , 11.1333,  1.    ,  0.    ,  0.    ,  0.    ,
 

In [178]:
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [179]:
df.count()

P_id      10
Age       10
Fare      10
Sex       10
sibsp     10
zero      10
zero.1    10
zero.2    10
zero.3    10
zero.4    10
zero.5    10
dtype: int64

In [190]:
df = pd.DataFrame([[53,'NA',33,53,np.nan,np.inf,43],[53,34,53,pd.NA, None,'NaN', None]]);
df

Unnamed: 0,0,1,2,3,4,5,6
0,53,,33,53.0,,inf,43.0
1,53,34.0,53,,,,


In [191]:
df.dtypes

0      int64
1     object
2      int64
3     object
4    float64
5     object
6    float64
dtype: object

In [192]:
df.count()

0    2
1    2
2    2
3    1
4    0
5    2
6    1
dtype: int64

In [194]:
df.count(axis='columns')

0    6
1    4
dtype: int64

In [195]:
df.count(axis='index')

0    2
1    2
2    2
3    1
4    0
5    2
6    1
dtype: int64

In [197]:
df

Unnamed: 0,0,1,2,3,4,5,6
0,53,,33,53.0,,inf,43.0
1,53,34.0,53,,,,


In [196]:
df.count(numeric_only=True)

0    2
2    2
4    0
6    1
dtype: int64

### DataFrame.assign(**kwargs)
- Assign new columns to a DataFrame.

In [201]:
df = pd.read_csv('csv_like_file.txt', sep='\t');
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [211]:
df.assign(Fare = lambda x: x.Fare + x.Fare/100*10)

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.975,0,1,0,0,0,0,0,0
1,2,38,78.41163,1,1,0,0,0,0,0,0
2,3,26,8.7175,1,0,0,0,0,0,0,0
3,4,35,58.41,1,1,0,0,0,0,0,0
4,5,35,8.855,0,0,0,0,0,0,0,0
5,6,28,9.30413,0,0,0,0,0,0,0,0
6,7,54,57.04875,0,0,0,0,0,0,0,0
7,8,2,23.1825,0,3,0,0,0,0,0,0
8,9,27,12.24663,1,0,0,0,0,0,0,0
9,10,14,33.07788,1,1,0,0,0,0,0,0


In [213]:
df.assign(Name = 'akram')

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5,Name
0,1,22,7.25,0,1,0,0,0,0,0,0,akram
1,2,38,71.2833,1,1,0,0,0,0,0,0,akram
2,3,26,7.925,1,0,0,0,0,0,0,0,akram
3,4,35,53.1,1,1,0,0,0,0,0,0,akram
4,5,35,8.05,0,0,0,0,0,0,0,0,akram
5,6,28,8.4583,0,0,0,0,0,0,0,0,akram
6,7,54,51.8625,0,0,0,0,0,0,0,0,akram
7,8,2,21.075,0,3,0,0,0,0,0,0,akram
8,9,27,11.1333,1,0,0,0,0,0,0,0,akram
9,10,14,30.0708,1,1,0,0,0,0,0,0,akram


In [233]:
df.assign(Fare = lambda x: x.Fare + x.Fare/100*20,  Sex = lambda x: [1 if i==0 else 0 for i in x.Sex])

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,8.7,1,1,0,0,0,0,0,0
1,2,38,85.53996,0,1,0,0,0,0,0,0
2,3,26,9.51,0,0,0,0,0,0,0,0
3,4,35,63.72,0,1,0,0,0,0,0,0
4,5,35,9.66,1,0,0,0,0,0,0,0
5,6,28,10.14996,1,0,0,0,0,0,0,0
6,7,54,62.235,1,0,0,0,0,0,0,0
7,8,2,25.29,1,3,0,0,0,0,0,0
8,9,27,13.35996,0,0,0,0,0,0,0,0
9,10,14,36.08496,0,1,0,0,0,0,0,0


### DataFrame.get(key, default=None)
- Get item from object for given key (ex: DataFrame column).
- Returns default value if not found.

In [234]:
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [235]:
df.get('P_id')

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
Name: P_id, dtype: int64

In [236]:
df.get('P_id','Age')

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
Name: P_id, dtype: int64

In [237]:
df.get(['P_id','Age'])

Unnamed: 0,P_id,Age
0,1,22
1,2,38
2,3,26
3,4,35
4,5,35
5,6,28
6,7,54
7,8,2
8,9,27
9,10,14


In [240]:
df = pd.DataFrame(
    [
        [24.3, 75.7, "high"],
        [31, 87.8, "high"],
        [22, 71.6, "medium"],
        [35, 95, "medium"],
    ],
    columns=["temp_celsius", "temp_fahrenheit", "windspeed"],
    index=pd.date_range(start="2014-02-12", end="2014-02-15", freq="D"),
)

In [241]:
df

Unnamed: 0,temp_celsius,temp_fahrenheit,windspeed
2014-02-12,24.3,75.7,high
2014-02-13,31.0,87.8,high
2014-02-14,22.0,71.6,medium
2014-02-15,35.0,95.0,medium


In [243]:
ser = df['windspeed']
ser

2014-02-12      high
2014-02-13      high
2014-02-14    medium
2014-02-15    medium
Freq: D, Name: windspeed, dtype: object

In [244]:
ser.get('2014-02-12')

'high'

In [245]:
df

Unnamed: 0,temp_celsius,temp_fahrenheit,windspeed
2014-02-12,24.3,75.7,high
2014-02-13,31.0,87.8,high
2014-02-14,22.0,71.6,medium
2014-02-15,35.0,95.0,medium


In [246]:
df.get('temp_celsius')

2014-02-12    24.3
2014-02-13    31.0
2014-02-14    22.0
2014-02-15    35.0
Freq: D, Name: temp_celsius, dtype: float64

In [250]:
df.get(['tmp_celsius','windspeed'], default='temp_fahrenheit')

'temp_fahrenheit'

### df.apply(func, axis=0, row=False, result_type=None, args(), by_row='compat', **kwargs)

In [4]:
df = pd.DataFrame([[4,9]]*3, columns=["A","B"]);
df

Unnamed: 0,A,B
0,4,9
1,4,9
2,4,9


In [5]:
np.sum(df)

A    12
B    27
dtype: int64

In [6]:
np.sum(df, axis=1)

0    13
1    13
2    13
dtype: int64

In [8]:
df.apply(np.sum)

A    12
B    27
dtype: int64

In [9]:
df.apply(np.sum, axis=1)

0    13
1    13
2    13
dtype: int64

In [24]:
def square(x):
    return (x**2);
    
df.apply(square)

Unnamed: 0,A,B
0,16,81
1,16,81
2,16,81


### # pd.apply()   this function pass rows or columns one by one that you specify in index and apply the given function to it.


In [38]:

def Sum(data):
    s = 0;
    for item in data:
        s+=item;
    return s;
df.apply(Sum, axis=1)

0    13
1    13
2    13
dtype: int64

In [40]:
# we can prove the above statement

def  Sum(data):
    print(data);             # this line will print evey row or column passed ot it.
    s = 0;
    for item in data:
        s+=item;
    return s;

df.apply(Sum)

0    4
1    4
2    4
Name: A, dtype: int64
0    9
1    9
2    9
Name: B, dtype: int64


A    12
B    27
dtype: int64

In [45]:
def  Sum(data):
    print(data);             # this line will print evey row or column passed ot it.
    s = 0;
    for item in data:
        s+=item;
    return s;

df.apply(Sum, axis=1, raw=True)   # raw parameter specify whether to pass ndarray objects or Series object instead.
                                  # raw = True   will pass ndarray objects to function

[4 9]
[4 9]
[4 9]


0    13
1    13
2    13
dtype: int64

In [43]:
def  Sum(data):
    print(data);             # this line will print evey row or column passed ot it.
    s = 0;
    for item in data:
        s+=item;
    return s;

df.apply(Sum, axis=0, raw=True)

[4 4 4]
[9 9 9]


A    12
B    27
dtype: int64

In [44]:
def  Sum(data):
    print(data);             # this line will print evey row or column passed ot it.
    s = 0;
    for item in data:
        s+=item;
    return s;

df.apply(Sum, axis=0, raw=False)

0    4
1    4
2    4
Name: A, dtype: int64
0    9
1    9
2    9
Name: B, dtype: int64


A    12
B    27
dtype: int64

In [58]:
df. apply(np.sum, axis=1, raw=True, result_type='expand')

0    13
1    13
2    13
dtype: int64

In [57]:
df. apply(np.sum, axis=1, raw=True, result_type='reduce')

0    13
1    13
2    13
dtype: int64

In [56]:
df. apply(np.sum, axis=1, raw=True, result_type='broadcast')

Unnamed: 0,A,B
0,13,13
1,13,13
2,13,13


In [60]:
df.count()

A    3
B    3
dtype: int64

In [64]:
df.count(axis=1, numeric_only=True)

0    2
1    2
2    2
dtype: int64

In [66]:
df

Unnamed: 0,A,B
0,4,9
1,4,9
2,4,9


### df.cummin(axis=None, skipna=True, *args, **kwargs);

In [67]:
df.cummin(axis=1)

Unnamed: 0,A,B
0,4,4
1,4,4
2,4,4


In [71]:
df.cumprod(axis=0)

Unnamed: 0,A,B
0,4,9
1,16,81
2,64,729


In [72]:
df.cumsum(axis=1)

Unnamed: 0,A,B
0,4,13
1,4,13
2,4,13


In [73]:
df.cummax(axis=1)

Unnamed: 0,A,B
0,4,9
1,4,9
2,4,9


In [74]:
df.describe()

Unnamed: 0,A,B
count,3.0,3.0
mean,4.0,9.0
std,0.0,0.0
min,4.0,9.0
25%,4.0,9.0
50%,4.0,9.0
75%,4.0,9.0
max,4.0,9.0


In [75]:
type(df.describe())

pandas.core.frame.DataFrame

In [76]:
df.std()

A    0.0
B    0.0
dtype: float64

In [77]:
df.mean()

A    4.0
B    9.0
dtype: float64

In [79]:
df.count()

A    3
B    3
dtype: int64

In [80]:
df.min()

A    4
B    9
dtype: int64

In [82]:
df.max()

A    4
B    9
dtype: int64

### pd.round(decimals=0, *args, **kwargs)

In [83]:
df = pd.read_csv('csv_like_file.txt', sep='\t');
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [84]:
df['Fare'].round(decimals=2)

0     7.25
1    71.28
2     7.92
3    53.10
4     8.05
5     8.46
6    51.86
7    21.08
8    11.13
9    30.07
Name: Fare, dtype: float64

In [85]:
df.round(2)

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.28,1,1,0,0,0,0,0,0
2,3,26,7.92,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.46,0,0,0,0,0,0,0,0
6,7,54,51.86,0,0,0,0,0,0,0,0
7,8,2,21.08,0,3,0,0,0,0,0,0
8,9,27,11.13,1,0,0,0,0,0,0,0
9,10,14,30.07,1,1,0,0,0,0,0,0


In [90]:
df = pd.DataFrame([[23.424, 53.2353, 634.634],[644.644, 642.644, 74.6342],[64.235, 75.357, 745.2356]], columns=['col1','col2','col3']);
df

Unnamed: 0,col1,col2,col3
0,23.424,53.2353,634.634
1,644.644,642.644,74.6342
2,64.235,75.357,745.2356


In [92]:
df.round({'col1':2, 'col2':1, 'col3':3})

Unnamed: 0,col1,col2,col3
0,23.42,53.2,634.634
1,644.64,642.6,74.634
2,64.24,75.4,745.236


In [93]:
df.round(pd.Series([1,2,3], index=['col1','col2','col3']))

Unnamed: 0,col1,col2,col3
0,23.4,53.24,634.634
1,644.6,642.64,74.634
2,64.2,75.36,745.236


In [94]:
df

Unnamed: 0,col1,col2,col3
0,23.424,53.2353,634.634
1,644.644,642.644,74.6342
2,64.235,75.357,745.2356


In [95]:
df.rpow(3)

Unnamed: 0,col1,col2,col3
0,149999000000.0,2.510112e+25,6.271485e+302
1,3.7441580000000003e+307,4.160175e+306,4.069707e+35
2,4.4451229999999997e+30,9.003811e+35,inf


In [96]:
df.rpow(2)

Unnamed: 0,col1,col2,col3
0,11254510.0,1.060284e+16,1.106293e+191
1,1.140724e+194,2.85181e+193,2.93179e+22
2,2.17101e+19,4.838574e+22,2.1790610000000002e+224


In [98]:
pd.DataFrame([3,3,3]).rpow(2)

Unnamed: 0,0
0,8
1,8
2,8


In [100]:
pd.DataFrame([3,3,3]).pow(2)

Unnamed: 0,0
0,9
1,9
2,9


### DataFrame.ge(other, axis='columns', level=None)
- DataFrame.eq()
- DataFrame.ne()
- DataFrame.le()
- DataFrame.lt()
- DataFrame.ge()
- DataFrame.gt()

In [101]:
df

Unnamed: 0,col1,col2,col3
0,23.424,53.2353,634.634
1,644.644,642.644,74.6342
2,64.235,75.357,745.2356


In [102]:
df.ge(500)

Unnamed: 0,col1,col2,col3
0,False,False,True
1,True,True,False
2,False,False,True


In [103]:
df>=500

Unnamed: 0,col1,col2,col3
0,False,False,True
1,True,True,False
2,False,False,True


In [104]:
df.ge([400,500,600], axis='index')

Unnamed: 0,col1,col2,col3
0,False,False,True
1,True,True,False
2,False,False,True


In [105]:
df.ge([400,500,600], axis='columns')

Unnamed: 0,col1,col2,col3
0,False,False,True
1,True,True,False
2,False,False,True


In [117]:
df = pd.read_csv('csv_like_file.txt', sep='\t');
df

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [118]:
df.head(3)

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0


In [119]:
df.tail(3)

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [120]:
df.tail(10)

Unnamed: 0,P_id,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,1,22,7.25,0,1,0,0,0,0,0,0
1,2,38,71.2833,1,1,0,0,0,0,0,0
2,3,26,7.925,1,0,0,0,0,0,0,0
3,4,35,53.1,1,1,0,0,0,0,0,0
4,5,35,8.05,0,0,0,0,0,0,0,0
5,6,28,8.4583,0,0,0,0,0,0,0,0
6,7,54,51.8625,0,0,0,0,0,0,0,0
7,8,2,21.075,0,3,0,0,0,0,0,0
8,9,27,11.1333,1,0,0,0,0,0,0,0
9,10,14,30.0708,1,1,0,0,0,0,0,0


In [121]:
df.pop(item='P_id')    #  return item and drop from frame. item:  the label of column to be popped.

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
Name: P_id, dtype: int64

In [122]:
df

Unnamed: 0,Age,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,22,7.25,0,1,0,0,0,0,0,0
1,38,71.2833,1,1,0,0,0,0,0,0
2,26,7.925,1,0,0,0,0,0,0,0
3,35,53.1,1,1,0,0,0,0,0,0
4,35,8.05,0,0,0,0,0,0,0,0
5,28,8.4583,0,0,0,0,0,0,0,0
6,54,51.8625,0,0,0,0,0,0,0,0
7,2,21.075,0,3,0,0,0,0,0,0
8,27,11.1333,1,0,0,0,0,0,0,0
9,14,30.0708,1,1,0,0,0,0,0,0


In [123]:
df.pop('Age')

0    22
1    38
2    26
3    35
4    35
5    28
6    54
7     2
8    27
9    14
Name: Age, dtype: int64

In [124]:
df

Unnamed: 0,Fare,Sex,sibsp,zero,zero.1,zero.2,zero.3,zero.4,zero.5
0,7.25,0,1,0,0,0,0,0,0
1,71.2833,1,1,0,0,0,0,0,0
2,7.925,1,0,0,0,0,0,0,0
3,53.1,1,1,0,0,0,0,0,0
4,8.05,0,0,0,0,0,0,0,0
5,8.4583,0,0,0,0,0,0,0,0
6,51.8625,0,0,0,0,0,0,0,0
7,21.075,0,3,0,0,0,0,0,0
8,11.1333,1,0,0,0,0,0,0,0
9,30.0708,1,1,0,0,0,0,0,0


In [128]:
df.all(bool())

Fare       True
Sex       False
sibsp     False
zero      False
zero.1    False
zero.2    False
zero.3    False
zero.4    False
zero.5    False
dtype: bool

In [130]:
arr = np.array([[[235,332,234],[521,433,642]]]);
arr

array([[[235, 332, 234],
        [521, 433, 642]]])

In [131]:
np.squeeze(arr)

array([[235, 332, 234],
       [521, 433, 642]])

### pd.insert(loc, column, value, allow_duplicates=_NoDefault.no_default)

In [141]:
df = pd.DataFrame([[34,35,23]]*5, columns=['C1','C2','C3']);
df

Unnamed: 0,C1,C2,C3
0,34,35,23
1,34,35,23
2,34,35,23
3,34,35,23
4,34,35,23


In [142]:
df.insert(2, column='C4', value=[42]*5)

In [143]:
df

Unnamed: 0,C1,C2,C4,C3
0,34,35,42,23
1,34,35,42,23
2,34,35,42,23
3,34,35,42,23
4,34,35,42,23
