In [11]:
import pandas as pd
import numpy as np

data = np.random.randint(10, 50, (10, 4))
columns = [f'col{i}' for i in range(1, 5)]
index = [chr(code) for code in range(65, 75)]
df = pd.DataFrame(data=data, columns=columns, index=index)

print(f"Data Frame: \n{df}")
print(f"\nColumn 1: \n{df['col1']}")
print(f"\nType of DF: {type(df)}")
print(f"Rows of DF: {list(df.index)}")
print(f"Columns of DF: {list(df.columns)}")
print(f"Shape of DF: {df.shape}")
print(f"Type of Column 1: {type(df['col1'])}")

Data Frame: 
   col1  col2  col3  col4
A    26    21    21    10
B    20    11    41    33
C    26    38    20    45
D    15    29    42    42
E    20    13    30    45
F    12    18    34    14
G    20    47    35    45
H    24    48    13    28
I    29    37    27    11
J    13    17    32    35

Column 1: 
A    26
B    20
C    26
D    15
E    20
F    12
G    20
H    24
I    29
J    13
Name: col1, dtype: int32

Type of DF: <class 'pandas.core.frame.DataFrame'>
Rows of DF: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
Columns of DF: ['col1', 'col2', 'col3', 'col4']
Shape of DF: (10, 4)
Type of Column 1: <class 'pandas.core.series.Series'>


In [15]:
print(f"Column Selection: \n{df[['col3', 'col2']]}")
print(f"\nRow J: \n{df.loc['J']}")
print(f"\nRow 5: \n{df.iloc[6]}")

Column Selection: 
   col3  col2
A    21    21
B    41    11
C    20    38
D    42    29
E    30    13
F    34    18
G    35    47
H    13    48
I    27    37
J    32    17

Row J: 
col1    13
col2    17
col3    32
col4    35
Name: J, dtype: int32

Row 5: 
col1    20
col2    47
col3    35
col4    45
Name: G, dtype: int32


In [19]:
print(f"Selection using Square Brackets: \n{df.loc[['C', 'D', 'H', 'I']][['col1', 'col3']]}")
print(f"\nSelection using Comma: \n{df.loc[['C', 'D', 'H', 'I'], ['col1', 'col3']]}")
print(f"\nSelection using loc: \n{df[['col1', 'col3']].loc[['C', 'D', 'H', 'I']]}")

Selection using Square Brackets: 
   col1  col3
C    26    20
D    15    42
H    24    13
I    29    27

Selection using Comma: 
   col1  col3
C    26    20
D    15    42
H    24    13
I    29    27

Selection using loc: 
   col1  col3
C    26    20
D    15    42
H    24    13
I    29    27


In [23]:
print(f"Alternate Rows: \n{df.iloc[::2]}")
print(f"\nLast 5 Rows: \n{df.iloc[-5:]}")
print(f"\nExcept Last 5 Rows: \n{df.iloc[:-5]}")

Alternate Rows: 
   col1  col2  col3  col4
A    26    21    21    10
C    26    38    20    45
E    20    13    30    45
G    20    47    35    45
I    29    37    27    11

Last 5 Rows: 
   col1  col2  col3  col4
F    12    18    34    14
G    20    47    35    45
H    24    48    13    28
I    29    37    27    11
J    13    17    32    35

Except Last 5 Rows: 
   col1  col2  col3  col4
A    26    21    21    10
B    20    11    41    33
C    26    38    20    45
D    15    29    42    42
E    20    13    30    45


In [26]:
print(f"Rows 4 to 6, Columns 1 to 3: \n{df.iloc[4:7,0:3]}")
print(f"\nEvery 3rd Row and 2nd Column: \n{df.iloc[::3,::2]}")

Rows 4 to 6, Columns 1 to 3: 
   col1  col2  col3
E    20    13    30
F    12    18    34
G    20    47    35

Every 3rd Row and 2nd Column: 
   col1  col3
A    26    21
D    15    42
G    20    35
J    13    32


In [35]:
df['total'] = df['col1'] + df['col2'] + df['col3'] + df['col4']
df.loc['total'] = df.sum()
df.loc['J'] = [10, 20, 30, 40, 50]
df.loc['K'] = [100, 200, 300, 400, 500]
print(f"Data Frame: \n{df}")

Data Frame: 
        col1    col2    col3    col4   total
A       26.0    21.0    21.0    10.0    78.0
B       20.0    11.0    41.0    33.0   105.0
C       26.0    38.0    20.0    45.0   129.0
D       15.0    29.0    42.0    42.0   128.0
E       20.0    13.0    30.0    45.0   108.0
F       12.0    18.0    34.0    14.0    78.0
G       20.0    47.0    35.0    45.0   147.0
H       24.0    48.0    13.0    28.0   113.0
I       29.0    37.0    27.0    11.0   104.0
J       10.0    20.0    30.0    40.0    50.0
K      100.0   200.0   300.0   400.0   500.0
total  906.0  1446.0  1779.0  2139.0  6270.0


In [36]:
df.drop('total', axis=1, inplace=True) # inplace = permanent)
df.drop('total', axis=0, inplace=True) # inplace = permanent)
print(f"Data Frame: \n{df}")

Data Frame: 
    col1   col2   col3   col4
A   26.0   21.0   21.0   10.0
B   20.0   11.0   41.0   33.0
C   26.0   38.0   20.0   45.0
D   15.0   29.0   42.0   42.0
E   20.0   13.0   30.0   45.0
F   12.0   18.0   34.0   14.0
G   20.0   47.0   35.0   45.0
H   24.0   48.0   13.0   28.0
I   29.0   37.0   27.0   11.0
J   10.0   20.0   30.0   40.0
K  100.0  200.0  300.0  400.0


In [37]:
df['col1'] + df['col2'] - df['col3']

A    26.0
B   -10.0
C    44.0
D     2.0
E     3.0
F    -4.0
G    32.0
H    59.0
I    39.0
J     0.0
K     0.0
dtype: float64

In [43]:
filter = (df['col1'] < 60) & (df['col2'] > 30)
print(f"Filtered Data Frame: \n{df[filter]}")

filter = (df['col1'] % 2 == 0) & (df['col2'] % 2 == 0)
print(f"\nFiltered Data Frame: \n{df[filter]}")

Filtered Data Frame: 
   col1  col2  col3  col4
C  26.0  38.0  20.0  45.0
G  20.0  47.0  35.0  45.0
H  24.0  48.0  13.0  28.0
I  29.0  37.0  27.0  11.0

Filtered Data Frame: 
    col1   col2   col3   col4
C   26.0   38.0   20.0   45.0
F   12.0   18.0   34.0   14.0
H   24.0   48.0   13.0   28.0
J   10.0   20.0   30.0   40.0
K  100.0  200.0  300.0  400.0


In [44]:
print(f"Sum of Column 1: {df['col1'].sum()}")
print(f"Median of Column 1: {df['col1'].median()}")

Sum of Column 1: 302.0
Median of Column 1: 20.0


In [55]:
# Reading CSV
df = pd.read_csv('./diet.csv')
print(f"Data Frame:\n{df}")

print(f"\nFirst 3 Recods:\n{df[:3]}")

filter = df['gender'] == 'Female'
print(f"\nFemale Records:\n{df[filter]}")

filter = df['initial.weight'] < 60
print(f"\nWeight Less than 60:\n{df[filter]}")

Data Frame:
    id  gender  age  height diet.type  initial.weight  final.weight
0    1  Female   22     159         A              58          54.2
1    2  Female   46     192         A              60          54.0
2    3  Female   55     170         A              64          63.3
3    4  Female   33     171         A              64          61.1
4    5  Female   50     170         A              65          62.2
..  ..     ...  ...     ...       ...             ...           ...
71  74    Male   35     183         C              83          80.2
72  75    Male   49     177         C              84          79.9
73  76    Male   28     164         C              85          79.7
74  77    Male   40     167         C              87          77.8
75  78    Male   51     175         C              88          81.9

[76 rows x 7 columns]

First 3 Recods:
   id  gender  age  height diet.type  initial.weight  final.weight
0   1  Female   22     159         A              58          54.