In [75]:
import pandas as pd
import numpy as np

In [76]:
# Tour of the structural attributes exposed by a DataFrame
df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']})
print("DataFrame Parameters:")
# Every name below is a plain attribute (no call parentheses), so a single
# getattr loop can print them all in the same "name: value" layout.
for attr_name in ('columns', 'index', 'dtypes', 'shape', 'size', 'values', 'axes'):
    print(f"{attr_name}: {getattr(df, attr_name)}")

DataFrame Parameters:
columns: Index(['A', 'B'], dtype='object')
index: RangeIndex(start=0, stop=3, step=1)
dtypes: A     int64
B    object
dtype: object
shape: (3, 2)
size: 6
values: [[1 'a']
 [2 'b']
 [3 'c']]
axes: [RangeIndex(start=0, stop=3, step=1), Index(['A', 'B'], dtype='object')]


Mutation in DataFrame

In [77]:
# Sample table used by the following cells: two integer columns, five rows
data = {
    "calories": [400, 200, 100, 300, 270],
    "duration": [50, 42, 35, 67, 61],
}
df = pd.DataFrame(data)

print('------data------', df, sep='\n')

# .loc slices by index label: the end label (3) is part of the result
print('------.loc-----', df.loc[1:3], sep='\n')

# .iloc slices by integer position: the end position (3) is left out
print('------.iloc-----', df.iloc[1:3], sep='\n')

# The same cell, addressed once by (label, column name) and once by
# (row position, column position)
print(f"Calories at 0 index: {df.loc[0, 'calories']}")
print(f"Calories at 0 index: {df.iloc[0, 0]}")

------data------
   calories  duration
0       400        50
1       200        42
2       100        35
3       300        67
4       270        61
------.loc-----
   calories  duration
1       200        42
2       100        35
3       300        67
------.iloc-----
   calories  duration
1       200        42
2       100        35
Calories at 0 index: 400
Calories at 0 index: 400


In [78]:
# Grow the DataFrame: first by one column, then by one row

# 1-add column
# Assigning to a column label that does not exist yet creates the column;
# the list supplies one value for each of the five existing rows.
df['age'] = [23, 34, 56, 44, 34]
print('------add column------', df, sep='\n')

# 2-add row
# Assigning to a row label that does not exist yet (5) appends the row;
# the values line up with the columns by position: calories, duration, age.
df.loc[5] = [230, 45, 43]
print('------add row------', df, sep='\n')

------add column------
   calories  duration  age
0       400        50   23
1       200        42   34
2       100        35   56
3       300        67   44
4       270        61   34
------add row------
   calories  duration  age
0       400        50   23
1       200        42   34
2       100        35   56
3       300        67   44
4       270        61   34
5       230        45   43


In [79]:
# Update values

# Single value update: .loc[row_label, column_label] addresses exactly one cell
print('------updating single value------')
df.loc[1, 'age'] = 30
print(df)

# Update column based on condition: the boolean Series picks the rows where
# calories == 400, and only the 'age' value of those rows is overwritten
print("------updating column['calories'] based on condition------")
df.loc[df['calories'] == 400, 'age'] = 22
print(df)

# with .iloc
# .iloc is purely positional. Its documented boolean input is a plain boolean
# array, not an index-carrying Series (reading df.iloc[<bool Series>] raises),
# so the mask is converted with .to_numpy() before use. The column position is
# looked up by name instead of hard-coding 2.
print('------with .iloc------')
calories_mask = (df['calories'] == 400).to_numpy()
age_position = df.columns.get_loc('age')
df.iloc[calories_mask, age_position] = 50
print(df)

# Cap the column: every duration above 60 is replaced by 60
print('------capping duration at 60------')
df.loc[df['duration'] > 60, 'duration'] = 60
print(df)

------updating single value------
   calories  duration  age
0       400        50   23
1       200        42   30
2       100        35   56
3       300        67   44
4       270        61   34
5       230        45   43
------updating column['calories'] based on condition------
   calories  duration  age
0       400        50   22
1       200        42   30
2       100        35   56
3       300        67   44
4       270        61   34
5       230        45   43
------with .iloc------
   calories  duration  age
0       400        50   50
1       200        42   30
2       100        35   56
3       300        67   44
4       270        61   34
5       230        45   43
   calories  duration  age
0       400        50   50
1       200        42   30
2       100        35   56
3       300        60   44
4       270        60   34
5       230        45   43


In [80]:
# insert

# 1- insert a column
# DataFrame.insert places the new column at the given position (1) and
# modifies df in place; the list supplies one value per existing row.
print('------inserting a new column------')
df.insert(1, 'x', [101, 102, 103, 104, 105, 106])
print(df)

# 2- insert a row
# A row is inserted by splitting the frame and concatenating the pieces around
# the new one-row frame. The split is positional (.iloc) and done once, so both
# variants below start from the same pieces and never slice a frame whose
# index already contains a duplicate label.
df1 = pd.DataFrame({'calories': [200], 'x': [43], 'duration': [54], 'age': [31]})
rows_before, rows_after = df.iloc[:4], df.iloc[4:]

print('------inserting a new row------')
# concat keeps each piece's own labels, so df1's label 0 shows up a second time
print(pd.concat([rows_before, df1, rows_after]))

print('------inserting a new row(with resetting indices)------')
# ignore_index=True discards the old labels and renumbers the rows 0..n-1
df = pd.concat([rows_before, df1, rows_after], ignore_index=True)
print(df)

------inserting a new column------
   calories    x  duration  age
0       400  101        50   50
1       200  102        42   30
2       100  103        35   56
3       300  104        60   44
4       270  105        60   34
5       230  106        45   43
------inserting a new row------
   calories    x  duration  age
0       400  101        50   50
1       200  102        42   30
2       100  103        35   56
3       300  104        60   44
0       200   43        54   31
4       270  105        60   34
5       230  106        45   43
------inserting a new column(with reseting indices)------
   calories    x  duration  age
0       400  101        50   50
1       200  102        42   30
2       100  103        35   56
3       300  104        60   44
4       200   43        54   31
5       270  105        60   34
6       230  106        45   43


In [81]:
# delete

# 1-delete a column
# `del` removes the column from df in place
print('------delete a column(del)------')
del df['x']
print(df)

# drop() returns a new frame without the column; df is rebound to that result
print('------delete a column(drop())------')
df = df.drop(columns='age')
print(df)

# 2- delete a row
# index=3 removes the row whose label is 3; the remaining labels keep their
# values, which leaves a gap (0, 1, 2, 4, 5, 6)
print('------delete a row------')
df = df.drop(index=3)
print(df)

------delete a column(del)------
   calories  duration  age
0       400        50   50
1       200        42   30
2       100        35   56
3       300        60   44
4       200        54   31
5       270        60   34
6       230        45   43
------inserting a new column(drop())------
   calories  duration
0       400        50
1       200        42
2       100        35
3       300        60
4       200        54
5       270        60
6       230        45
------delete a row------
   calories  duration
0       400        50
1       200        42
2       100        35
4       200        54
5       270        60
6       230        45


In [86]:
# Show df as it stands, then a renumbered copy of it.
# reset_index(drop=True) returns a new frame with labels 0..n-1; the result is
# only printed, not assigned, so df itself keeps its current labels.
print(
    df,
    '------reset_index()------',
    df.reset_index(drop=True),
    sep='\n',
)

   calories  duration
0       400        50
1       200        42
2       100        35
4       200        54
5       270        60
6       230        45
------reset_index()------
   calories  duration
0       400        50
1       200        42
2       100        35
3       200        54
4       270        60
5       230        45
