# Pandas Row and Column Wise Operations

In [49]:
import pandas as pd
import numpy as np

# Load JSON Dataset

In [50]:
# Read the JSON file into a DataFrame.
df = pd.read_json(path_or_buf='dataset/data.json', orient='columns')

# A cell renders only its final expression, so the tail() preview is the one
# shown below; the head() call is evaluated but its result is not displayed.
df.head()
df.tail()

Unnamed: 0,name,Age,Blood_Group,Nationality
9,Mehedi,32,B+,Bangladeshi
10,Momin,22,A+,Bangladeshi
11,Farsan,29,O+,Bangladeshi
12,Fatin,26,AB+,Bangladeshi
13,Shakil,26,B+,Bangladeshi


# Column Selection

In [51]:
# Indexing with a single column label picks out that one column.
df['name']
type(df['name'])

# Indexing with a list of labels picks out several columns at once;
# preview the first five rows of the result.
df[['name', 'Age']].head(5)

Unnamed: 0,name,Age
0,Sharif,26
1,Imran,25
2,Akib,24
3,Hanif,26
4,Irfan,32


# Row Selection
Pandas provides dedicated indexers for retrieving rows from a DataFrame. The DataFrame.loc[] indexer 
retrieves rows by label, and rows can also be selected by integer position with the DataFrame.iloc[] indexer.

**.loc[ ] :** This function selects data by the label of the rows and columns. The **DataFrame.loc** indexer selects data in a different way than just the indexing operator. It can select subsets of rows or columns. It can also simultaneously select subsets of rows and columns.

**.iloc[ ] :** This function allows us to retrieve rows and columns by position. In order to do that, we’ll need to specify the positions of the rows that we want, and the positions of the columns that we want as well. The df.iloc indexer is very similar to **DataFrame.loc** but only uses integer locations to make its selections.

In [52]:
# Fetch one row by its integer position (here: the first row).
df.iloc[0]

# First three rows.
df.head(n=3)

# Last three rows.
df.tail(n=3)

# Slice syntax selects a contiguous range of rows; the end bound is exclusive,
# so this returns the rows at positions 2, 3 and 4.
df[2:5]

Unnamed: 0,name,Age,Blood_Group,Nationality
2,Akib,24,AB+,Bangladeshi
3,Hanif,26,A+,Bangladeshi
4,Irfan,32,A+,Bangladeshi


# Select Through Conditions

In [53]:
# Keep only the rows whose Age is greater than 30.
df.loc[df['Age'] > 30]

# Keep only the rows whose Blood_Group is exactly 'AB+'.
df.loc[df['Blood_Group'] == 'AB+']

Unnamed: 0,name,Age,Blood_Group,Nationality
2,Akib,24,AB+,Bangladeshi
5,Akash,36,AB+,Bangladeshi
12,Fatin,26,AB+,Bangladeshi


# Insert Column

In [54]:
# Append a new column at the end: assigning to a label that does not exist yet
# adds it as the last column.
two_times_age = df['Age'] * 2
df['Two Times Age'] = two_times_age
df.head()

# Insert a new column at a specific position (index 3, i.e. the fourth column).
# DataFrame.insert raises ValueError when the label is already present, which is
# what happens when this cell is re-run. Check for the column explicitly instead
# of swallowing every exception, so that genuine failures (for example a missing
# "Age" column) are still reported.
if "Three Times Age" not in df.columns:
    df.insert(3, "Three Times Age", df["Age"] * 3)
df.head()

Unnamed: 0,name,Age,Blood_Group,Three Times Age,Nationality,Two Times Age
0,Sharif,26,B+,78,Bangladeshi,52
1,Imran,25,B+,75,Bangladeshi,50
2,Akib,24,AB+,72,Bangladeshi,48
3,Hanif,26,A+,78,Bangladeshi,52
4,Irfan,32,A+,96,Bangladeshi,64


# Delete Column

To delete a column from a pandas DataFrame, use the drop() method and pass the name(s) of the column(s) to remove. By default drop() returns a new DataFrame and leaves the original unchanged.

In [55]:
# Preview the frame without the "Three Times Age" column. The result is not
# assigned back, so `df` itself keeps the column.
df.drop(columns=["Three Times Age"]).head()

Unnamed: 0,name,Age,Blood_Group,Nationality,Two Times Age
0,Sharif,26,B+,Bangladeshi,52
1,Imran,25,B+,Bangladeshi,50
2,Akib,24,AB+,Bangladeshi,48
3,Hanif,26,A+,Bangladeshi,52
4,Irfan,32,A+,Bangladeshi,64


# Split Dataframe 

In [56]:
# Load the employee CSV, using the 'user_id' column as the row index.
dataframe = pd.read_csv(
    'dataset/circle_employee.csv',
    index_col='user_id',
)
dataframe.head()

Unnamed: 0_level_0,name,age,blood_group,gender,experience,designation,salary
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Sharif,,B+,male,1.5,Jr Software Engineer,30000
2,Kanan Mahmud,28.0,,Male,7.5,Sr Software Engineer,80000
3,Md. Shakil,27.0,B-,Male,3.5,Software Engineer,45000
4,Imran Sheikh,25.0,B-,Male,1.8,Jr Software Engineer,30000
5,Farsan Rashid,27.0,O+,Male,4.2,Software Engineer,55000


In [57]:
# Every row, but only the column at position 6 (the salary column). Using a
# slice (6:7) rather than a bare 6 keeps the result a one-column DataFrame.
salary = dataframe.iloc[:, 6:7]
salary.head(5)

Unnamed: 0_level_0,salary
user_id,Unnamed: 1_level_1
1,30000
2,80000
3,45000
4,30000
5,55000


In [58]:
# Every row, and the columns at positions 0 through 5 -- everything except
# the salary column.
data = dataframe.iloc[:, 0:6]
data.head()

Unnamed: 0_level_0,name,age,blood_group,gender,experience,designation
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,Sharif,,B+,male,1.5,Jr Software Engineer
2,Kanan Mahmud,28.0,,Male,7.5,Sr Software Engineer
3,Md. Shakil,27.0,B-,Male,3.5,Software Engineer
4,Imran Sheikh,25.0,B-,Male,1.8,Jr Software Engineer
5,Farsan Rashid,27.0,O+,Male,4.2,Software Engineer


## Split Using Pop Method

In [59]:
# Alternative way to split the dataset: pop() removes the 'salary' column from
# `dataframe` and returns it, so the frame is left holding the remaining columns.
salary = dataframe.pop(item='salary')
salary.head()

user_id
1    30000
2    80000
3    45000
4    30000
5    55000
Name: salary, dtype: int64

In [60]:
# Preview `dataframe` again: the 'salary' column popped earlier is no longer part of it.
dataframe.head(5)

Unnamed: 0_level_0,name,age,blood_group,gender,experience,designation
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,Sharif,,B+,male,1.5,Jr Software Engineer
2,Kanan Mahmud,28.0,,Male,7.5,Sr Software Engineer
3,Md. Shakil,27.0,B-,Male,3.5,Software Engineer
4,Imran Sheikh,25.0,B-,Male,1.8,Jr Software Engineer
5,Farsan Rashid,27.0,O+,Male,4.2,Software Engineer
