# Dataframes III

In [None]:
import numpy as np
import pandas as pd

In [None]:
bond = pd.read_csv('datasets/jamesbond.csv')
bond.head()

# set_index() and reset_index()

    1. The set_index() method sets a particular column as the index of the dataframe. By default, the column that
       becomes the index is removed from the dataframe's columns.
    
    2. The reset_index() is used to give the default indexing to a dataframe. It doesn't drop the original index.
       Instead it makes it as a new column in the dataframe.

In [None]:
film_bond = bond.set_index('Film')
film_bond.head()

    Note that it removed the 'Film' column from the dataframe completely and made it as index.

    We can now reset the indexing in film_bond to default pandas implicit index by using the reset_index() method
    This method will set the default index and make the current index as the column in the dataframe.

In [None]:
film_bond_reset = film_bond.reset_index()
film_bond_reset.head()

    We can also decide to completely drop the current index by setting drop = True.

In [None]:
film_bond_reset_2 = film_bond.reset_index(drop = True)
film_bond_reset_2.head()

# iloc and loc accessors

In [None]:
import numpy as np
import pandas as pd

In [None]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = 'Film')
bond.head(10)

In [None]:
bond.loc['Dr. No']

In [None]:
bond.head(10)

In [None]:
bond.loc['Dr. No': 'Thunderball']

In [None]:
bond.head(10)

In [None]:
bond.loc['Casino Royale']

In [None]:
bond.head(10)

In [None]:
bond.loc[['Dr. No', 'Casino Royale']]

In [None]:
bond.head(10)

In [None]:
bond.loc[['Dr. No', 'Diamonds Are Forever', 'Live and Let Die']]

### Similarly, we can use iloc

In [None]:
bond.head(10)

In [None]:
bond.iloc[0] # 'Dr. No'

In [None]:
bond.head(10)

In [None]:
bond.iloc[1:6] # 'From Russia with Love' to 'You Only Live Twice' # This is slicing

In [None]:
bond.iloc[::2] # starts from the 0th row and takes a jump of 2 every time

In [None]:
bond.head(10)

In [None]:
bond.iloc[[0,5,0,4]] # Fancy Indexing

# Second argument to iloc and loc

In [None]:
import numpy as np
import pandas as pd

In [None]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = 'Film')
bond.head(10)

In [None]:
bond.loc['Dr. No', 'Actor']

In [None]:
bond.loc['Dr. No', ['Actor', 'Bond Actor Salary']]

In [None]:
bond.loc[['Dr. No', 'Thunderball'], ['Actor', 'Box Office']]

In [None]:
bond.head(10)

In [None]:
# Rows: label slice from 'Dr. No' through 'Thunderball' (loc label slices include both endpoints).
# Columns: 'Budget': :-2 is slice('Budget', None, -2) -- start at 'Budget' and walk backwards
# towards the first column, keeping every second column.
bond.loc['Dr. No' : 'Thunderball', 'Budget': :-2]

In [None]:
bond.head(10)

In [None]:
bond.loc['Dr. No' : 'Thunderball', ['Actor', 'Director', 'Year']]

### We can also pass second parameter in iloc accessor.

In [None]:
bond.head(10)

In [None]:
bond.iloc[0, 1]

In [None]:
bond.iloc[0, [1,3]]

In [None]:
bond.head(10)

In [None]:
bond.iloc[0:4, 1]

In [None]:
bond.head(10)

In [None]:
bond.iloc[0:4, [1, 4]]

In [None]:
bond.iloc[0:4, 1:4]

In [None]:
bond.head(10)

In [None]:
bond.iloc[1, [0,3,2]]

# Boolean masking using iloc and loc

In [None]:
import numpy as np
import pandas as pd

In [None]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = 'Film')
bond.head()

In [None]:
# Boolean Series: True for the rows whose 'Actor' value equals 'Sean Connery'.
mask = (bond['Actor'] == 'Sean Connery')

# All three variants below use chained indexing: bond[mask] is evaluated first and produces
# a separate DataFrame, so the assignment is applied to that temporary object and `bond`
# itself is left unchanged (pandas may also emit a SettingWithCopy / chained-assignment
# warning). To update the original, use a single .loc call on `bond` with the mask as the
# row selector and the column label as the column selector.
# bond[mask]['Actor'] = 'Sir Sean Connery'
# bond[mask].iloc[::, 1] = 'Sir Sean Connery'
bond[mask].loc[::, 'Actor'] = 'Sir Sean Connery'

In [None]:
bond.loc[mask, 'Actor'] = 'Sir Sean Connery'

In [None]:
bond

# Rename columns in dataframe

    We can change the names of columns or row labels using the rename function. This function expects us to pass in
    a dictionary in the columns or index parameter with keys as the current name of columns and values as the names 
    that we want.

In [None]:
import numpy as np
import pandas as pd

bond = pd.read_csv('datasets/jamesbond.csv', index_col = 'Film')
bond.head()

In [None]:
bond = bond.rename(columns = {'Box Office' : 'box_office', 'Budget' : 'cost'})
bond.head()

    We can also use the axis parameter to rename columns or index labels. We need to pass in a mapper dictionary
    and then specify the axis along which we want the renaming to be done.
    
    The axis can take 2 values. The value 0 means index and the value 1 means columns. We may also pass a string
    like 'index' or 'columns'.

In [None]:
mapper = {
    'Bond Actor Salary' : 'salary',
    'Actor' : 'actor',
    'Year' : 'year',
    'Director' : 'director'
}

# bond = bond.rename(mapper = mapper, axis = 1) # axis = 0 means rows and axis = 1 means columns.
bond = bond.rename(mapper = mapper, axis = 'columns')
bond.head()

In [None]:
mapper = {
    'Dr. No': 'dr_no',
    'Goldfinger' : 'gold_finger',
    'Thunderball' : 'thunder_ball'
}

bond = bond.rename(mapper = mapper, axis = 0)
# bond = bond.rename(mapper = mapper, axis = 'index')
bond.head()

# Deleting rows and columns from a dataframe

    1. The drop() method is used to drop a column or a row. For dropping a column, specify the labels and then mention
       axis as 1 (or 'columns') and for dropping a row mention the axis as 0 (or 'index').
       
    2. We also have the pop() method wherein we can pass the name of the column to be removed. One special thing about 
       this method is that it gives us the column that we deleted and it mutates the original dataframe. This is one of
       the very few mutating operations in the pandas library.

In [None]:
import numpy as np
import pandas as pd

In [None]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = 'Film')
bond.head()

In [None]:
bond = bond.drop('Actor', axis = 1)
bond.head()

In [None]:
bond = bond.drop(['Bond Actor Salary', 'Director'], axis = 'columns')
bond.head()

In [None]:
bond = bond.drop(['Dr. No', 'From Russia with Love'], axis = 0) # axis = 'index'
bond.head()

In [None]:
bond = bond.drop(index = ["You Only Live Twice", "On Her Majesty's Secret Service"])
bond.head()

In [None]:
bond.drop(columns = ['Budget', 'Box Office'], inplace = True)
bond.head()

## pop() method

    1. pop() is one of the very few methods in the pandas library which mutates the original dataframe.
    2. It returns us the deleted column in the form of a series.

In [None]:
import numpy as np
import pandas as pd

In [None]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = 'Film')
bond.head()

In [None]:
bond.pop('Director')
bond.head()

In [None]:
salary = bond.pop('Bond Actor Salary')
salary.head()

In [None]:
bond.head()

## del keyword to delete the columns from dataframe

In [None]:
import numpy as np
import pandas as pd

bond = pd.read_csv('datasets/jamesbond.csv', index_col = 'Film')
bond.head()

In [None]:
del bond['Actor'] # This will delete the Actor column from the bond dataframe.
bond.head()

# sample() method
    
    1. The sample() method is used to select a random sample from the dataframe.

In [None]:
import numpy as np
import pandas as pd

In [None]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = 'Film')
bond.head()

In [None]:
bond.sample(n = 6) # will select 6 random rows from the dataframe.

In [None]:
bond.sample() # will select a single row randomly from the dataframe. 

# nlargest() and nsmallest() method

In [None]:
import numpy as np
import pandas as pd

bond = pd.read_csv('datasets/jamesbond.csv', index_col = 'Film')
bond.head()

In [None]:
bond.sort_values(by = 'Budget', ascending = False).head(5)

    The above code returns the top 5 movies which have the highest budget. But we can also do this using the nlargest() method.

In [None]:
bond.nlargest(n = 5, columns = 'Budget')

# apply() method

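    1. The apply() method calls a function on every row (axis = 'columns') or every column (axis = 'index') of a
       dataframe. When the function returns a single value, apply() returns us a series.
    
    2. When applied row-wise, the series returned has the same index as the original dataframe.
    
    3. We can then add the series which we got to the dataframe as a new column.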

In [None]:
import numpy as np
import pandas as pd

In [None]:
bond = pd.read_csv('datasets/jamesbond.csv', index_col = 'Film').head(5)
bond

In [None]:
def print_series(series):
    """Do-nothing callback used to demonstrate ``DataFrame.apply``.

    The argument is ignored and ``None`` is returned. Uncomment the two
    print calls below to display each object that apply() passes in,
    followed by a separator line.
    """
    # print(series)
    # print('*' * 50)
    return None

In [None]:
bond.apply(print_series, axis = 'columns')

    Suppose we want to create a new column in bond called summary which gives us the summary of the current row.

In [None]:
def summary(film):
    """Return a single-line text summary of one film record.

    Parameters
    ----------
    film : mapping-like
        A record supporting ``film[key]`` lookups (for example a row Series
        handed over by ``DataFrame.apply(..., axis='columns')`` or a dict).
        The keys 'Year', 'Director', 'Box Office' and 'Budget' are read.

    Returns
    -------
    str
        The summary sentence(s) on one line, with no leading/trailing
        whitespace and no embedded newlines.

    Raises
    ------
    KeyError
        If one of the required keys is missing (for dict / Series inputs).
    """
    year = film['Year']
    director = film['Director']
    money_made = film['Box Office']
    budget = film['Budget']
    # Adjacent f-string literals are joined at compile time. A triple-quoted
    # string would instead carry the source file's newlines and indentation
    # into every value of the resulting column.
    return (
        f'The movie was made in the year {year} under the direction of {director}. '
        f'It had an overall budget of {budget} crore '
        f'and the movie made around {money_made} crore on the box office.'
    )


bond['summary'] = bond.apply(summary, axis = 'columns')
bond.head(1)