# **Pandas**

In [None]:
import numpy as np # importing the NumPy Library
import pandas as pd # importing the Pandas Library

In [None]:
dict = {
    "Name": ['Rohan', 'Raj', 'Ram', 'Hari'],
    "Marks": [91, 93, 97, 95],
    "City": ['Dwarka', 'Udaipur', 'Ayodhya', 'Mathura']
}

In [None]:
df = pd.DataFrame(dict) # DataFrame() function creates a table of organized data
df

In [None]:
import os

# to_csv() does not create missing folders, so make sure 'Data' exists first
os.makedirs('Data', exist_ok=True)
# to_csv() writes the DataFrame to a .csv file; by default the row index is written as the first column
df.to_csv('Data/Students.csv')

In [None]:
# index=False leaves the row-index column out of the written .csv file
df.to_csv('Data/Students.csv', index=False)

In [None]:
# head(n) returns the first n rows of the DataFrame (here the first two)
df.head(n=2)

In [None]:
# tail(n) returns the last n rows of the DataFrame (here the last two)
df.tail(n=2)

In [None]:
# describe() returns summary statistics (count, mean, std, min, quartiles, max);
# by default only the numeric columns are summarised
df.describe()

In [None]:
# pd.Series() wraps the given values (here 10 random floats in [0, 1)) in a labelled 1-D Series
ser = pd.Series(data=np.random.rand(10))
ser

In [None]:
# type() reports the class of the given object
type(ser)

In [None]:
# Build a DataFrame of random floats with 100 rows and 5 columns;
# np.arange(100) labels the rows 0 through 99
newdf = pd.DataFrame(
    data=np.random.rand(100, 5),
    index=np.arange(100),
)
# Display the whole DataFrame (rows 0 to 99)
newdf

In [None]:
# head(n) shows the first n rows of the DataFrame (here the first five)
newdf.head(n=5)

In [None]:
# tail(n) shows the last n rows of the DataFrame (here the last five)
newdf.tail(n=5)

In [None]:
# dtypes holds the data type of every column
newdf.dtypes

In [None]:
# index holds the row labels of the DataFrame
newdf.index

In [None]:
# columns holds the column labels of the DataFrame
newdf.columns

In [None]:
# to_numpy() is a method and has to be called: it returns the DataFrame's values as a NumPy array
newdf.to_numpy()

In [None]:
# Sort by row labels in ascending order (axis=0 means rows)
newdf.sort_index(axis=0)

In [None]:
# Sort by column labels in ascending order (axis=1 means columns)
newdf.sort_index(axis=1)

In [None]:
# Sort by row labels in descending order (axis=0 means rows)
newdf.sort_index(axis=0, ascending=False)

In [None]:
# Sort by column labels in descending order (axis=1 means columns)
newdf.sort_index(axis=1, ascending=False)

In [None]:
# Square-bracket indexing with a column label returns that column (here the one labelled 0)
newdf[0]

In [None]:
# .loc[row_label, column_label] addresses a single cell: set row 0, column 0 to 7
newdf.loc[0, 0] = 7
newdf

In [None]:
# Relabel the columns with the letters 'A' through 'E'
newdf.columns = list('ABCDE')
newdf

In [None]:
# The old column label 0 no longer exists after the rename, so this does NOT update column 'A':
# .loc creates a brand-new column labelled 0 instead
newdf.loc[0, 0] = 77
newdf

In [None]:
# Using the current column label 'A' updates the intended cell (row 0, column 'A')
newdf.loc[0, 'A'] = 77
newdf

In [None]:
# drop() removes the given label along an axis (axis=0 for rows, axis=1 for columns);
# it returns a new DataFrame, so newdf itself is left unchanged here
newdf.drop(0, axis=1)

In [None]:
# Select rows labelled 1 and 2, restricted to columns 'A' and 'D'
newdf.loc[[1, 2], ['A', 'D']]

In [None]:
# ':' keeps every row; only columns 'A' and 'D' are selected
newdf.loc[:, ['A', 'D']]

In [None]:
# ':' keeps every column; only rows labelled 1 and 2 are selected
newdf.loc[[1, 2], :]

In [None]:
# Boolean-mask selection: keep only the rows whose 'A' value is below 0.3
newdf.loc[newdf['A'] < 0.3]

In [None]:
# Combine two conditions with & (element-wise AND): rows where 'A' < 0.3 and 'D' > 0.4
newdf.loc[
    (newdf['A'] < 0.3)
    & (newdf['D'] > 0.4)
]

In [None]:
# .iloc selects by integer position rather than by label: first row, fourth column
newdf.iloc[0, 3]

In [None]:
# Positional selection with lists: rows at positions 2 and 4, columns at positions 1 and 3
newdf.iloc[[2, 4], [1, 3]]

In [None]:
# drop() with axis=0 removes rows: here the rows labelled 1 and 5 (returns a new DataFrame)
newdf.drop([1, 5], axis=0)

In [None]:
# drop() with axis=1 removes columns: here columns 'A' and 'D' (returns a new DataFrame)
newdf.drop(['A', 'D'], axis=1)

In [None]:
# reset_index() returns a DataFrame with the default 0..n-1 index;
# the previous index is kept as an extra column
newdf.reset_index()

In [None]:
# inplace=True applies the drop to newdf itself instead of returning a modified copy
newdf.drop(['A', 'D'], axis=1, inplace=True)
newdf

In [None]:
# drop=True discards the old index instead of adding it as an extra column,
# which reset_index() would otherwise do; inplace=True modifies newdf directly
newdf.reset_index(drop=True, inplace=True)

In [None]:
# Assigning None overwrites every value of column 'C' with a null value
newdf.loc[:, ['C']] = None
newdf

In [None]:
# isnull() returns a boolean Series: True wherever the value is null
newdf['C'].isnull()

In [None]:
# dropna() is a method and has to be called: it returns a copy of newdf
# without the rows that contain at least one null value
newdf.dropna()

In [None]:
# Create a new DataFrame containing missing values and a duplicated name.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
df = pd.DataFrame({
    "Name": ['Alfred', 'Batman', 'Alfred'],
    "Utility": [np.nan, np.nan, np.nan],
    "Born": [pd.NaT, pd.Timestamp("1940-04-05"), pd.NaT],
})
df.head()  # display the new DataFrame

In [None]:
# dropna() with its defaults removes every row that contains at least one null value
df.dropna()

In [None]:
# how decides when a row/column is removed: 'any' = at least one null value,
# 'all' = only when every value is null (rows are checked by default)
df.dropna(how='all')

In [None]:
# With axis=1 the same rule is applied to columns: drop a column only when all of its values are null
df.dropna(how='all', axis=1)

In [None]:
# drop_duplicates() returns the DataFrame without duplicate rows;
# subset limits the comparison to the 'Name' column
df.drop_duplicates(subset=['Name'])

In [None]:
# keep chooses which duplicate survives: 'first', 'last', or False to drop all of them
df.drop_duplicates(subset=['Name'], keep='last')

In [None]:
# shape is a (number of rows, number of columns) tuple
df.shape

In [None]:
# info() prints a concise summary of the DataFrame (index, columns, non-null counts, dtypes)
df.info()

In [None]:
# value_counts() on a column returns a Series with the count of each unique value;
# dropna=False also counts the null values
df['Utility'].value_counts(dropna=False)

In [None]:
# notnull() is a method and has to be called: it returns a same-sized boolean DataFrame
# in which non-missing values are True and missing values are False
df.notnull()