'''
@Author: Samadhan Thube

@Date: 2024-08-31

@last modified by : Samadhan Thube

@last modified date : 2024-08-31
 
@Title : Pandas Library Problems

'''

In [1]:
import numpy as np
import pandas as pd

1. Write a Python program to create and display a one-dimensional array-like object
containing an array of data using Pandas module.

In [2]:
def pandas_series(data):
    """
    Description:
        Creates a one-dimensional array-like object using a Pandas Series and displays it.

    Parameter:
        data (list): A list of data to be stored in the Pandas Series.

    Return:
        None
    """
    series = pd.Series(data)

    
    return series

def main():
    
    data = [10, 20, 30, 40, 50]
    
    print("One-dimensional array-like object (Pandas Series):") 
    print(pandas_series(data))

if __name__ == "__main__":
    main()


One-dimensional array-like object (Pandas Series):
0    10
1    20
2    30
3    40
4    50
dtype: int64


2. Write a Python program to convert a Panda module Series to Python list and it's type.

In [3]:
def series_to_list(series):
    """
    Description:
        Converts a Pandas Series to a Python list and returns it along with its type.

    Parameter:
        series (pd.Series): A Pandas Series object to be converted.

    Return:
        list: A Python list converted from the Pandas Series.
        type: The type of the converted list.
    """
   
    python_list = series.tolist()
   
    return python_list

def main():
    
    data = [10, 20, 30, 40, 50]
    series = pd.Series(data)
    
    python_list= series_to_list(series)
    
    print("Converted Python list:", python_list)
    print("Type of the converted list:",  type(python_list))

if __name__ == "__main__":
    main()


Converted Python list: [10, 20, 30, 40, 50]
Type of the converted list: <class 'list'>


3. Write a Python program to add, subtract, multiple and divide two Pandas Series.

In [4]:
def arithmetic_operations(series1, series2):
    """
    Description:
        Performs addition, subtraction, multiplication, and division on two Pandas Series.

    Parameter:
        series1 (pd.Series): The first Pandas Series.
        series2 (pd.Series): The second Pandas Series.

    Return:
        dict: A dictionary containing the results of addition, subtraction, multiplication, and division.
    """
    
    addition = series1 + series2
    subtraction = series1 - series2
    multiplication = series1 * series2
    division = series1 / series2

    return addition,subtraction,multiplication,division

def main():
    
    data1 = [2, 4, 6, 8, 10]
    data2 = [1, 3, 5, 7, 9]
    
    series1 = pd.Series(data1)
    series2 = pd.Series(data2)
    
    
    addition,subtraction,multiplication,division = arithmetic_operations(series1, series2)
    
    print("Addition of two Series:\n", addition)
    print("\nSubtraction of two Series:\n", subtraction)
    print("\nMultiplication of two Series:\n", multiplication)
    print("\nDivision of two Series:\n", division)

if __name__ == "__main__":
    main()


Addition of two Series:
 0     3
1     7
2    11
3    15
4    19
dtype: int64

Subtraction of two Series:
 0    1
1    1
2    1
3    1
4    1
dtype: int64

Multiplication of two Series:
 0     2
1    12
2    30
3    56
4    90
dtype: int64

Division of two Series:
 0    2.000000
1    1.333333
2    1.200000
3    1.142857
4    1.111111
dtype: float64


4. Write a Python program to get the powers of an array values element-wise.

In [5]:
import pandas as pd

def elementwise_power(base_series, exponent_series):
    """
    Description:
        Computes the element-wise power of two pandas Series.

    Parameter:
        base_series (pd.Series): A pandas Series containing the base values.
        exponent_series (pd.Series): A pandas Series containing the exponent values.

    Return:
        pd.Series: A pandas Series containing the result of raising each base element to the power of the corresponding exponent.
    """
    return base_series ** exponent_series

def main():
    
    base_series = pd.Series([0, 1, 2, 3, 4, 5, 6])

    exponent_series = pd.Series([1, 1, 3, 3, 3, 3, 3])

    result_series = elementwise_power(base_series, exponent_series)

    print("Original array:")
    print(base_series.values)

    print("\nFirst array elements raised to powers from second array, element-wise:")
    print(result_series.values)

if __name__ == "__main__":
    main()


Original array:
[0 1 2 3 4 5 6]

First array elements raised to powers from second array, element-wise:
[  0   1   8  27  64 125 216]


5. Write a Python program to create and display a DataFrame from a specified dictionary
data which has the index labels.

In [6]:
def create_dataframe(exam_data, labels):
    """
    Description:
        Creates a DataFrame from the provided dictionary data and index labels.

    Parameter:
        exam_data (dict): A dictionary containing the data for the DataFrame.
        labels (list): A list of index labels for the DataFrame.

    Return:
        pd.DataFrame: A pandas DataFrame with the specified data and index labels.
    """
    
    return pd.DataFrame(exam_data, index=labels)

def main():
    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael',
                 'Matthew', 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }

    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    dataframe = create_dataframe(exam_data, labels)

    # Display the DataFrame
    print("DataFrame:")
    print(dataframe)

if __name__ == "__main__":
    main()


DataFrame:
        name  score  attempts qualify
a  Anastasia   12.5         1     yes
b       Dima    9.0         3      no
c  Katherine   16.5         2     yes
d      James    NaN         3      no
e      Emily    9.0         2      no
f    Michael   20.0         3     yes
g    Matthew   14.5         1     yes
h      Laura    NaN         1      no
i      Kevin    8.0         2      no
j      Jonas   19.0         1     yes


6. Write a Python program to display a summary of the basic information about a
specified Data Frame and its data.

In [7]:
def display_summary(dataframe):
    """
    Description:
        Displays a summary of the basic information about the DataFrame and its data.

    Parameter:
        df (pd.DataFrame): The DataFrame to summarize.

    Return:
        None
    """
    print("Basic Information about the DataFrame:")
    print(dataframe.info())
    print("\nStatistical Summary of the DataFrame:")
    print(dataframe.describe(include='all'))
    print("\nFirst few rows of the DataFrame:")
    print(dataframe.head())

def main():
 
    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew',
                 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }

    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    dataframe = pd.DataFrame(exam_data, index=labels)
    display_summary(dataframe)

if __name__ == "__main__":
    main()


Basic Information about the DataFrame:
<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, a to j
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   name      10 non-null     object 
 1   score     8 non-null      float64
 2   attempts  10 non-null     int64  
 3   qualify   10 non-null     object 
dtypes: float64(1), int64(1), object(2)
memory usage: 400.0+ bytes
None

Statistical Summary of the DataFrame:
             name      score   attempts qualify
count          10   8.000000  10.000000      10
unique         10        NaN        NaN       2
top     Anastasia        NaN        NaN     yes
freq            1        NaN        NaN       5
mean          NaN  13.562500   1.900000     NaN
std           NaN   4.693746   0.875595     NaN
min           NaN   8.000000   1.000000     NaN
25%           NaN   9.000000   1.000000     NaN
50%           NaN  13.500000   2.000000     NaN
75%           NaN  17.125000   2.750000 

7. Write a Python program to get the first 3 rows of a given DataFrame.

In [8]:
def main():
    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael',
                 'Matthew', 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }

    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    df = pd.DataFrame(exam_data, index=labels)

    print("First 3 rows of the DataFrame:")
    print(df.head(3))

if __name__ == "__main__":
    main()


First 3 rows of the DataFrame:
        name  score  attempts qualify
a  Anastasia   12.5         1     yes
b       Dima    9.0         3      no
c  Katherine   16.5         2     yes


8. Write a Python program to select the 'name' and 'score' columns from the following
DataFrame.

In [9]:
def select_columns(dataframe):
    """
    Description:
        Selects specified columns from the DataFrame.

    Parameter:
        df (pd.DataFrame): The DataFrame from which to select columns.

    Return:
        pd.DataFrame: A DataFrame with only the selected columns.
    """
    return dataframe[['name','score']]

def main():
    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew',
                 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }

    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
    dataframe = pd.DataFrame(exam_data, index=labels)

    print("DataFrame with 'name' and 'score' columns:")
    print(select_columns(dataframe))

if __name__ == "__main__":
    main()


DataFrame with 'name' and 'score' columns:
        name  score
a  Anastasia   12.5
b       Dima    9.0
c  Katherine   16.5
d      James    NaN
e      Emily    9.0
f    Michael   20.0
g    Matthew   14.5
h      Laura    NaN
i      Kevin    8.0
j      Jonas   19.0


9. Write a Python program to select the specified columns and rows from a given data
frame.

In [10]:
def columns_and_rows(dataframe, columns, rows):
    """
    Description:
        Selects specified columns and rows from the DataFrame.

    Parameter:
        dataframe (pd.DataFrame): The DataFrame from which to select columns and rows.
        columns (list): A list of column names to select.
        rows (list): A list of row indices to select.

    Return:
        pd.DataFrame: A DataFrame with only the selected columns and rows.
    """
    return dataframe.loc[rows, columns]

def main():
    
    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew',
                 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }
    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    dataframe = pd.DataFrame(exam_data, index=labels)

    columns = ['name', 'score']
    rows = ['b', 'd', 'f', 'g']  

    selected_data = columns_and_rows(dataframe, columns, rows)

    print("Selected columns and rows:")
    print(selected_data)

if __name__ == "__main__":
    main()


Selected columns and rows:
      name  score
b     Dima    9.0
d    James    NaN
f  Michael   20.0
g  Matthew   14.5


10. Write a Python program to select the rows where the number of attempts in the
examination is greater than 2.

In [11]:
def filter_attempts(dataframe):
    """
    Description:
        Selects rows from the DataFrame where the number of attempts is greater than a specified value.

    Parameter:
        dataframe (pd.DataFrame): The DataFrame to filter.
        min_attempts (int): The minimum number of attempts to filter by.

    Return:
        pd.DataFrame: A DataFrame with rows where the number of attempts is greater than the specified value.
    """
    return dataframe[dataframe['attempts'] > 2]

def main():
   
    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew',
                 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }

    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
    dataframe = pd.DataFrame(exam_data, index=labels)

    filtered_data = filter_attempts(dataframe)

    print("Rows where the number of attempts is greater than 2:")
    print(filtered_data)

if __name__ == "__main__":
    main()


Rows where the number of attempts is greater than 2:
      name  score  attempts qualify
b     Dima    9.0         3      no
d    James    NaN         3      no
f  Michael   20.0         3     yes


11. Write a Python program to count the number of rows and columns of a DataFrame.

In [12]:
def count_rows_and_columns(dataframe):
    """
    Description:
        Counts the number of rows and columns in the DataFrame.

    Parameter:
        dataframe (pd.DataFrame): The DataFrame to count rows and columns.

    Return:
        tuple: A tuple containing the number of rows and columns.
    """
    return dataframe.shape

def main():
 
    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew',
                 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }

    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    dataframe = pd.DataFrame(exam_data, index=labels)

    num_rows, num_columns = count_rows_and_columns(dataframe)

    print(f"Number of rows: {num_rows}")
    print(f"Number of columns: {num_columns}")

if __name__ == "__main__":
    main()


Number of rows: 10
Number of columns: 4


12. Write a Python program to select the rows where the score is missing, i.e. is NaN.

In [13]:
def missing_scores(dataframe):
    """
    Description:
        Selects rows from the DataFrame where the score is missing (i.e., NaN).

    Parameter:
        dataframe (pd.DataFrame): The DataFrame to filter.

    Return:
        pd.DataFrame: A DataFrame with rows where the score is NaN.
    """
    return dataframe[dataframe['score'].isna()]

def main():
 
    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew',
                 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }

    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    dataframe = pd.DataFrame(exam_data, index=labels)

    print("Rows where the score is missing (NaN):")
    print(missing_scores(dataframe))

if __name__ == "__main__":
    main()


Rows where the score is missing (NaN):
    name  score  attempts qualify
d  James    NaN         3      no
h  Laura    NaN         1      no


13. Write a Python program to select the rows where number of attempts in the
examination is less than 2 and score greater than 15.

In [14]:
def filter_attempts_and_score(dataframe):
    """
    Description:
        Selects rows from the DataFrame where the number of attempts is less than a specified value
        and the score is greater than a specified value.

    Parameter:
        dataframe (pd.DataFrame): The DataFrame to filter.

    Return:
        pd.DataFrame: A DataFrame with rows matching the specified conditions.
    """
    return dataframe[(dataframe['attempts'] < 2) & (dataframe['score'] > 15)]

def main():

    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew',
                 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }

    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    dataframe = pd.DataFrame(exam_data, index=labels)

    filtered_data = filter_attempts_and_score(dataframe)

    print("Rows where number of attempts is less than 2 and score is greater than 15:")
    print(filtered_data)

if __name__ == "__main__":
    main()


Rows where number of attempts is less than 2 and score is greater than 15:
    name  score  attempts qualify
j  Jonas   19.0         1     yes


14. Write a Python program to change the score in row 'd' to 11.5.

In [15]:
def update_score(dataframe):
    """
    Description:
        Updates the score for a specified row in the DataFrame.

    Parameter:
        dataframe (pd.DataFrame): The DataFrame to update.

    Return:
        pd.DataFrame: The updated DataFrame.
    """
    dataframe.loc['d', 'score'] = 11.5
    return dataframe

def main():
    
    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew',
                 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }

    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    dataframe = pd.DataFrame(exam_data, index=labels)

    updated_dataframe = update_score(dataframe)

    print("DataFrame after updating the score in row 'd':")
    print(updated_dataframe)

if __name__ == "__main__":
    main()


DataFrame after updating the score in row 'd':
        name  score  attempts qualify
a  Anastasia   12.5         1     yes
b       Dima    9.0         3      no
c  Katherine   16.5         2     yes
d      James   11.5         3      no
e      Emily    9.0         2      no
f    Michael   20.0         3     yes
g    Matthew   14.5         1     yes
h      Laura    NaN         1      no
i      Kevin    8.0         2      no
j      Jonas   19.0         1     yes


In [16]:
def sum_attempts(dataframe):
    """
    Description:
        Calculates the sum of the examination attempts by the students.

    Parameter:
        dataframe (pd.DataFrame): The DataFrame from which to calculate the sum.

    Return:
        int: The sum of the examination attempts.
    """
    return dataframe['attempts'].sum()

def main():
   
    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew',
                 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }

    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    dataframe = pd.DataFrame(exam_data, index=labels)

    total_attempts = sum_attempts(dataframe)

    print(f"Total number of examination attempts: {total_attempts}")

if __name__ == "__main__":
    main()


Total number of examination attempts: 19


16. Write a Python program to calculate the mean score for each different student in
DataFrame.

In [17]:
def mean_score(dataframe):
    """
    Description:
        Calculates the mean score for each student in the DataFrame.
        Since each student has only one score, this will just return the score for each student.

    Parameter:
        dataframe (pd.DataFrame): The DataFrame containing student data.

    Return:
        pd.Series: A Series with student labels as index and their corresponding scores.
    """
    return dataframe['score']

def main():
    
    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew',
                 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }

    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    dataframe = pd.DataFrame(exam_data, index=labels)

    mean_scores = mean_score(dataframe)

    print("Mean scores for each student:")
    print(mean_scores)

if __name__ == "__main__":
    main()


Mean scores for each student:
a    12.5
b     9.0
c    16.5
d     NaN
e     9.0
f    20.0
g    14.5
h     NaN
i     8.0
j    19.0
Name: score, dtype: float64


17. Write a Python program to append a new row 'k' to data frame with given values for
each column. Now delete the new row and return the original DataFrame.

In [18]:
def modify_dataframe(dataframe):
    """
    Description:
        Appends a new row to the DataFrame, then deletes the row, and returns the original DataFrame.

    Parameter:
        dataframe (pd.DataFrame): The DataFrame to be modified.

    Return:
        pd.DataFrame: The original DataFrame after appending and deleting the new row.
    """
    new_row = pd.DataFrame({
        'name': ['Suresh'],
        'score': [15.5],
        'attempts': [1],
        'qualify': ['yes']
    }, index=['k'])
    
    dataframe = pd.concat([dataframe, new_row])

    dataframe = dataframe.drop('k')

    return dataframe

def main():

    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew',
                 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }
    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    dataframe = pd.DataFrame(exam_data, index=labels)
  
    print("Original DataFrame after appending and deleting the new row:")
    print(modify_dataframe(dataframe))

if __name__ == "__main__":
    main()


Original DataFrame after appending and deleting the new row:
        name  score  attempts qualify
a  Anastasia   12.5         1     yes
b       Dima    9.0         3      no
c  Katherine   16.5         2     yes
d      James    NaN         3      no
e      Emily    9.0         2      no
f    Michael   20.0         3     yes
g    Matthew   14.5         1     yes
h      Laura    NaN         1      no
i      Kevin    8.0         2      no
j      Jonas   19.0         1     yes


18. Write a Python program to sort the DataFrame first by 'name' in descending order,
then by 'score' in ascending order.

In [19]:
def sort_dataframe(dataframe):
    """
    Description:
        Sorts the DataFrame first by 'name' in descending order, then by 'score' in ascending order.

    Parameter:
        dataframe (pd.DataFrame): The DataFrame to be sorted.

    Return:
        pd.DataFrame: The sorted DataFrame.
    """
    sorted_dataframe = dataframe.sort_values(by=['name', 'score'], ascending=[False, True])
    
    return sorted_dataframe

def main():
  
    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew',
                 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }
    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    dataframe = pd.DataFrame(exam_data, index=labels)

    new_row = pd.DataFrame({
        'name': ['Suresh'],
        'score': [15.5],
        'attempts': [1],
        'qualify': ['yes']
    }, index=['k'])

    dataframe = pd.concat([dataframe, new_row])

    sorted_dataframe = sort_dataframe(dataframe)

    print("Sorted DataFrame:")
    print(sorted_dataframe)

if __name__ == "__main__":
    main()


Sorted DataFrame:
        name  score  attempts qualify
k     Suresh   15.5         1     yes
f    Michael   20.0         3     yes
g    Matthew   14.5         1     yes
h      Laura    NaN         1      no
i      Kevin    8.0         2      no
c  Katherine   16.5         2     yes
j      Jonas   19.0         1     yes
d      James    NaN         3      no
e      Emily    9.0         2      no
b       Dima    9.0         3      no
a  Anastasia   12.5         1     yes


19. Write a Python program to replace the 'qualify' column contains the values 'yes' and
'no' with True and False.

In [20]:
def replace_values(dataframe):
    """
    Description:
        Replaces 'yes' with True and 'no' with False in the 'qualify' column.

    Parameter:
        dataframe (pd.DataFrame): The DataFrame with the 'qualify' column to be replaced.

    Return:
        pd.DataFrame: The DataFrame with replaced values in the 'qualify' column.
    """
    
    dataframe['qualify'] = dataframe['qualify'].replace({'yes': True, 'no': False})
    
    return dataframe

def main():
    
    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew',
                 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }
    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    dataframe = pd.DataFrame(exam_data, index=labels)

    print("Updated DataFrame with 'qualify' values replaced:")
    print(replace_values(dataframe))

if __name__ == "__main__":
    main()


Updated DataFrame with 'qualify' values replaced:
        name  score  attempts  qualify
a  Anastasia   12.5         1     True
b       Dima    9.0         3    False
c  Katherine   16.5         2     True
d      James    NaN         3    False
e      Emily    9.0         2    False
f    Michael   20.0         3     True
g    Matthew   14.5         1     True
h      Laura    NaN         1    False
i      Kevin    8.0         2    False
j      Jonas   19.0         1     True


  dataframe['qualify'] = dataframe['qualify'].replace({'yes': True, 'no': False})


20. Write a Python program to delete the 'attempts' column from the DataFrame.

In [21]:
def delete_column(dataframe):
    """
    Description:
        Deletes the 'attempts' column from the DataFrame.

    Parameter:
        dataframe (pd.DataFrame): The DataFrame from which to delete the 'attempts' column.

    Return:
        pd.DataFrame: The DataFrame with the 'attempts' column removed.
    """
    dataframe = dataframe.drop(columns='attempts')
    
    return dataframe

def main():
    
    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew',
                 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }

    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    dataframe = pd.DataFrame(exam_data, index=labels)

    print("DataFrame after deleting the 'attempts' column:")
    print(delete_column(dataframe))

if __name__ == "__main__":
    main()


DataFrame after deleting the 'attempts' column:
        name  score qualify
a  Anastasia   12.5     yes
b       Dima    9.0      no
c  Katherine   16.5     yes
d      James    NaN      no
e      Emily    9.0      no
f    Michael   20.0     yes
g    Matthew   14.5     yes
h      Laura    NaN      no
i      Kevin    8.0      no
j      Jonas   19.0     yes


21. Write a Python program to insert a new column in existing DataFrame.

In [22]:
def insert_column(dataframe, column_name, values):
    """
    Description:
        Inserts a new column into the existing DataFrame.

    Parameter:
        dataframe (pd.DataFrame): The DataFrame to which the new column will be added.
        column_name (str): The name of the new column.
        values (list): The values for the new column.

    Return:
        pd.DataFrame: The DataFrame with the new column added.
    """
    dataframe[column_name] = values
    return dataframe

def main():
    
    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew',
                 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }
    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    dataframe = pd.DataFrame(exam_data, index=labels)

    column_name = 'grade'
    values = ['A', 'B', 'A', 'C', 'B', 'A', 'B', 'C', 'B', 'A']

    updated_dataframe = insert_column(dataframe, column_name,values)

    print("DataFrame after inserting the new column:")
    print(updated_dataframe)

if __name__ == "__main__":
    main()


DataFrame after inserting the new column:
        name  score  attempts qualify grade
a  Anastasia   12.5         1     yes     A
b       Dima    9.0         3      no     B
c  Katherine   16.5         2     yes     A
d      James    NaN         3      no     C
e      Emily    9.0         2      no     B
f    Michael   20.0         3     yes     A
g    Matthew   14.5         1     yes     B
h      Laura    NaN         1      no     C
i      Kevin    8.0         2      no     B
j      Jonas   19.0         1     yes     A


22. Write a Python program to iterate over rows in a DataFrame.

In [23]:
def iterate_rows(dataframe):
    """
    Description:
        Iterates over rows in the DataFrame and prints each row.

    Parameter:
        dataframe (pd.DataFrame): The DataFrame to iterate over.
    """

    for index, row in dataframe.iterrows():
        print(f"Index: {index}")
        print(f"Row data:\n{row}\n")

def main():
    
    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew',
                 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }
    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    dataframe = pd.DataFrame(exam_data, index=labels)

    iterate_rows(dataframe)

if __name__ == "__main__":
    main()


Index: a
Row data:
name        Anastasia
score            12.5
attempts            1
qualify           yes
Name: a, dtype: object

Index: b
Row data:
name        Dima
score        9.0
attempts       3
qualify       no
Name: b, dtype: object

Index: c
Row data:
name        Katherine
score            16.5
attempts            2
qualify           yes
Name: c, dtype: object

Index: d
Row data:
name        James
score         NaN
attempts        3
qualify        no
Name: d, dtype: object

Index: e
Row data:
name        Emily
score         9.0
attempts        2
qualify        no
Name: e, dtype: object

Index: f
Row data:
name        Michael
score          20.0
attempts          3
qualify         yes
Name: f, dtype: object

Index: g
Row data:
name        Matthew
score          14.5
attempts          1
qualify         yes
Name: g, dtype: object

Index: h
Row data:
name        Laura
score         NaN
attempts        1
qualify        no
Name: h, dtype: object

Index: i
Row data:
name        Kevin

23. Write a Python program to get list from DataFrame column headers.

In [24]:
def column_headers(dataframe):
    """
    Description:
        Gets the list of column headers from the DataFrame.

    Parameter:
        dataframe (pd.DataFrame): The DataFrame from which to get column headers.

    Return:
        list: A list of column headers.
    """
    return list(dataframe.columns)

def main():
    
    exam_data = {
        'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew',
                 'Laura', 'Kevin', 'Jonas'],
        'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
        'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']
    }
    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    dataframe = pd.DataFrame(exam_data, index=labels)

    print("Column headers:", column_headers(dataframe))

if __name__ == "__main__":
    main()


Column headers: ['name', 'score', 'attempts', 'qualify']
