In [12]:
import pandas as pd
import numpy as np

In [13]:
def _strip_leading_zeros(value):
    """Return ``str(value)`` without leading zeros (e.g. ``0.154`` -> ``'.154'``).

    A value whose text consists only of zeros (e.g. the integer ``0``) is
    returned as ``'0'`` rather than an empty string, so the price is not
    lost when the result is written to CSV.
    """
    text = str(value)
    return text.lstrip('0') or '0'


def process_data(file_path, destination_path):
    """Clean a name/price CSV dataset and save the processed result as CSV.

    Steps performed:
      1. Read ``file_path`` with ``pd.read_csv``.
      2. Drop every row whose ``name`` is missing.
      3. Add ``above_100``: the string ``'True'`` when the price is greater
         than 100, otherwise ``'False'`` (a missing price yields ``'False'``).
      4. Split ``name`` on whitespace into ``first_name`` (first token) and
         ``last_name`` (second token).
      5. Keep only ``first_name``, ``last_name``, ``price`` and ``above_100``,
         in that order.
      6. Convert ``price`` to text with leading zeros removed.
      7. Write the result to ``destination_path`` without the index.

    Args:
        file_path (str): File path of the original csv dataset. It must
            contain the columns ``name`` and ``price``.
        destination_path (str): Destination path of where to save
            the processed csv dataset.

    Returns:
        pd.DataFrame: The processed dataframe. Rows keep the index labels
        they had in the original file (the index is not reset), and the
        ``price`` and ``above_100`` columns hold strings.

    Raises:
        KeyError: If the ``name`` or ``price`` column is missing.
        ValueError: If a price cannot be converted to ``float``.

    """

    # Importing the csv file
    raw_df = pd.read_csv(file_path)

    # Dropping the rows where there is no name. dropna returns a new frame,
    # so the frame that was read is never modified.
    named_df = raw_df.dropna(axis=0, subset=['name'])

    prices = named_df['price']

    # Flag prices above 100. The flag is kept as the strings 'True'/'False'
    # (not booleans) so callers comparing against those strings keep working.
    above_100 = np.where(prices.astype(float) > 100, 'True', 'False')

    # Splitting the name into first_name and last_name, using only the rows
    # that survived the dropna above.
    # NOTE(review): last_name is the *second* token, so a name with more than
    # two words (e.g. a middle name) loses everything after the second word,
    # and a single-word name gets a missing last_name.
    name_parts = named_df['name'].str.split()

    # Assemble the output columns in their final order; the original 'name'
    # column is dropped simply by not being included here.
    processed_df = pd.DataFrame(
        {
            'first_name': name_parts.str[0],
            'last_name': name_parts.str[1],
            # removing the zeros at the front eg. 0.154 --> .154
            'price': prices.map(_strip_leading_zeros),
            'above_100': above_100,
        },
        index=named_df.index,
        columns=['first_name', 'last_name', 'price', 'above_100'],
    )

    processed_df.to_csv(destination_path, index=False)

    return processed_df

In [15]:
# Run the cleaning pipeline on dataset1.csv; the returned frame is displayed
# as the cell output and also written to processed_dataset1.csv.
process_data(
    file_path='dataset1.csv',
    destination_path='processed_dataset1.csv',
)

Unnamed: 0,first_name,last_name,price,above_100
0,William,Dixon,109.03727959999999,True
1,Kristen,Horn,262.5246522,True
2,Kimberly,Chang,187.00725830000002,True
3,Mary,Ball,283.1746476,True
4,Benjamin,Craig,143.87158169999998,True
...,...,...,...,...
4995,Shirley,Nguyen,9.011665322999999,False
4996,Jesse,Brown,247.3282318,True
4997,Valerie,Owens,238.1037141,True
4998,Alicia,Sharp,243.622929,True
