# Cleanup of df DataFrame
## Provides a function to clean up a given DataFrame

In [1]:
import pandas as pd

In [2]:
def clean_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Run the full clean-up pipeline required by the 2nd week homework.

    The steps are applied in a fixed order, each one receiving the result
    of the previous one:

    1. convert_datafrane_types -- convert columns to the required types
    2. add_sizes               -- add the Size column
    3. fix_order               -- sort the rows and reset the row index

    The only parameter is the pandas DataFrame to be cleaned up.
    Returns the DataFrame produced by the last step.
    """
    # Order matters: fix_order sorts by OrderDate, which the first step
    # converts to a datetime column.
    pipeline = (convert_datafrane_types, add_sizes, fix_order)
    cleaned = df
    for step in pipeline:
        cleaned = step(cleaned)
    return cleaned

In [3]:
def convert_datafrane_types(df: pd.DataFrame) -> pd.DataFrame:
    """Convert columns to the required types.

    The following conversions are applied:

    * 'Class'     -- leading/trailing whitespace is stripped
    * 'OrderDate' -- converted to datetime64[ns]
    * 'LineTotal' -- every ',' is replaced by '.', then converted to float64
      (presumably the input uses a decimal comma; verify against the data)

    'Class' and 'LineTotal' must hold strings, because the .str accessor
    is used on both.

    The only parameter is the pandas DataFrame to be cleaned up; it is
    left unmodified, so the function can be re-run on the same input.
    Returns a converted copy of the DataFrame as result.
    """
    # Work on a copy: assigning columns on the argument would change the
    # caller's frame, and a second call would then fail on the already
    # converted (non-string) LineTotal column.
    converted = df.copy()
    converted['Class'] = converted['Class'].str.strip()
    converted['OrderDate'] = converted['OrderDate'].astype('datetime64[ns]')
    # regex=False: ',' is a literal separator, not a pattern.
    line_total = converted['LineTotal'].str.replace(',', '.', regex=False)
    converted['LineTotal'] = line_total.astype('float64')
    return converted

def add_sizes(df: pd.DataFrame) -> pd.DataFrame:
    """Extract the sizes from ProductName, and add the sizes column.
    The only parameter is the pandas DataFrame to be cleaned up.
    Returns the modified DataFrame as result.
    """
    sizes = df['ProductName'].str.split(',', expand=True)[1].str.strip()
    colors = ['Black', 'Red', 'Blue']
    sizes = sizes.replace(colors, ['-'] * len(colors))
    df['Size'] = sizes
    return df

def fix_order(df: pd.DataFrame) -> pd.DataFrame:
    """Order the rows by OrderDate, then Country, and renumber them.

    The only parameter is the pandas DataFrame to be cleaned up.
    Returns a new, sorted DataFrame whose index is reset, with the old
    index dropped rather than kept as a column.
    """
    sort_keys = ['OrderDate', 'Country']
    ordered = df.sort_values(sort_keys)
    # drop=True discards the pre-sort index instead of adding it as a column.
    return ordered.reset_index(drop=True)