In [1]:
import pandas as pd
import logging 
import numpy as np
import re 
import uuid
from IPython.display import display
from sqlalchemy import MetaData, Table
from database_utils import DatabaseConnector
from data_extraction import DataExtractor
from data_cleaning import DataCleaning 

  """


In [4]:

def _to_uuid(value):
    """Return `value` as a uuid.UUID; existing UUIDs and missing values pass through unchanged."""
    if isinstance(value, uuid.UUID) or pd.isna(value):
        return value
    return uuid.UUID(str(value))


def clean_orders_data(orders_df=None):

    """
    Clean the orders data: normalise column types and drop unwanted columns.

    When no dataframe is supplied, 'orders_table' is retrieved via the
    'read_data_from_table' method of the DataExtractor. The function then:
      * converts 'date_uuid' and 'user_uuid' to uuid.UUID objects,
      * converts 'card_number' and 'store_code' to Python strings,
      * converts 'product_quantity' to int16,
      * drops column '1', 'first_name' and 'last_name' when present.

    Args:
        orders_df (pandas.DataFrame, optional): An already-loaded orders
            dataframe. It is copied, never modified. Defaults to None, in
            which case the data is read from 'orders_table'.

    Returns:
        dataframe: A new dataframe with the conversions above applied and
        column '1', 'first_name' and 'last_name' dropped

    Raises:
        KeyError: If one of the converted columns is missing.
        ValueError: If a uuid value is not a well-formed UUID string, or a
            'product_quantity' value cannot be converted to an integer.

    """

    #LOGGING, the start of the method
    print('started clean_orders_data')

    if orders_df is None:
        # get the 'orders_table' data via the 'read_data_from_table' method
        df = DataExtractor().read_data_from_table('orders_table')
    else:
        # work on a copy so the caller's dataframe is left untouched
        df = orders_df.copy()

    # Cast the uuid columns to uuid.UUID; _to_uuid tolerates values that are
    # already UUIDs, so running the cleaning twice does not fail
    for column in ('date_uuid', 'user_uuid'):
        df[column] = df[column].map(_to_uuid)

    # pandas has no fixed-width VARCHAR dtype: store these as plain strings and
    # leave the VARCHAR(n) width to the database schema.
    # Caveat: astype(str) turns a missing value into the literal text 'nan'
    for column in ('card_number', 'store_code'):
        df[column] = df[column].astype(str)

    # 'int16' is the pandas equivalent of SQL SMALLINT.
    # NOTE(review): assumes 'product_quantity' is the integer column meant for
    # SMALLINT - confirm against the target schema
    df['product_quantity'] = df['product_quantity'].astype('int16')

    # drop unwanted columns; errors='ignore' keeps this safe when a column has
    # already been removed
    df = df.drop(columns=['1', 'first_name', 'last_name'], errors='ignore')

    # return the cleaned df
    return df


In [5]:
# Keep the cleaned frame under a name so later cells can reuse it without
# re-querying the database; the bare name on the last line displays it.
orders_clean = clean_orders_data()
orders_clean

started clean_orders_data
read_data_from_table is working
init_db_engine is working
read_db_creds is working


Unnamed: 0,level_0,index,date_uuid,user_uuid,card_number,store_code,product_code,product_quantity
0,0,0,9476f17e-5d6a-4117-874d-9cdb38ca1fa6,93caf182-e4e9-4c6e-bebb-60a1a9dcf9b8,30060773296197,BL-8387506C,R7-3126933h,3
1,1,1,0423a395-a04d-4e4a-bd0f-d237cbd5a295,8fe96c3a-d62d-4eb5-b313-cf12d9126a49,349624180933183,WEB-1388012W,C2-7287916l,2
2,2,2,65187294-bb16-4519-adc0-787bbe423970,fc461df4-b919-48b2-909e-55c95a03fe6b,3529023891650490,CH-01D85C8D,S7-1175877v,2
3,3,3,579e21f7-13cb-436b-83ad-33687a4eb337,6104719f-ef14-4b09-bf04-fb0c4620acb0,213142929492281,CL-C183BE4B,D8-8421505n,2
4,4,4,00ab86c3-2039-4674-b9c1-adbcbbf525bd,9523a6d3-b2dd-4670-a51a-36aebc89f579,502067329974,SO-B5B9CB3B,B6-2596063a,2
...,...,...,...,...,...,...,...,...
120118,110549,110548,f0e8fff6-9998-4661-954b-0e258e09d33c,95c74b0a-d495-4359-b1c0-e2da511e8403,575421945446,KA-FA7ED3B8,C9-6827622o,4
120119,82164,82164,1c80940a-d186-4ba9-9daa-8abd1aceae32,5d6fa6fe-e583-4baf-8bbb-d1dd6e2b551f,4971858637664481,WA-A41DA979,I0-1146408B,1
120120,97599,97599,58598aca-049c-418e-8e39-46327634a7f1,48b7f1fc-db13-4611-ad8e-3dac0b759488,4971858637664481,WEB-1388012W,A4-5443400b,4
120121,106591,106591,3a76f661-0707-4fbc-9862-f21d3249f581,51c0b538-7ded-4697-8e84-9f7aa13f9112,4971858637664481,SO-6D328417,E9-2782979e,4
