In [1]:
import logging
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates


def setup_logging():
    """Configure root logging to write INFO-and-above records to '76-ipynb.log'.

    Each record is formatted as
    ``<timestamp> - <level> <calling function> | <message>``.
    A confirmation message is logged once configuration has been requested.

    Note: ``logging.basicConfig`` leaves the configuration untouched when the
    root logger already has handlers, so calling this a second time (for
    example by re-running the cell) does not attach another handler.
    """
    record_layout = '%(asctime)s - %(levelname)s %(funcName)s | %(message)s'
    # The only destination is the log file; nothing is echoed to the console.
    log_file_handler = logging.FileHandler('76-ipynb.log')

    logging.basicConfig(
        level=logging.INFO,
        format=record_layout,
        handlers=[log_file_handler],
    )

    logging.info("Logging setup complete.")


def fetch_df(file_path: str) -> pd.DataFrame:
    """Read the CSV file at ``file_path`` into a DataFrame.

    Any failure raised by ``pd.read_csv`` is logged at ERROR level with a
    message specific to the kind of failure and then re-raised unchanged, so
    callers still see the original exception. A successful load is logged at
    INFO level.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The DataFrame produced by ``pd.read_csv``.

    Raises:
        FileNotFoundError: The file does not exist.
        pd.errors.EmptyDataError: The file contains no data to parse.
        pd.errors.ParserError: The file content could not be parsed.
        Exception: Any other error raised while reading the file.
    """
    try:
        loaded_frame = pd.read_csv(file_path)
    except Exception as read_error:
        # Choose the log message that matches the failure; the checks run in
        # the order most-specific-first and fall back to a generic message.
        if isinstance(read_error, FileNotFoundError):
            failure_message = f"File not found: {file_path}. Error: {read_error}"
        elif isinstance(read_error, pd.errors.EmptyDataError):
            failure_message = f"File is empty: {file_path}. Error: {read_error}"
        elif isinstance(read_error, pd.errors.ParserError):
            failure_message = f"Error parsing file: {file_path}. Error: {read_error}"
        else:
            failure_message = f"An unexpected error occurred while reading {file_path}. Error: {read_error}"
        logging.error(failure_message)
        raise read_error

    logging.info(f"DataFrame loaded successfully from {file_path}.")
    return loaded_frame
    

def log_df_info(df: pd.DataFrame) -> None:
    """Print a short structural summary of ``df`` to standard output.

    Four lines are printed, in this order: the frame's shape, its column
    labels as a list, whether any cell is NaN/missing, and whether any row
    is flagged by ``DataFrame.duplicated``.

    Args:
        df: The DataFrame to summarise. It is not modified.
    """
    dimensions = df.shape
    print(f"Shape: {dimensions}")

    column_labels = df.columns.tolist()
    print(f"Columns: {column_labels}")

    # True as soon as a single cell anywhere in the frame is missing.
    has_missing = df.isna().values.any()
    print(f'NaN: {has_missing}')

    # True when at least one row repeats an earlier row in every column.
    has_duplicates = df.duplicated().any()
    print(f'Duplicate: {has_duplicates}')
    

# Data Cleaning: Removing NaN Values and Duplicates

In [None]:
if __name__ == "__main__":
    setup_logging()

    app_df = fetch_df('input-76.csv')
    # Drop columns that are not needed for the analysis. `columns=` already
    # targets the column axis, so no separate `axis` argument is required.
    app_df = app_df.drop(columns=['Last_Updated', 'Android_Ver'])

    # Select only the rows that repeat an earlier row across all remaining
    # columns. DataFrame.duplicated() defaults to keep='first', so the first
    # occurrence of each repeated row is not part of this selection.
    # Previously this name was bound to the whole frame rather than to the
    # duplicated rows it is named after.
    duplicated_rows = app_df[app_df.duplicated()]
    


    