# Creating a function to read timezone from profile
The following builds on the functions in read.py to create a reader for the profile files inside the zip files. It reads the timezone recorded in each profile so that it can be validated against the timezone of the timestamps.

In [1]:
import pandas as pd
from io import TextIOWrapper
import re
from pathlib import Path

from src.data_processing.read import ReadRecord, read_all, read_zip_file, headers_in_file
from src.configurations import Configuration

def read_all_profile(config: Configuration):
    """Run ``read_all`` over ``config`` using the profile zip reader.

    :param config: configuration object handed through to ``read_all``
    :return: whatever ``read_all`` returns for the given configuration
    """
    profile_reader = read_profile_from_zip
    records = read_all(config, profile_reader)
    return records

def read_profile_from_zip(file_name, config):
    """Read the profile CSV file(s) out of a single zip file.

    Delegates to ``read_zip_file``, supplying the profile-specific file
    matcher and the profile-specific DataFrame reader.

    :param file_name: zip file to read, passed through to ``read_zip_file``
    :param config: configuration object, passed through to ``read_zip_file``
    :return: whatever ``read_zip_file`` returns
    """
    matcher = is_a_profile_csv_file
    reader = read_profile_file_to_df
    return read_zip_file(config, file_name, matcher, reader)

def read_profile_file_to_df(archive, file, read_record):
    """Read the default profile and timezone columns of a profile CSV in a zip.

    The file is opened twice: once to inspect the header and discover which
    columns contain "timezone", and once to load only those columns (plus
    ``defaultProfile``) into a DataFrame. The result is melted to long format
    with one row per (defaultProfile, timezone column) pair.

    :param archive: open archive object providing ``open(file, mode=...)``
    :param file: member of ``archive`` to read
    :param read_record: currently unused by this function
        (NOTE(review): confirm whether the DataFrame should be stored on it)
    :return: long-format DataFrame with columns ``defaultProfile``,
        ``timezone`` (the source column name) and ``value``
    :raises ValueError: raised by pandas if ``defaultProfile`` is not a
        column of the file
    """
    # First pass: only look at the header to find the timezone columns.
    with archive.open(file, mode="r") as header_context:
        text_io_wrapper = TextIOWrapper(header_context, encoding="utf-8")
        header_cols = headers_in_file(text_io_wrapper)
        tz_cols = timezone_columns(header_cols)

    # Second pass on a fresh stream; the header stream above is presumably
    # already (partly) consumed by headers_in_file.
    with archive.open(file, mode="r") as profile_file:
        profile_text = TextIOWrapper(profile_file, encoding="utf-8")
        # usecols must be a list of column names: 'defaultProfile' + tz_cols
        # (str + list) raised a TypeError before.
        df = pd.read_csv(profile_text, usecols=["defaultProfile", *tz_cols])

    df = df.melt(id_vars=["defaultProfile"], var_name="timezone", value_name="value")
    # Previously the melted frame was built and then silently dropped.
    return df

def is_a_profile_csv_file(config, patient_id, file_path):
    """Tell whether ``file_path`` names a profile CSV file of ``patient_id``.

    A file qualifies when its base name starts with the patient id followed
    by ``config.profile_csv_file_start`` and the full path ends with
    ``config.csv_extension``.

    :param config: object providing ``profile_csv_file_start`` and
        ``csv_extension``
    :param patient_id: patient id string the file name has to start with
    :param file_path: path of the file as a string
    :return: True if both the prefix and the extension match, else False
    """
    expected_prefix = patient_id + config.profile_csv_file_start
    base_name = Path(file_path).name
    has_prefix = base_name.startswith(expected_prefix)
    has_extension = file_path.endswith(config.csv_extension)

    if not has_prefix:
        return False
    return has_extension

def timezone_columns(columns):
    """Return the entries of ``columns`` whose name contains "timezone".

    The match is case-sensitive and may occur anywhere in the name; the
    original order of ``columns`` is kept.

    :param columns: iterable of column name strings
    :return: list of the matching column names
    """
    matching = []
    for name in columns:
        if re.search(r"timezone", name):
            matching.append(name)
    return matching

# Create a configuration and point its data folder at the raw data directory.
config = Configuration()
config.data_folder = Path("/data/raw")

# Run the profile reader via read_all for this configuration.
read_records = read_all_profile(config)



ModuleNotFoundError: No module named 'seaborn'