# Deel A : nodige modules

In [1]:
# standaard in python
import os 
import datetime
import sys
import json

In [2]:
# te installeren packages en modules
import numpy as np
import pandas as pd

In [3]:
# eigen packages modules

# Deel B : Functies om herhaling te vermijden

In [4]:
def load_msr_into_dataframe(loc):
    """
    What:
        Loads the MSR values from a single JSON file into a DataFrame.
        The file is expected to contain one flat JSON object that maps an MSR
        description (e.g. "BRU msr") to its MSR value.
        note : it is expected there is ALWAYS a file with correctly json formatted data

    Args:
        loc : path of the *.json file to read

    Returns:
        df_msr (pd.DataFrame): one row per entry of the JSON object, with the columns
            'factory'         : first three characters of the description
            'msr_description' : the JSON key as found in the file
            'msr_value'       : the JSON value belonging to that key

    Raises:
        OSError              : when the file at `loc` cannot be opened
        json.JSONDecodeError : when the file does not contain valid JSON
    """
    # read the file the caller asked for (the path must not be overridden here)
    with open(loc, 'r', encoding='utf-8') as file:
        msr_file = json.load(file)

    # one record per MSR entry; the factory code is the 3-character prefix of the key
    msr_expanded = [
        {'factory': msr_description[:3],
         'msr_description': msr_description,
         'msr_value': msr_value}
        for msr_description, msr_value in msr_file.items()
    ]

    # fixing the column list keeps the frame well-formed even for an empty JSON object
    df_msr = pd.DataFrame(msr_expanded,
                          columns=['factory', 'msr_description', 'msr_value'])

    return df_msr

In [5]:
def load_daily_production_into_dataframe(fact, loc):
    """
    What:
        Loads every *.json file found directly in a folder into one DataFrame,
        one row per file. Each file is expected to contain a single JSON object.
        Files are read in sorted file-name order so the row order is reproducible.
        note : it is expected there is ALWAYS correctly json formatted data

    Args:
        fact : name of factory for which daily production data is loaded
        loc  : folder in which to look for the *.json file(s)

    Returns:
        df_production (pd.DataFrame): the daily production of the given factory;
            the keys of the JSON objects become columns and a 'factory' column
            holding `fact` is added to every row

    Raises:
        OSError              : when the folder or one of its files cannot be read
        json.JSONDecodeError : when a *.json file does not contain valid JSON
    """
    daily_production_files = []

    # os.listdir returns names in arbitrary order, so sort for a deterministic result
    for file in sorted(os.listdir(loc)):
        if not file.endswith('.json'):
            continue

        json_file_path = os.path.join(loc, file)
        with open(json_file_path, 'r', encoding='utf-8') as json_file:
            json_data = json.load(json_file)

        # add factory identification to the dictionary
        json_data['factory'] = fact

        daily_production_files.append(json_data)

    # convert the list of records to a single data frame
    df_production = pd.DataFrame(daily_production_files)

    return df_production

# Deel C : opladen van de gegevens

In [6]:
# C.1 Load the JSON file with the MSR values into a DataFrame
min_sust_rate_location = "../data/raw/productiemodel/data_productie/master_data.json"
df_min_sust_rate = load_msr_into_dataframe(min_sust_rate_location)

# Show the loaded frame as the cell output
df_min_sust_rate

Unnamed: 0,factory,msr_description,msr_value
0,BRU,BRU msr,1244
1,STO,STO msr,274


In [7]:
# C.2 Load each daily production set into its own DataFrame
# Base folder of the daily production data; the factory name is joined onto it per factory
daily_prd_location = "../data/raw/productiemodel/data_productie/daily_production/"

In [8]:
# C.2.1. Load the BRU daily production set into its own DataFrame
# The factory name doubles as the sub-folder name below daily_prd_location
factory = 'BRU'
df_daily_production_BRU = load_daily_production_into_dataframe(
                                factory, 
                                os.path.join(daily_prd_location, factory))
df_daily_production_BRU

Unnamed: 0,DoW,hour,minute,date,maintenance,prod_loss,prod_loss_perc,production,factory
0,Monday,1,0,01-01-2018 00:00:00.0000,No,61,5,1183,BRU
1,Tuesday,1,0,01-02-2018 00:00:00.0000,No,106,9,1138,BRU
2,Wednesday,1,0,01-03-2018 00:00:00.0000,No,129,10,1115,BRU
3,Thursday,1,0,01-04-2018 00:00:00.0000,No,278,22,966,BRU
4,Friday,1,0,01-05-2018 00:00:00.0000,No,218,17,1026,BRU
...,...,...,...,...,...,...,...,...,...
1825,Saturday,1,0,12-31-2022 00:00:00.0000,No,203,16,1041,BRU
1826,Sunday,1,0,01-01-2023 00:00:00.0000,No,231,19,1013,BRU
1827,Monday,1,0,01-02-2023 00:00:00.0000,No,181,15,1063,BRU
1828,Tuesday,1,0,01-03-2023 00:00:00.0000,No,198,16,1046,BRU


In [9]:
# C.2.2. Load the STO daily production set into its own DataFrame
# The factory name doubles as the sub-folder name below daily_prd_location
factory = 'STO'
df_daily_production_STO = load_daily_production_into_dataframe(
                                factory, 
                                os.path.join(daily_prd_location, factory))
df_daily_production_STO

Unnamed: 0,DoW,hour,minute,date,maintenance,prod_loss,prod_loss_perc,production,factory
0,Monday,1,0,01-01-2018 00:00:00.0000,No,9,3,265,STO
1,Tuesday,1,0,01-02-2018 00:00:00.0000,No,9,3,265,STO
2,Wednesday,1,0,01-03-2018 00:00:00.0000,No,18,6,256,STO
3,Thursday,1,0,01-04-2018 00:00:00.0000,No,22,8,252,STO
4,Friday,1,0,01-05-2018 00:00:00.0000,No,31,11,243,STO
...,...,...,...,...,...,...,...,...,...
1825,Saturday,1,0,12-31-2022 00:00:00.0000,No,51,19,223,STO
1826,Sunday,1,0,01-01-2023 00:00:00.0000,No,53,20,221,STO
1827,Monday,1,0,01-02-2023 00:00:00.0000,No,61,22,213,STO
1828,Tuesday,1,0,01-03-2023 00:00:00.0000,No,70,25,204,STO


# Deel D : minimale controle van het Dataframe