# Manufacturing equipment

In [15]:
import os
# Working directory of the kernel; every data path in this notebook is built
# relative to it, so start the notebook from the project root.
base_path = os.getcwd()
print(base_path)

/home/tim/Documents/work/apache_flink


In [None]:
from pyflink.table import TableEnvironment, EnvironmentSettings, DataTypes, TableDescriptor, Schema
from pyflink.common import Configuration

# --- Execution environment ---------------------------------------------------
# Streaming-mode TableEnvironment with the default parallelism pinned to 1.
configuration = Configuration()
configuration.set_integer("table.exec.resource.default-parallelism", 1)

env_settings = (
    EnvironmentSettings.new_instance()
    .in_streaming_mode()
    .with_configuration(configuration)
    .build()
)
t_env = TableEnvironment.create(env_settings)

# --- Source table: manufacturing equipment readings --------------------------
# creationTS is read as a plain string; messageIndex is a BIGINT counter.
source_schema_builder = (
    Schema.new_builder()
    .column('creationTS', DataTypes.STRING())
    .column('messageIndex', DataTypes.BIGINT())
)
# The remaining twelve columns are all declared SMALLINT, in file order.
for sensor_column in ('mf01', 'mf02', 'mf03',
                      'pc13', 'pc14', 'pc15',
                      'pc25', 'pc26', 'pc27',
                      'res', 'bm05', 'bm06'):
    source_schema_builder = source_schema_builder.column(sensor_column, DataTypes.SMALLINT())

# eventTime is a computed column: the ISO 8601 string parsed into a TIMESTAMP.
# The watermark expression is eventTime itself (no out-of-orderness allowance).
source_schema = (
    source_schema_builder
    .column_by_expression('eventTime', "TO_TIMESTAMP(creationTS, 'yyyy-MM-dd''T''HH:mm:ss.SSSSSSSXXX')")
    .watermark('eventTime', "eventTime")
    .build()
)

# Default CSV format options are used; adjust them here if the file layout changes.
source_descriptor = (
    TableDescriptor.for_connector('filesystem')
    .schema(source_schema)
    .option('path', f'{base_path}/data/manufacturing-equiptment(DEBS)/DEBS2012-small.csv')
    .format('csv')
    .build()
)
t_env.create_temporary_table('manufacturing_data', source_descriptor)

# --- Sink table for Query 1 ---------------------------------------------------
# Three FLOAT averages followed by the window bounds as BIGINT values.
sink_schema_builder = Schema.new_builder()
for average_column in ('avg_mf01', 'avg_mf02', 'avg_mf03'):
    sink_schema_builder = sink_schema_builder.column(average_column, DataTypes.FLOAT())
for bound_column in ('window_start', 'window_end'):
    sink_schema_builder = sink_schema_builder.column(bound_column, DataTypes.BIGINT())
sink_schema = sink_schema_builder.build()

sink_descriptor = (
    TableDescriptor.for_connector('filesystem')
    .schema(sink_schema)
    .option('path', f'{base_path}/data/manufacturing-equiptment(DEBS)/outputs/sink_q1.csv')
    .format('csv')
    .build()
)
t_env.create_temporary_table('sink_q1', sink_descriptor)


## Query 1
```cpp
Query::from("me")
    .window(SlidingWindow::of(EventTime(RecordCreationTs()), Seconds(60), Seconds(1)))
    .apply(Avg(Attribute("mf01"))->as(Attribute("avg_mf01")), Avg(Attribute("mf02"))->as(Attribute("avg_mf02")), Avg(Attribute("mf03"))->as(Attribute("avg_mf03")))
    .sink(NullOutputSinkDescriptor::create());
```


In [17]:
# Query 1: 60-second sliding window (1-second slide) over eventTime, averaging
# the three mf* columns and writing the window bounds as epoch milliseconds.
#
# The mf* columns are SMALLINT. Flink's AVG keeps the integral input type, so
# averaging them directly truncates the result before it reaches the FLOAT sink
# columns. The values are therefore averaged as DOUBLE and cast explicitly to
# the sink's FLOAT type.
#
# NOTE(review): UNIX_TIMESTAMP interprets its string argument in the session
# time zone, so the emitted epoch values depend on `table.local-time-zone`
# — confirm it matches the +00:00 offset of the input data.
t_env.execute_sql('''
INSERT INTO sink_q1
SELECT 
    CAST(AVG(CAST(mf01 AS DOUBLE)) AS FLOAT) AS avg_mf01,
    CAST(AVG(CAST(mf02 AS DOUBLE)) AS FLOAT) AS avg_mf02,
    CAST(AVG(CAST(mf03 AS DOUBLE)) AS FLOAT) AS avg_mf03,
    1000 * UNIX_TIMESTAMP(CAST(window_start AS STRING)) + EXTRACT(MILLISECOND FROM window_start) as `window_start`,
    1000 * UNIX_TIMESTAMP(CAST(window_end AS STRING)) + EXTRACT(MILLISECOND FROM window_end) as `window_end`
FROM TABLE(
    HOP(
         TABLE manufacturing_data,
         DESCRIPTOR(eventTime),
         INTERVAL '1' SECOND,  -- slide interval
         INTERVAL '60' SECOND  -- window size
    )
)
GROUP BY window_start, window_end;
''').wait()

// Reference schema of the manufacturing-equipment generator: 14 fields, added
// in the order listed below.
// NOTE(review): the PyFlink table `manufacturing_data` in this notebook declares
// creationTS as STRING and every value column as SMALLINT, whereas this schema
// uses INT64 for creationTS and UINT16 for pc25/pc26/pc27/res — confirm the
// CSV values fit the signed 16-bit type.
SchemaPtr ManufacturingEquipmentDataGenerator::getSchema() {
    return Schema::create()
        ->addField("creationTS", BasicType::INT64)
        ->addField("messageIndex", BasicType::INT64)
        ->addField("mf01", BasicType::INT16)
        ->addField("mf02", BasicType::INT16)
        ->addField("mf03", BasicType::INT16)
        ->addField("pc13", BasicType::INT16)
        ->addField("pc14", BasicType::INT16)
        ->addField("pc15", BasicType::INT16)
        ->addField("pc25", BasicType::UINT16)
        ->addField("pc26", BasicType::UINT16)
        ->addField("pc27", BasicType::UINT16)
        ->addField("res", BasicType::UINT16)
        ->addField("bm05", BasicType::INT16)
        ->addField("bm06", BasicType::INT16);
}

== 14 fields

## Real schema from the website

message CDataPoint {
required fixed64 ts     = 1; //time stamp: nanoseconds since 1st Jan 1970
required fixed64 index  = 2; //message index
required fixed32 mf01 = 3; //Electrical Power Main Phase 1
required fixed32 mf02 = 4; //Electrical Power Main Phase 2
required fixed32  mf03 = 5; //Electrical Power Main Phase 3
required fixed32 pc13 = 6; //Anode Current Drop Detection Cell 1
required fixed32 pc14 = 7; //Anode Current Drop Detection Cell 2
required fixed32 pc15 = 8; //Anode Current Drop Detection Cell 3
required uint32 pc25 = 9; //Anode Voltage Drop Detection Cell 1
required uint32 pc26 = 10; //Anode Voltage Drop Detection Cell 2
required uint32 pc27 = 11; //Anode Voltage Drop Detection Cell 3
required uint32 res  = 12;
required bool bm05  = 13; //Chem A Additive Sense
required bool bm06  = 14; //Chem B Additive Sense
required bool bm07  = 15; //Chem C Additive Sense
required bool bm08  = 16; //Chem A Additive Release Valve VL26
required bool bm09  = 17; //Chem B Additive Release Valve VL27
required bool bm10  = 18; //Chem C Additive Release Valve VL28
optional bool pp01  = 19;
optional bool pp02  = 20;
optional bool pp03  = 21;
optional bool pp04  = 22;
optional bool pp05  = 23;
optional bool pp06  = 24;
optional bool pp07  = 25;
optional bool pp08  = 26;
optional bool pp09  = 27;
optional bool pp10  = 28;
optional bool pp11  = 29;
optional bool pp12  = 30;
optional bool pp13  = 31;
optional bool pp14  = 32;
optional bool pp15  = 33;
optional bool pp16  = 34;
optional bool pp17  = 35;
optional bool pp18  = 36;
optional bool pp19  = 37;
optional bool pp20  = 38;
optional bool pp21  = 39;
optional bool pp22  = 40;
optional bool pp23  = 41;
optional bool pp24  = 42;
optional bool pp25  = 43;
optional bool pp26  = 44;
optional bool pp27  = 45;
optional bool pp28  = 46;
optional bool pp29  = 47;
optional bool pp30  = 48;
optional bool pp31  = 49;
optional bool pp32  = 50;
optional bool pp33  = 51;
optional bool pp34  = 52;
optional bool pp35  = 53;
optional bool pp36  = 54;
optional bool pc01  = 55;
optional bool pc02  = 56;
optional bool pc03  = 57;
optional bool pc04  = 58;
optional bool pc05  = 59;
optional bool pc06  = 60;
optional bool pc19  = 61;
optional bool pc20  = 62;
optional bool pc21  = 63;
optional bool pc22  = 64;
optional bool pc23  = 65;
optional bool pc24  = 66;
}

2012-02-22T16:46:28.9670320+00:00	2556001	13056	14406	8119	0071	0193	0150	0000	0000	0000	0000	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1	1	0	0	0	0	0	0	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0
2012-02-22T16:46:28.9770284+00:00	2556002	13054	14405	8119	0069	0192	0151	0000	0000	0000	0000	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1	1	0	0	0	0	0	0	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0
2012-02-22T16:46:28.9870216+00:00	2556003	13049	14404	8119	0070	0194	0152	0000	0000	0000	0000	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1	1	0	0	0	0	0	0	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0
2012-02-22T16:46:28.9970430+00:00	2556004	13051	14401	8118	0071	0193	0152	0000	0000	0000	0000	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1	1	0	0	0	0	0	0	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0
2012-02-22T16:46:29.0070267+00:00	2556005	13049	14397	8116	0069	0191	0156	0000	0000	0000	0000	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1	1	0	0	0	0	0	0	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0

#### Script to reformat the raw data to CSV

In [2]:
import csv

# Raw dump and the CSV derived from it; adjust both paths to the local layout.
input_file = f'{base_path}/data/manufacturing-equiptment(DEBS)/DEBS2012-small.txt'
output_file = f'{base_path}/data/manufacturing-equiptment(DEBS)/DEBS2012-small.csv'

with open(input_file, 'r') as raw_dump, open(output_file, 'w', newline='') as csv_out:
    # Split each line on whitespace (change the delimiter here if needed).
    split_lines = (raw_line.strip().split() for raw_line in raw_dump)
    # Keep the first 14 fields of every line; lines with fewer than 14 fields
    # are dropped.
    csv.writer(csv_out).writerows(
        fields[:14] for fields in split_lines if len(fields) >= 14
    )

#### Untested script to reformat the whole file

In [None]:
#### UnTested script to reformat whole file 
import csv
import sys

def convert_txt_to_csv(input_file, output_file, expected_columns=None):
    """Convert a tab-separated text file into a comma-separated CSV file.

    Each input line is stripped of surrounding whitespace and split on tab
    characters; the resulting fields are written as one CSV row.

    Args:
        input_file: Path of the tab-separated text file to read.
        output_file: Path of the CSV file to create (overwritten if present).
        expected_columns: Optional column count. When given, rows whose number
            of fields differs from it are skipped. When ``None`` (the default),
            rows of any width are written.

    Returns:
        None. The result is the file written to ``output_file``.
    """
    with open(input_file, 'r') as infile, open(output_file, 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        for line in infile:
            stripped = line.strip()
            # A blank line would otherwise be emitted as a row holding a single
            # empty field (`""`), which is not a data record.
            if not stripped:
                continue
            row = stripped.split('\t')
            # Optional width filter, e.g. expected_columns=56 for the full dump.
            if expected_columns is not None and len(row) != expected_columns:
                continue
            writer.writerow(row)


# Convert the tab-separated dump with all of its columns.
# The output goes to its own file: DEBS2012-small.csv is the 14-column extract
# that the `manufacturing_data` table reads, and overwriting it with the
# all-column output would no longer match that table's schema.
input_file = f'{base_path}/data/manufacturing-equiptment(DEBS)/DEBS2012-small.txt'
output_file = f'{base_path}/data/manufacturing-equiptment(DEBS)/DEBS2012-small-full.csv'

convert_txt_to_csv(input_file, output_file)