# BMW Project Scouting

In [1]:
from pandas import read_csv
from bmwprophet.preprocessing import key_value_mapper

In [2]:
# Load the raw export. `parse_dates` takes positional column indices; in the
# recorded run below these are ts_utc (0), trigger_ts_utc (1) and
# measurement_date (7).
data = read_csv(
    "data/Dry_room_Last_month.csv",
    parse_dates=[0, 1, 7],
)

## Quick Column Exploration

In [3]:
# Print the number of distinct non-null values per column. Columns with exactly
# one distinct value are marked with "!" so constant columns stand out.
for column in data.columns:
    distinct_count = data[column].nunique()  # NaN excluded, like value_counts()
    marker = " "
    if distinct_count == 1:
        marker = "!"
    print(f"[{marker}]: Column '{column}' has {distinct_count} unique values")
    # Remove the per-iteration temporaries so they do not linger in the kernel.
    del marker, distinct_count

[ ]: Column 'ts_utc' has 823997 unique values
[ ]: Column 'trigger_ts_utc' has 42827 unique values
[ ]: Column 'part_item_number' has 2 unique values
[!]: Column 'part_id' has 1 unique values
[!]: Column 'opc_ua_status' has 1 unique values
[ ]: Column 'opc_ua_has_changed_since_last_publish' has 2 unique values
[ ]: Column 'message_id' has 42827 unique values
[ ]: Column 'measurement_date' has 31 unique values
[ ]: Column 'measured_value_datatype' has 2 unique values
[ ]: Column 'measured_value' has 86738 unique values
[!]: Column 'machine_id' has 1 unique values
[!]: Column 'line' has 1 unique values
[ ]: Column 'data_key_val_string' has 94 unique values


In [4]:
# Columns carried forward into the projected frame: the trigger timestamp, the
# measured value, and the packed key string that is split into fields later.
use_columns = ["trigger_ts_utc", "measured_value", "data_key_val_string"]

## Data Cleaning

In [5]:
# Project onto the selected columns; .copy() yields an independent frame so the
# columns added later do not write into `data`.
projected_data = data.loc[:, use_columns].copy()

## More Scouting on the Last Column (`data_key_val_string`)

In [6]:
# Split the packed `data_key_val_string` identifier into one column per field.
# `key_value_mapper` is a project helper; this cell assumes it returns one
# 6-item sequence per row, in the order listed in `key_fields` -- TODO confirm.
# NOTE(review): "errection_location" looks like a typo for "erection_location".
# The spelling is kept because the column name is written to the exported CSV.
key_fields = [
    "ns",
    "machine",
    "errection_location",
    "installation_location",
    "components",
    "sensor_type",
]

# Read from the untouched raw frame so the cell does not depend on whether
# `projected_data` still holds the packed column.
column = data["data_key_val_string"]
column_new = column.map(key_value_mapper)

# Transpose the per-row results into one sequence of values per field and fail
# with a readable message if the mapper's field count is not the expected one.
field_values = list(zip(*column_new))
if len(field_values) != len(key_fields):
    raise ValueError(
        f"key_value_mapper produced {len(field_values)} fields, "
        f"expected {len(key_fields)}"
    )

for field_name, values in zip(key_fields, field_values):
    projected_data[field_name] = list(values)

# Non-mutating drop with errors="ignore" keeps the cell re-runnable: on a second
# execution the packed column is already gone and a plain drop raises KeyError.
projected_data = projected_data.drop(
    columns="data_key_val_string", errors="ignore"
)

In [7]:
# Preview the first five rows (the default for head()) of the projected frame.
projected_data.head(5)

Unnamed: 0,trigger_ts_utc,measured_value,ns,machine,errection_location,installation_location,components,sensor_type
0,2024-10-31 01:14:50.846157+00:00,30944.0,5,==0192E0020=C03DR3,,,,DataPublishTrigger
1,2024-10-31 12:51:52.139911+00:00,0.004774,5,==0192E0020=C03DR3,++ST000,+AP002,-BF02,ConsumptionWater
2,2024-10-31 12:51:52.139911+00:00,3.647569,5,==0192E0020=C03DR3,++ST000,+AZ001,-BF01,ConsumptionWater
3,2024-10-31 12:51:52.139911+00:00,5.999756,5,==0192E0020=C03DR3,++ST000,+AZ001,-BT31,Temperature
4,2024-10-31 12:51:52.139911+00:00,12.099854,5,==0192E0020=C03DR3,++ST000,+AZ001,-BT32,Temperature


In [8]:
# Persist the preprocessed frame. index=True is the pandas default, spelled out
# here: the row index is written as the first, unnamed CSV column.
projected_data.to_csv(
    "data/preprocessed_data.csv",
    index=True,
)