## Introducing Data

In [2]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import functions as fn
import warnings
# Install a blanket 'ignore' filter: with no category/module argument this
# suppresses every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the imported
# libraries -- presumably done to keep cell output tidy; consider narrowing
# the filter to specific categories.
warnings.filterwarnings('ignore')

### Production Lines

#### Continuous Casting Machine (CCM)

Steel cools as it passes through the mould cavity and solidifies after water spraying.

In [13]:
# Load the cleaned/manipulated CCM export.
# The call passes header=None together with explicit names, so the first line
# of the file is read as data and the labels below are applied in file order.
ccm_column_names = [
    'EVENT_ID', 'SEQUENCE_ID', 'R_OS_ID', 'PRODUCTION_LINE_NAME',
    'REFERENCE_DATE', 'PIECE_ID', 'MATERIAL_ID', 'MOLD_WIDTH',
    'WIDTH', 'THICKNESS', 'WEIGHT', 'LENGTH', 'HEAT_ID',
    'STEEL_GRADE_ID_INT', 'EXIT_TEMP', 'SLAB_TRANSITION',
]
df_CCM = pd.read_csv(
    'data/ccm_manipulated_347418.csv',
    header=None,
    names=ccm_column_names,
)

# Report how many distinct EVENT_ID values the frame contains.
n_ccm_events = df_CCM['EVENT_ID'].nunique()
print(f"Number of events in the data set: {n_ccm_events}")

# Preview the first rows as the cell's rich output.
df_CCM.head()

Number of events in the data set: 347418


Unnamed: 0,EVENT_ID,SEQUENCE_ID,R_OS_ID,PRODUCTION_LINE_NAME,REFERENCE_DATE,PIECE_ID,MATERIAL_ID,MOLD_WIDTH,WIDTH,THICKNESS,WEIGHT,LENGTH,HEAT_ID,STEEL_GRADE_ID_INT,EXIT_TEMP,SLAB_TRANSITION
0,8360,109,115587,CCM1,13.12.16,16000041-07,148513,#NAME?,1330.0,87,2140000000.0,2440000000.0,16000041.0,26.0,0.0,0
1,8359,109,115587,CCM1,13.12.16,16000041-06,148512,#NAME?,1330.0,87,2140000000.0,2440000000.0,16000041.0,26.0,0.0,0
2,8358,109,115587,CCM1,13.12.16,16000041-05,148511,#NAME?,1330.0,87,2140000000.0,2440000000.0,16000041.0,26.0,0.0,0
3,8357,109,115587,CCM1,13.12.16,16000041-03,148509,#NAME?,1330.0,87,2140000000.0,2440000000.0,16000041.0,26.0,0.0,0
4,8356,109,115587,CCM1,13.12.16,16000041-02,148508,#NAME?,1330.0,87,2140000000.0,2440000000.0,16000041.0,26.0,0.0,0


#### Compact Strip Production (CSP)

Compact plant including a CCM, a reheating furnace, a hot rolling unit, and a strip processing unit.

In [12]:
# Load the cleaned/manipulated CSP export.
# header=None with no names: pandas labels the columns with integers 0..N-1
# and treats the first line of the file as data.
df_CSP = pd.read_csv(
    'data/csp_manipulated_205496_rev1.csv',
    header=None,
)

# Column 0 is the one counted as "events" in the printed summary.
n_csp_events = df_CSP[0].nunique()
print(f"Number of events in the data set: {n_csp_events}")

# Preview the first rows as the cell's rich output.
df_CSP.head()

Number of events in the data set: 205496


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,1,1660,0,0,0,0,0,0,1240,66,30.06.17 20:11,3707842260,17078201,49,6.45848,239846,17078201-01
1,2,1660,0,0,0,0,0,0,1229,65,30.06.17 20:19,3707842740,17078201,49,6.2741,239847,17078201-02
2,3,1660,0,0,0,0,0,0,1227,65,30.06.17 20:26,3707843160,17078201,49,6.27613,239848,17078201-03
3,4,1660,0,0,0,0,0,0,1227,65,30.06.17 20:34,3707843640,17078201,49,6.27651,239849,17078201-04
4,5,1660,0,0,0,0,0,0,1227,65,30.06.17 20:41,3707844060,17078201,49,6.27284,239850,17078201-05


#### Pickling Line & Tandem Cold Mill (PLTCM)

Compact plant including a turbulence pickling section and a tandem mill.

In [11]:
# Load the cleaned/manipulated PLTCM export.
# No header row is read (header=None), so columns get integer labels.
df_PLTCM = pd.read_csv(
    'data/pltcm_manipulated_59604_rev1.csv',
    header=None,
)

# Count the distinct values in column 0, reported below as the event count.
n_pltcm_events = df_PLTCM[0].nunique()
print(f"Number of events in the data set: {n_pltcm_events}")

# Preview the first rows as the cell's rich output.
df_PLTCM.head()

Number of events in the data set: 59604


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,24
0,1,3609,0,0,0,0,0,0,1264.0446,2.55459,...,1222.6404,2.55459,0.477354,1224.67,1222.6381,318580,17167961-03000,13.5754,2998.57,543.956
1,2,3609,0,0,0,0,0,0,1129.5043,2.73491,...,1089.025,2.73491,0.564271,1091.06,1089.0281,323346,17172941-01000,21.04121,4387.14,875.2338
2,3,3609,0,0,0,0,0,0,1096.1252,2.71956,...,1057.3627,2.71956,0.614289,1059.39,1057.358,323357,17172961-04000,15.52353,3061.01,672.14825
3,4,3609,0,0,0,0,0,0,1129.7084,2.73506,...,1089.025,2.73506,0.564197,1091.06,1089.0281,323344,17172921-05000,21.02293,4386.41,875.66516
4,5,3609,0,0,0,0,0,0,1569.3829,2.63399,...,1528.7625,2.63399,0.569115,1529.56,1528.765,318740,17168341-04000,17.28409,2558.96,536.5385


#### Continuous Galvanizing Line (CGL)

Application of protective zinc coating on the steel surface to improve corrosion resistance.

In [10]:
# Load the cleaned/manipulated CGL export.
# The file is read without a header row (header=None); columns are labelled
# with integers starting at 0.
df_CGL = pd.read_csv(
    'data/cgl_manipulated_27147_rev1.csv',
    header=None,
)

# Distinct values in column 0 are reported as the number of events.
n_cgl_events = df_CGL[0].nunique()
print(f"Number of events in the data set: {n_cgl_events}")

# Preview the first rows as the cell's rich output.
df_CGL.head()

Number of events in the data set: 27147


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,1,3712,0,26,951.0,0.49,15.688,4289.313,950.07294,0.485686,10-JAN-18 07.28.57.000000000 AM -06:00,3724579737
1,2,3712,0,26,950.0,0.51,15.886,4177.5107,950.07294,0.48593,10-JAN-18 07.59.01.000000000 AM -06:00,3724581541
2,3,3712,0,26,950.0,0.5,15.84,4248.722,950.07294,0.485982,10-JAN-18 08.29.00.000000000 AM -06:00,3724583340
3,4,3712,0,26,951.0,0.52,15.847,4083.0,950.07294,0.486016,10-JAN-18 08.58.55.000000000 AM -06:00,3724585135
4,5,3712,0,26,951.0,0.52,15.85,4083.5903,950.07294,0.486041,10-JAN-18 09.27.54.000000000 AM -06:00,3724586874


#### After the manipulation and cleaning steps, the CCM, CSP, PLTCM, and CGL data sets contain 347418, 205496, 59604, and 27147 events, respectively.

The decreasing number of events across the data sets shows that the output of one production line is not always an input to the next line; as previously mentioned, some pieces are excluded from the downstream production lines. As shown in Fig. 1.2, the number of events handled decreases from CSP to CGL, with a slightly reduced variety of widths. Moreover, the production events in PLTCM and CGL show wide diversity and high precision within a narrow range of the thickness feature. In contrast, the events in CSP follow a sharp unimodal distribution concentrated around a single value within a broad range of the same feature.