In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from typing import List, Tuple, Union

In [5]:
### Collect the names of all files in the SignalExport folder, in sorted order

import os

coil_list = sorted(os.listdir('./SignalExport'))

In [6]:
### Read CoilData.csv into a dataframe that is indexed by, and sorted on, the 'coil' column

coil_data = (
    pd.read_csv('./CoilData/CoilData.csv')
    .set_index('coil')
    .sort_index()
)

# Overview of functions

In this section we introduce a number of functions that help us identify constrictions. For each coil in the SignalExport folder we perform the following steps:
<ol>
    <li> Retrieve the length and width values (get_values()) </li>
    <li> Check that the sizes of these lists match (size_check()) </li>
    <li> Check that there are no extreme values (extreme_check()) </li>
    <li> Check that the B4 curve is below the B5 curve in the interval [120, 140] (upside_down()) </li>
    <li> Chop the interval [140, 170] into 30 pieces and average over these pieces (average()) </li>
    <li> Calculate the error and put it into a dataframe </li>
</ol>

For each step we create a separate function.

In [7]:
def get_values(coil_number: Union[int, str], B:str) -> Tuple[List[float], List[float]]:
    '''
    Returns the x-values and y-values of a given coil before or after hot rolling (B4 or B5)

    The file ./SignalExport/<coil_number><B>.csv is read as a single string and split on ';'.
    The fields following the 'Lengthpoints:' marker (up to the 'Values' marker) are the
    x-values, the fields following the 'Values' marker are the y-values.

    :param coil_number: str or int that is the coil number
    :param B: str that is either B4 or B5

    returns two lists of floats (x_val, y_val); a list is empty when its marker is missing
    raises OSError when the file cannot be opened and ValueError when a field is not a number
    '''
    with open('./SignalExport/' + str(coil_number) + B + '.csv', 'r') as file:
        data = file.read().split(';')

    # Position of each marker, or None when the file does not contain it
    start_y = data.index('Values') if 'Values' in data else None
    start_x = data.index('Lengthpoints:') if 'Lengthpoints:' in data else None

    # The last field of the file is never treated as data
    # (presumably it is the fragment after the final ';' -- TODO confirm against the export format)
    y_val = data[start_y+1:-1] if start_y is not None else []

    if start_x is None:
        x_val = []
    elif start_y is None:
        # No 'Values' marker to stop at: the x-values run to the end of the file.
        # Previously this case crashed with a NameError on the unbound start_y.
        x_val = data[start_x+1:-1]
    else:
        x_val = data[start_x+1:start_y]

    return list(map(float, x_val)), list(map(float, y_val))
    

In [8]:
def size_check(x_val: List[float],  y_val: List[float]) -> bool:
    '''
    Function that checks whether x_val and y_val contain the same, strictly positive,
    number of elements

    :param x_val: list of floats
    :param y_val: list of floats

    returns True when y_val is non-empty and both lists have the same length, False otherwise
    '''
    # Always return an explicit bool; the failing case used to fall through and return None
    return len(y_val) > 0 and len(x_val) == len(y_val)

In [9]:
def get_y_interval(x_val: List[float],  y_val: List[float], lower: int, upper: int) -> np.array:
    '''
    Function that collects the y-values whose matching x-value lies strictly between
    the lower and the upper bound

    :param x_val: list of floats, read at the same positions as y_val
    :param y_val: list of floats
    :param lower: int that represents the (exclusive) lower bound
    :param upper: int that represents the (exclusive) upper bound

    returns a numpy array with the selected y-values, in their original order
    '''
    selected = []
    for position, y in enumerate(y_val):
        if lower < x_val[position] < upper:
            selected.append(y)
    return np.array(selected)

In [10]:
def extreme_check(x_val: List[float], y_val:List[float], lower: int = 100, upper: int = 170, threshold: float = 1000) -> bool:
    '''
    Function that checks if y_val has no values below the threshold on the indices where
    x_val is between lower and upper. With the default arguments this is: no values
    below 1000 where x_val is between 100 and 170.

    :param x_val: list of floats
    :param y_val: list of floats
    :param lower: int that represents the lower bound of the inspected interval
    :param upper: int that represents the upper bound of the inspected interval
    :param threshold: smallest y-value that is still accepted

    returns True when the check passes. Returns False when a value below the threshold is
    found, or when there are no points in the interval at all (nothing can be verified).
    '''
    y_val_sub = get_y_interval(x_val, y_val, lower, upper)
    # .min() raises a ValueError on an empty array, so deal with that case explicitly
    if y_val_sub.size == 0:
        return False
    return not (y_val_sub.min() < threshold)

In [11]:
def upside_down(x_val_B4: List[float], y_val_B4: List[float], x_val_B5: List[float], y_val_B5: List[float]) -> np.array:
    '''
    Function used to check whether the B5 curve lies above the B4 curve.
    It returns the mean of the B4 y-values minus the mean of the B5 y-values, where both
    means are taken over the points whose x-value is between 120 and 140. A positive
    result means that B4 is, on average, above B5 on that interval.

    :param x_val_B4: list of floats
    :param y_val_B4: list of floats
    :param x_val_B5: list of floats
    :param y_val_B5: list of floats

    returns a single number (the difference of the two means)
    '''
    mean_B4, mean_B5 = (
        get_y_interval(x_values, y_values, 120, 140).mean()
        for x_values, y_values in ((x_val_B4, y_val_B4), (x_val_B5, y_val_B5))
    )
    return mean_B4 - mean_B5
    

In [12]:
def average(x_val: List[float], y_val:List[float], length: int, lower: int, upper: int) -> np.ndarray:
    '''
    Function that returns a numpy array. The interval [lower, upper] is chopped in pieces
    of length 'length'. For each piece the average of the y-values whose x-value falls in
    this piece is calculated. Each of these values is put into a numpy array.

    The points are consumed in a single forward pass, so x_val is expected to be in
    ascending order.

    :param x_val: list of floats representing the x-values of a coil
    :param y_val: list of floats representing the y-values of a coil
    :param length: int that is the length of a piece
    :param lower: int that represents the lower bound
    :param upper: int that represents the upper bound

    returns a numpy array with int((upper-lower)/length) entries; a piece that contains
    no points (also when the data ends before 'upper') gets the value 0
    '''
    n_pieces = int((upper-lower)/length)
    n_points = min(len(x_val), len(y_val))

    # Skip every point that lies before the start of the interval
    index = 0
    while index < n_points and x_val[index] < lower:
        index += 1

    y_means = []
    for i in range(n_pieces):
        # Piece i covers [lower + i*length, lower + (i+1)*length)
        piece_end = lower + (i+1)*length
        total = 0
        count = 0
        # The bound on index keeps the loop from running past the end of the data
        while index < n_points and x_val[index] < piece_end:
            total += y_val[index]
            index += 1
            count += 1
        y_means.append(total/count if count != 0 else 0)
    return np.array(y_means)
    

In [13]:
def max_score(y_means_B4: List[float], y_means_B5: List[float]) -> float:
    '''
    Function that returns the largest element-wise difference y_means_B4 - y_means_B5.
    The result is negative when B4 is below B5 at every position.

    :param y_means_B4: list (or numpy array) of floats
    :param y_means_B5: list (or numpy array) of floats with the same length

    returns the maximum of the differences
    '''
    # np.asarray makes the subtraction work for plain lists as well as for arrays
    return (np.asarray(y_means_B4) - np.asarray(y_means_B5)).max()

def sqr_score(y_means_B4: List[float], y_means_B5: List[float]) -> float:
    '''
    Function that returns the sum of the squares of the positive element-wise
    differences y_means_B4 - y_means_B5. Negative and zero differences do not contribute.

    :param y_means_B4: list (or numpy array) of floats
    :param y_means_B5: list (or numpy array) of floats with the same length

    returns the sum of squares; 0 when no difference is positive
    '''
    # np.asarray makes the subtraction work for plain lists as well as for arrays
    differences = np.asarray(y_means_B4) - np.asarray(y_means_B5)
    positive = differences[differences > 0]
    return (positive**2).sum()

## Calculating the error

We repeat how the pandas dataframe is created:
<ol>
    <li> Retrieve the length and width values (get_values()) </li>
    <li> Check that the sizes of these lists match (size_check()) </li>
    <li> Check that there are no extreme values (extreme_check()) </li>
    <li> Check that the B4 curve is below the B5 curve in the interval [120, 140] (upside_down()) </li>
    <li> Chop the interval [140, 170] into 30 pieces and average over these pieces (average()) </li>
    <li> Calculate the error and put it into a dataframe </li>
</ol>

In [14]:
def score_table(score_type: str) -> pd.Series:
    '''
    Function that creates a pandas Series with scores for each coil

    Every coil in the index of the module-level dataframe coil_data is processed. A coil is
    skipped when one of its two files is not listed in the module-level coil_list, or when
    it fails the size check, the extreme value check or the upside-down check.

    :param score_type: str that must be either Sqr or Max depending on the type of scoring one uses

    returns a Series named '<score_type>_score', indexed by coil, sorted from the highest to
    the lowest score and without missing values
    raises ValueError when score_type is neither Sqr nor Max
    '''
    score_functions = {'Sqr': sqr_score, 'Max': max_score}
    # Fail before any file is read instead of silently returning an empty table
    if score_type not in score_functions:
        raise ValueError("score_type must be either 'Sqr' or 'Max', got " + repr(score_type))
    score_function = score_functions[score_type]

    # A set gives a constant time membership test for every coil
    available_files = set(coil_list)

    scores = {}
    for coil in coil_data.index:

        # Check if we have the data of a given coil
        if (str(coil)+'B4'+'.csv' not in available_files) or (str(coil)+'B5'+'.csv' not in available_files):
            continue

        # retrieve the data
        x_val_B4, y_val_B4 = get_values(coil, 'B4')
        x_val_B5, y_val_B5 = get_values(coil, 'B5')

        # check the sizes of the lists
        if not (size_check(x_val_B4, y_val_B4) and size_check(x_val_B5, y_val_B5)):
            continue

        # check for extreme values
        if not (extreme_check(x_val_B4, y_val_B4) and extreme_check(x_val_B5, y_val_B5)):
            continue

        # check if the curves are well-behaved before [140,170]:
        # a positive value means that B4 lies above B5 on average, which we treat as a faulty measurement
        faulty_measurement = upside_down(x_val_B4, y_val_B4, x_val_B5, y_val_B5)
        if faulty_measurement > 0:
            continue

        # Calculate the means over each meter
        y_means_B4 = average(x_val_B4, y_val_B4, 1, 140, 170)
        y_means_B5 = average(x_val_B5, y_val_B5, 1, 140, 170)

        # Calculate the error and put it in a dictionary
        scores[coil] = score_function(y_means_B4, y_means_B5)

    # We store the data in a pandas Series; the explicit dtype keeps an empty result numeric
    score_values = pd.Series(scores, dtype=float).sort_values(ascending=False)
    score_values = score_values.dropna()
    score_values.name = score_type + '_score'
    return score_values

# Create csv with target column

In [15]:
def scores_to_csv(score_type:str) -> None:
    '''
    Function that writes a .csv file containing the rows of CoilData.csv for which a score
    exists, with the score of each coil added as an extra column.

    The file is written to ./NewData/Coil<score_type>Error.csv

    :param score_type: str that must be either Sqr or Max depending on the type of scoring one uses
    '''
    scores = score_table(score_type)
    target_path = f'./NewData/Coil{score_type}Error.csv'

    # The inner join keeps only the coils that received a score
    new_coil_data = coil_data.join(scores, how='inner')
    new_coil_data.index.names = ['coil']
    new_coil_data.to_csv(target_path)

In [16]:
# create the appropriate csv files

#scores_to_csv('Sqr')
#scores_to_csv('Max')

# Visualisation

In [17]:
def coilgraph(coil_number: Union[int, str], x_low: int, x_up:int) -> None:
    '''
    Function that shows the graphs of the width of a coil before and after hot rolling

    The B4 curve is drawn in red and the B5 curve in blue. The y-axis is limited to a band
    of 10 above and below the last B4 value; when there are no B4 values the y-axis is
    left to matplotlib's automatic scaling.

    :param coil_number: int or string that represents the coil
    :param x_low: int that represents the lower bound of the x-axis
    :param x_up: int that represents the upper bound of the x-axis
    '''
    x_val_B4, y_val_B4 = get_values(coil_number, 'B4')
    x_val_B5, y_val_B5 = get_values(coil_number, 'B5')

    _, ax = plt.subplots()
    ax.plot(x_val_B4, y_val_B4, color='red', label='B4')
    ax.plot(x_val_B5, y_val_B5, color='b', label='B5')

    # y_val_B4[-1] does not exist for an empty list, so only centre the axis when there is data
    if y_val_B4:
        ax.set_ylim([y_val_B4[-1]-10, y_val_B4[-1]+10])
    ax.set_xlim([x_low, x_up])
    ax.set_xlabel('length')
    ax.set_ylabel('width')
    ax.set_title(f'coil: {coil_number}')
    ax.legend()
    plt.show()

In [19]:
# Example: show coil 435014 with the x-axis limited to the range 100 - 170
coilgraph(coil_number='435014', x_low=100, x_up=170)