In [1]:
import pandas as pd
from joblib import load
from sklearn.metrics import f1_score

In [2]:
def final_fun_1(X):
    """
    Predict the class label for raw sensor readings.

    Selects the sensor columns used by the model, replaces missing readings
    with -1, scales them with the saved min-max scaler and runs the saved
    random-forest model. The class probabilities returned by
    ``model.predict_proba`` are printed as a side effect.

    Parameters
    ----------
    X : pandas.DataFrame
        Raw test data. Must contain every column listed in ``final_sensors``
        (a missing column raises ``KeyError``). The frame is NOT modified.

    Returns
    -------
    The output of ``model.predict``: one predicted class label per row of
    ``X`` (presumably 0 = normal, 1 = broken, matching the encoding used in
    ``final_fun_2`` -- confirm against the training code).
    """

    # NOTE: joblib.load unpickles arbitrary Python objects; only load
    # artifact files that come from a trusted source.
    # loading the min-max scaler
    scaler = load('minimax_scaler.joblib')
    # loading the trained model
    model = load('random_forest.joblib')

    # final sensors (column order must match what the scaler/model were fit on)
    final_sensors = ['sensor_00', 'sensor_04', 'sensor_06', 'sensor_07',
                     'sensor_08', 'sensor_09', 'sensor_10', 'sensor_11',
                     'sensor_12']

    # Select the columns and fill missing values with -1 on a NEW frame.
    # The previous `X[col].fillna(-1, inplace=True)` was a chained in-place
    # update: it silently mutated the caller's DataFrame on older pandas and
    # is a no-op under pandas Copy-on-Write (leaving NaNs in the features).
    data_df = X[final_sensors].fillna(-1)

    # normalizing the data
    data_df = scaler.transform(data_df)

    # prediction
    y = model.predict(data_df)
    prob = model.predict_proba(data_df)
    print(prob)

    return y

In [3]:
def final_fun_2(X, Y):
    """
    Evaluate the saved model on raw test data.

    Every machine status other than 'NORMAL' is treated as the positive
    (broken) class. Each row of sensor readings is scored against the status
    recorded 10 rows later, so the last 10 rows of ``X`` have no target and
    are excluded. Missing sensor readings are replaced with -1 and the
    features are scaled with the saved min-max scaler before prediction.

    Parameters
    ----------
    X : pandas.DataFrame
        Raw test data. Must contain every column listed in ``final_sensors``.
        The frame is NOT modified.
    Y : pandas.Series
        Machine status strings, row-aligned with ``X`` by position
        (at least as many entries as ``X`` has rows).

    Returns
    -------
    float
        Macro-averaged F1 score of the model's predictions.

    Raises
    ------
    ValueError
        If ``X`` has 10 rows or fewer (nothing left to score), or if ``Y``
        has fewer entries than ``X`` has rows.
    """

    # number of rows between a sensor reading and the status it is scored against
    horizon = 10

    n_rows = X.shape[0]
    if n_rows <= horizon:
        raise ValueError(
            "X must have more than %d rows to be evaluated, got %d"
            % (horizon, n_rows))
    if len(Y) < n_rows:
        raise ValueError(
            "Y has %d entries but X has %d rows" % (len(Y), n_rows))

    # NOTE: joblib.load unpickles arbitrary Python objects; only load
    # artifact files that come from a trusted source.
    # loading the min-max scaler
    scaler = load('minimax_scaler.joblib')
    # loading the trained model
    model = load('random_forest.joblib')

    # encoding machine status (recovering state counts as broken)
    # 0: Normal state
    # 1: Broken state
    y_binary = (Y != 'NORMAL').astype(int).to_numpy()

    # final sensors (column order must match what the scaler/model were fit on)
    final_sensors = ['sensor_00', 'sensor_04', 'sensor_06', 'sensor_07',
                     'sensor_08', 'sensor_09', 'sensor_10', 'sensor_11',
                     'sensor_12']

    # Row i is paired with the status at row i + horizon. Slicing by position
    # replaces the per-row Python loop and works for any index, not only the
    # default 0..n-1 index.
    y_true = y_binary[horizon:n_rows]

    # Keep only rows that have a target, and fill missing values with -1 on a
    # NEW frame so the caller's DataFrame is left untouched (the chained
    # in-place fillna used before is also a no-op under pandas Copy-on-Write).
    data_x = X[final_sensors].iloc[:n_rows - horizon].fillna(-1)

    # normalizing the data
    X_test = scaler.transform(data_x)
    # prediction
    y_pred = model.predict(X_test)

    # macro f1 score
    f1_macro = f1_score(y_true, y_pred, average='macro')

    return f1_macro

### Testing final_fun_1:

In [5]:
# Read only the first row of the raw test set to try a single-sample prediction.
X = pd.read_csv("raw_X_test.csv", nrows=1)
X.head()

Unnamed: 0.1,Unnamed: 0,timestamp,sensor_00,sensor_01,sensor_02,sensor_03,sensor_04,sensor_05,sensor_06,sensor_07,...,sensor_43,sensor_44,sensor_45,sensor_46,sensor_47,sensor_48,sensor_49,sensor_50,sensor_51,machine_status
0,131000,2018-06-30 23:20:00,,36.501736,39.0625,35.763889,3.451967,99.999878,,,...,27.343748,27.777779,28.645834,28.067129,29.513889,28.06713,28.93518,29.513889,,RECOVERING


In [6]:
# final_fun_1 prints the class probabilities itself; the outer print shows the returned label.
print(final_fun_1(X))

[[0.01227126 0.98772874]]
[1.]


### Testing final_fun_2:

In [6]:
# Re-read the raw test set, this time with all rows, for the F1 evaluation.
X = pd.read_csv("raw_X_test.csv")
X.head()

Unnamed: 0.1,Unnamed: 0,timestamp,sensor_00,sensor_01,sensor_02,sensor_03,sensor_04,sensor_05,sensor_06,sensor_07,...,sensor_43,sensor_44,sensor_45,sensor_46,sensor_47,sensor_48,sensor_49,sensor_50,sensor_51,machine_status
0,131000,2018-06-30 23:20:00,,36.501736,39.0625,35.763889,3.451967,99.999878,,,...,27.343748,27.777779,28.645834,28.067129,29.513889,28.06713,28.93518,29.513889,,RECOVERING
1,131001,2018-06-30 23:21:00,,36.50174,39.0625,35.763889,3.336227,99.999878,,,...,27.343748,27.77778,28.64583,28.067129,29.513889,27.77778,28.64583,29.513889,,RECOVERING
2,131002,2018-06-30 23:22:00,,36.45833,39.0625,35.763889,3.336227,99.999878,,,...,27.343748,27.48843,28.06713,28.067129,29.513889,28.06713,28.93518,29.513889,,RECOVERING
3,131003,2018-06-30 23:23:00,,36.458332,39.0625,35.763889,3.104745,99.999878,,,...,27.34375,27.488426,28.64583,28.06713,29.513889,27.77778,28.935184,29.513889,,RECOVERING
4,131004,2018-06-30 23:24:00,,36.45833,39.0625,35.76389,2.798032,99.999878,,,...,27.08333,27.488426,28.06713,27.77778,29.513889,27.777779,28.93518,29.513889,,RECOVERING


In [7]:
# The machine_status column of the test set is passed to final_fun_2 as the ground truth.
Y = X['machine_status']
Y.head()

0    RECOVERING
1    RECOVERING
2    RECOVERING
3    RECOVERING
4    RECOVERING
Name: machine_status, dtype: object

In [8]:
# Macro F1 score returned by final_fun_2 for the full test set.
print(final_fun_2(X, Y))

0.9963262396049439
