In [1]:
import os
import pandas as pd
import numpy as np
import reliability
from reliability.Fitters import Fit_Weibull_2P
from reliability.Probability_plotting import plot_points
from io import BytesIO
import base64
import matplotlib.pyplot as plt

In [10]:
# Helper Functions
def calculate_censor(row):
    if row['Machine_No'] == row['Machine_Lag']:
        list_of_fail_PRs = ['Trouble Shooting', 'DOA', "DOI"]
        if row['PR_Code_Lag'] in list_of_fail_PRs:
            return "F"
        else:
            return "S"
    else:
        return "S"


def calculate_TTF(row):
    if row['Machine_No'] == row['Machine_Lag']:
        ttf_days = (row['Fail_Date'] - row['Install_Date']).days
        return ttf_days / 365
    else:
        ttf_days = (row['Data_Pulled_Date'] - row['Install_Date']).days
        return ttf_days / 365

In [4]:
raw_data = pd.read_csv("raw_data.csv")

In [5]:
items_subset_list = list(pdf.Items_.value_counts()[1:].index) #Remove other from the list

In [23]:
final_ttf_pdf = pd.DataFrame(columns=['Item', 'Install_Date', 'Fail_Date', 'Apply_Date',
                                      "Data_Pulled_Date", 'Machine_No', 'Machine_Lag',
                                      'PR_Code', 'PR_Code_Lag', 'Censor', 'TTF'])

In [26]:
for item in items_subset_list:
    filtered_data = raw_data[raw_data["Items_"] == item]

    # remove empty apply date rows
    filtered_data = filtered_data.dropna(subset=['APPLY_DATE'])

    # sort values by APPLY DATE and machine
    sorted_data = filtered_data.sort_values(['APPLY_DATE', 'MACHINE_NO'])

    weibull_pdf = pd.DataFrame(columns=['Item', 'Install_Date', 'Fail_Date', 'Apply_Date',
                                        "Data_Pulled_Date", 'Machine_No', 'Machine_Lag',
                                        'PR_Code', 'PR_Code_Lag', 'Censor', 'TTF'])
    weibull_pdf['Install_Date'] = pd.to_datetime(sorted_data['INSTALLED_DATE'])
    weibull_pdf['Apply_Date'] = pd.to_datetime(sorted_data['APPLY_DATE'])
    weibull_pdf['Fail_Date'] = weibull_pdf['Apply_Date'].shift(-1)
    weibull_pdf['Data_Pulled_Date'] = pd.Timestamp('11/25/2020')
    weibull_pdf['Machine_No'] = sorted_data['MACHINE_NO']
    weibull_pdf['Machine_Lag'] = weibull_pdf['Machine_No'].shift(-1)
    weibull_pdf['PR_Code'] = sorted_data['PARTS_REQUEST_REASON_CODE']
    weibull_pdf['PR_Code_Lag'] = weibull_pdf["PR_Code"].shift(-1)
    weibull_pdf['Item'] = item
    weibull_pdf.reset_index(drop=True, inplace=True)

    weibull_pdf['Censor'] = weibull_pdf.apply(lambda row: calculate_censor(row), axis=1)
    weibull_pdf['TTF'] = weibull_pdf.apply(lambda row: calculate_TTF(row), axis=1)

    filtered_weibull_pdf = weibull_pdf[weibull_pdf['TTF'] > 0]
    filtered_weibull_pdf = filtered_weibull_pdf.dropna(subset=['TTF'])
    filtered_weibull_pdf = filtered_weibull_pdf[filtered_weibull_pdf['PR_Code'] != "DOA"]
    if filtered_weibull_pdf.empty:
        continue
    final_ttf_pdf = final_ttf_pdf.append(filtered_weibull_pdf)

In [28]:
final_ttf_pdf[final_ttf_pdf['Item'] == "Computer"]

Unnamed: 0,Item,Install_Date,Fail_Date,Apply_Date,Data_Pulled_Date,Machine_No,Machine_Lag,PR_Code,PR_Code_Lag,Censor,TTF
0,Computer,2014-01-14,2014-01-21,2014-01-03,2020-11-25,7003,2117.0,Sales Request(CSR),Trouble Shooting,S,6.868493
1,Computer,2014-01-14,2014-01-21,2014-01-21,2020-11-25,2117,7003.0,Trouble Shooting,Sales Request(CSR),S,6.868493
3,Computer,2014-01-24,2014-01-24,2014-01-21,2020-11-25,7003,2117.0,Sales Request(CSR),DOA,S,6.841096
5,Computer,2014-02-11,2014-02-06,2014-02-06,2020-11-25,1047,1122.0,Trouble Shooting,Trouble Shooting,S,6.791781
6,Computer,2014-02-20,2014-02-06,2014-02-06,2020-11-25,1122,1124.0,Trouble Shooting,Trouble Shooting,S,6.767123
...,...,...,...,...,...,...,...,...,...,...,...
622,Computer,2020-08-28,2020-09-09,2020-09-06,2020-11-25,2010,2224.0,Trouble Shooting,Trouble Shooting,S,0.243836
623,Computer,2020-09-15,2020-09-21,2020-09-09,2020-11-25,2224,7022.0,Trouble Shooting,DOI,S,0.194521
624,Computer,2020-09-22,2020-09-28,2020-09-21,2020-11-25,7022,10002.0,DOI,Trouble Shooting,S,0.175342
625,Computer,2020-10-29,2020-09-30,2020-09-28,2020-11-25,10002,1058.0,Trouble Shooting,Trouble Shooting,S,0.073973


In [29]:
final_ttf_pdf.to_csv("TTF_Calculation.csv", index = False)