In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import warnings
from glob import glob
from sklearn.model_selection import train_test_split
import os
import h5py
import time
from datetime import timedelta
import seaborn as sns
from pandas import DataFrame
import matplotlib.pyplot as plt
from matplotlib import gridspec
%matplotlib inline

In [2]:
# moving_average(data smoothing)
def moving_average_ratio(data, window_ratio):
    window_size = int(window_ratio * len(data))
    half_window = window_size // 2
    kernel = np.ones(window_size) / window_size
    smoothed_data = np.convolve(data.values, kernel, mode='same')
    smoothed_series = pd.Series(smoothed_data, index=data.index)
    # 처음과 끝 부분 처리
    smoothed_series.iloc[:half_window] = data.iloc[:half_window].mean()
    smoothed_series.iloc[-half_window:] = data.iloc[-half_window:].mean()
    
    return smoothed_series

# Phase data 추가
def getPhaseData(df) :
    df = df.copy()
    # altitude와 altitudeShifted 계산
    altitude = df['alt'].iloc[:-1].reset_index(drop=True) # index reset
    altitudeShifted = df['alt'].iloc[1:].reset_index(drop=True) # index reset
    # alt 변화 계산
    altChange = altitudeShifted - altitude
    # AltChange 데이터 smoothing
    smoothedAltChange = moving_average_ratio(altChange, 0.05)
    smoothedAltChange = np.array(smoothedAltChange)
    # Cruise/Climb/Descend 영역 인덱스 생성
    cruiseIdx = np.logical_and(np.greater_equal(smoothedAltChange, -2), np.less_equal(smoothedAltChange, 2))
    climbIdx = np.greater(smoothedAltChange, 2)
    descendIdx = np.less(smoothedAltChange, -2)
    # altitude 데이터 가져오기
    df = df[:-1] # altChange 계산결과와 맞추기 위해 행 하나 줄임
    df.loc[:,"climb"] = climbIdx
    df.loc[:,"cruise"] = cruiseIdx
    df.loc[:,"descend"] = descendIdx
    df = df.loc[(df['climb'] == True)|(df['cruise'] == True)|(df['descend'] == True)]
    
    return df

# 파생 변수 생성
def gen_additional_var(df) :
    df['hpc_lpc_temp'] = (df['T30'] - df['T24']).abs()/df['T30']
    df['hpt_hpc_temp'] = (df['T48'] - df['T30']).abs()/df['T48']
    df['lpt_hpt_temp'] = (df['T50'] - df['T48']).abs()/df['T50']

    df['lpc_fan_press'] = (df['P24'] - df['P21']).abs()/df['P24']
    df['hpc_lpc_press'] = (df['P30'] - df['P24']).abs()/df['P30']
    df['hpt_hpc_press'] = (df['P45'] - df['P30']).abs()/df['P45']
    df['lpt_hpt_press'] = (df['P50'] - df['P45']).abs()/df['P50']

    df['hpc_lpc_temp_ratio'] = (df['T30'] / df['T24'])
    df['hpt_hpc_temp_ratio'] = (df['T48'] / df['T30'])
    df['lpt_hpt_temp_ratio'] = (df['T50'] / df['T48'])

    df['lpc_fan_press_ratio'] = (df['P24'] / df['P21'])
    df['hpc_lpc_press_ratio'] = (df['P30'] / df['P24'])
    df['hpt_hpc_press_ratio'] = (df['P45'] / df['P30'])
    df['lpt_hpt_press_ratio'] = (df['P50'] / df['P45'])
    
    df['hpt_entry_temp_drop'] = (df['T40']- df['T48']).abs() / df['T40']
    df['lpt_entry_temp_drop'] = (df['T48'] - df['T50']).abs() / df['T48']
    df['hpt_entry_press_ratio'] = df['P45']/ df['P40']
    df['fan_exit_press'] = df['SmFan'] / df['P21']
    df['hpc_exit_press'] = (df['SmHPC']) / df['Ps30']
    df['hpc_entry_temp'] = df['SmLPC'] / df['T2']
    
    return df

# Health 데이터 라벨링
def label_data(df):
    df = df.copy()
    df['failure_mode'].loc[(df['file_name'] == 'N-CMAPSS_DS02-006') & (df['unit'] == 2)] =  'hpt'
    df['failure_mode'].loc[(df['file_name'] == 'N-CMAPSS_DS02-006') & (df['unit'] == 5)] =  'hpt'
    df['failure_mode'].loc[(df['file_name'] == 'N-CMAPSS_DS02-006') & (df['unit'] == 10)] = 'hpt'
    df['failure_mode'].loc[(df['file_name'] == 'N-CMAPSS_DS02-006') & (df['unit'] == 16)] = 'hpt+lpt'
    df['failure_mode'].loc[(df['file_name'] == 'N-CMAPSS_DS02-006') & (df['unit'] == 18)] = 'hpt+lpt'
    df['failure_mode'].loc[(df['file_name'] == 'N-CMAPSS_DS02-006') & (df['unit'] == 20)] = 'hpt+lpt'
    # healthy 변수 추가
    df['tartget'] = df['failure_mode']
    df['tartget'].loc[df['hs'] == 1] = 'healthy'
    
    df['hpt_yn'] = 0
    df['lpt_yn'] = 0
    df['fan_yn'] = 0
    df['hpc_yn'] = 0
    df['lpc_yn'] = 0
    
    df['hpt_yn'].loc[(df['tartget'] == 'hpt') | (df['tartget'] == 'hpt+lpt') | (df['tartget'] == 'all')] = 1
    df['lpt_yn'].loc[(df['tartget'] == 'lpt') | (df['tartget'] == 'hpt+lpt') | (df['tartget'] == 'all')] = 1
    df['fan_yn'].loc[(df['tartget'] == 'fan') | (df['tartget'] == 'all')] = 1
    df['hpc_yn'].loc[(df['tartget'] == 'hpc') | (df['tartget'] == 'hpc+lpc') | (df['tartget'] == 'all')] = 1
    df['lpc_yn'].loc[(df['tartget'] == 'hpc+lpc') | (df['tartget'] == 'all')] = 1
    
    return df

# 각 컬럼에서 data smoothing 후, min, max
def generate_max_min(df, window):
    # 이동평균 계산
    rolling_mean_df = df.rolling(window=window).mean()
    # 최대값 계산
    max_df = pd.DataFrame(rolling_mean_df.max()).transpose()
    max_df.columns = df.columns + '_max'
    # 최소값 계산
    min_df = pd.DataFrame(rolling_mean_df.min()).transpose()
    min_df.columns = df.columns + '_min'
    return pd.concat([max_df, min_df], axis=1)

def generate_mean(df, window) :
    df = df.rolling(window=window).mean()
    mean_df = pd.DataFrame(df.mean()).transpose()
    mean_df.columns = df.columns + '_mean'    
    return mean_df

def generate_median(df, window) :
    df = df.rolling(window=window).mean()
    median_df = pd.DataFrame(df.median()).transpose()
    median_df.columns = df.columns + '_median'    
    return median_df

def generate_std(df, window) :
    df = df.rolling(window=window).mean()
    std_df = pd.DataFrame(df.std()).transpose()
    std_df.columns = df.columns + '_std'    
    return std_df

def data_prp(df, use_columns, word):
    df = df.copy()
    df_prp = pd.DataFrame()
    t_list = df[[word+'file_name',word+'unit',word+'cycle',word+'Fc',word+'fileNo',
                     word+'hpt_yn', word+'lpt_yn', word+'fan_yn', word+'hpc_yn', word+'lpc_yn', 
                     word+'flight_time(min)',word+'RUL']].drop_duplicates().reset_index(drop=True)
    
    for i in range(0,t_list.shape[0]):
        print(str(i)+"/"+str(t_list.shape[0]))
        # 해당 데이터셋에서 xs, xv, 신규 파생 변수만 집계값 사용
        df_prp_tmp = df[use_columns].loc[(df[word+'file_name'] == t_list[word+'file_name'][i]) 
                                             & (df[word+'unit'] == t_list[word+'unit'][i]) 
                                             & (df[word+'cycle'] == t_list[word+'cycle'][i])]
        join_df = pd.DataFrame()        
        # max, min 값
        df_minmax = generate_max_min(df_prp_tmp,window=5)
        join_df = df_minmax
        # mean 
        df_mean = generate_mean(df_prp_tmp,window=5)
        join_df = pd.concat([join_df, df_mean], axis=1)
        # median 
        df_median = generate_median(df_prp_tmp,window=5)
        join_df = pd.concat([join_df, df_median], axis=1)
        # std
        df_std = generate_std(df_prp_tmp,window=5)
        join_df = pd.concat([join_df, df_std], axis=1)
        # 맨 마지막에 hs, cylce, unit 추가
        join_df[word+'flight_time(min)'] = t_list[word+'flight_time(min)'][i]
        join_df[word+'unit'] = t_list[word+'unit'][i]
        join_df[word+'cycle'] = t_list[word+'cycle'][i]
        join_df[word+'Fc'] = t_list[word+'Fc'][i]
        join_df[word+'fileNo'] = t_list[word+'fileNo'][i]
        join_df[word+'hpt_yn'] = t_list[word+'hpt_yn'][i]
        join_df[word+'lpt_yn'] = t_list[word+'lpt_yn'][i]
        join_df[word+'fan_yn'] = t_list[word+'fan_yn'][i]
        join_df[word+'hpc_yn'] = t_list[word+'hpc_yn'][i]
        join_df[word+'lpc_yn'] = t_list[word+'lpc_yn'][i]    
        join_df[word+'RUL'] = t_list[word+'RUL'][i]    
        df_prp = pd.concat([df_prp, join_df])
    df_prp.reset_index(drop=True, inplace=True)

    return df_prp

In [24]:
# Time tracking, Operation time (min)
t = time.process_time()

# Get h5 filenames 함수
def get_h5_filenames(folder_path):
    h5_filenames = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".h5"):
            h5_filenames.append(filename)
    return h5_filenames

# file derectory의 전체 파일 읽어서 filename 지정
current_dir = os.getcwd()
h5_filenames = get_h5_filenames(current_dir + "/raw_data")
h5_filenames.sort()
file_order = 7
filename = "raw_data/"+h5_filenames[file_order]
filename

'raw_data/N-CMAPSS_DS08a-009.h5'

In [25]:
h5_file_list = ['N-CMAPSS_DS01-005.h5','N-CMAPSS_DS02-006.h5','N-CMAPSS_DS03-012.h5',
                'N-CMAPSS_DS04.h5','N-CMAPSS_DS05.h5','N-CMAPSS_DS06.h5',
                'N-CMAPSS_DS07.h5','N-CMAPSS_DS08a-009.h5','N-CMAPSS_DS08c-008.h5']

f_mode_list = ['hpt','hpt+lpt','hpt+lpt','fan','hpc','hpc+lpc','lpt','all','all']

with h5py.File(filename, 'r') as hdf:
        # Development(training) set
        W_dev = np.array(hdf.get('W_dev'))             # W 
        X_s_dev = np.array(hdf.get('X_s_dev'))         # X_s
        X_v_dev = np.array(hdf.get('X_v_dev'))         # X_v
        T_dev = np.array(hdf.get('T_dev'))             # T
        Y_dev = np.array(hdf.get('Y_dev'))             # RUL  
        A_dev = np.array(hdf.get('A_dev'))             # Auxiliary

        # Test set
        W_test = np.array(hdf.get('W_test'))           # W
        X_s_test = np.array(hdf.get('X_s_test'))       # X_s
        X_v_test = np.array(hdf.get('X_v_test'))       # X_v
        T_test = np.array(hdf.get('T_test'))           # T
        Y_test = np.array(hdf.get('Y_test'))           # RUL  
        A_test = np.array(hdf.get('A_test'))           # Auxiliary
        
        # Varnams
        W_var = np.array(hdf.get('W_var'))
        X_s_var = np.array(hdf.get('X_s_var'))  
        X_v_var = np.array(hdf.get('X_v_var')) 
        T_var = np.array(hdf.get('T_var'))
        A_var = np.array(hdf.get('A_var'))
        
        # from np.array to list dtype U4/U5
        W_var = list(np.array(W_var, dtype='U20'))
        X_s_var = list(np.array(X_s_var, dtype='U20'))  
        X_v_var = list(np.array(X_v_var, dtype='U20')) 
        T_var = list(np.array(T_var, dtype='U20'))
        A_var = list(np.array(A_var, dtype='U20'))
                          
W = np.concatenate((W_dev, W_test), axis=0)  
X_s = np.concatenate((X_s_dev, X_s_test), axis=0)
X_v = np.concatenate((X_v_dev, X_v_test), axis=0)
T = np.concatenate((T_dev, T_test), axis=0)
Y = np.concatenate((Y_dev, Y_test), axis=0) 
A = np.concatenate((A_dev, A_test), axis=0)

# df _ W, Xs, Y, A
df_W = DataFrame(data=W, columns=W_var)
df_Xs = DataFrame(data=X_s, columns=X_s_var)
df_Xv = DataFrame(data=X_v, columns=X_v_var)
df_T = DataFrame(data=T, columns=T_var)
df_Y = DataFrame(data=Y, columns=['RUL'])
df_A = DataFrame(data=A, columns=A_var)
df_A["failure_mode"] = f_mode_list[file_order]
data_name = filename.split('/')[-1]
data_name = data_name.split('.')[0]
df_A["file_name"] = data_name

df_tmp = pd.concat([df_W, df_Xs, df_Xv, df_T, df_Y, df_A], axis=1)

In [26]:
# phase feature 추가
df_with_phase = pd.DataFrame(columns=df_tmp.columns)
unit_cycle_list = df_tmp[['unit', 'cycle']].drop_duplicates().reset_index(drop=True)

for i in range(0, unit_cycle_list.shape[0]):
    #진행현황
    print("Processing: "+str(i)+"/"+str(unit_cycle_list.shape[0]))
    data_with_phase = df_tmp.loc[(df_tmp['unit'] == unit_cycle_list['unit'][i])
                                &(df_tmp['cycle'] == unit_cycle_list['cycle'][i])]
    data_with_phase = getPhaseData(data_with_phase)
    cols_to_convert = ['climb', 'cruise', 'descend']
    for col in cols_to_convert:
        data_with_phase[col] = data_with_phase[col].astype(bool)

    df_with_phase = pd.concat((df_with_phase, data_with_phase), axis=0)
    for col in cols_to_convert:
        df_with_phase[col] = df_with_phase[col].astype(bool)

#비행시간 추출
flight_time = pd.DataFrame(df_A.groupby(["unit","cycle"])["Fc"].count()).reset_index()
flight_time["flight_time(min)"] = round(flight_time["Fc"]/60,2)
df_with_phase = df_with_phase.merge(flight_time[["unit","cycle","flight_time(min)"]], 
                                    how = 'left', on = ["unit","cycle"])

Processing: 0/994
Processing: 1/994
Processing: 2/994
Processing: 3/994
Processing: 4/994
Processing: 5/994
Processing: 6/994
Processing: 7/994
Processing: 8/994
Processing: 9/994
Processing: 10/994
Processing: 11/994
Processing: 12/994
Processing: 13/994
Processing: 14/994
Processing: 15/994
Processing: 16/994
Processing: 17/994
Processing: 18/994
Processing: 19/994
Processing: 20/994
Processing: 21/994
Processing: 22/994
Processing: 23/994
Processing: 24/994
Processing: 25/994
Processing: 26/994
Processing: 27/994
Processing: 28/994
Processing: 29/994
Processing: 30/994
Processing: 31/994
Processing: 32/994
Processing: 33/994
Processing: 34/994
Processing: 35/994
Processing: 36/994
Processing: 37/994
Processing: 38/994
Processing: 39/994
Processing: 40/994
Processing: 41/994
Processing: 42/994
Processing: 43/994
Processing: 44/994
Processing: 45/994
Processing: 46/994
Processing: 47/994
Processing: 48/994
Processing: 49/994
Processing: 50/994
Processing: 51/994
Processing: 52/994
Pro

In [27]:
# 파생 변수 추가
df_with_phase_add_var = gen_additional_var(df_with_phase)
# health label 추가
df_with_phase_add_var_label = label_data(df_with_phase)
df_PVL = df_with_phase_add_var_label.copy()
# fileNo, Fc 추가
df_PVL['fileNo'] = file_order

df_all = df_PVL.copy()


del(df_tmp)
del(df_with_phase)
del(df_with_phase_add_var)
del(df_with_phase_add_var_label)

df_PVL.drop(["tartget","failure_mode"],axis=1, inplace= True)

# df_cruise / df_climb 나눔(컬럼명도 재조정)
df_climb = df_PVL.loc[df_PVL['climb'] == True].copy()
df_climb.columns = 'climb_' + df_PVL.columns
df_cruise = df_PVL.loc[df_PVL['cruise'] == True].copy()
df_cruise.columns = 'cruise_' + df_PVL.columns

# use 컬럼 준비
W_var = ['alt', 'Mach', 'TRA', 'T2']
xs_var = ['T24', 'T30', 'T48', 'T50', 'P15', 'P2', 'P21', 'P24', 'Ps30', 'P40', 'P50', 'Nf', 'Nc', 'Wf']
X_v_var = ['T40', 'P30', 'P45', 'W21', 'W22', 'W25', 'W31', 'W32', 'W48', 'W50', 'SmFan', 'SmLPC', 'SmHPC', 'phi']
T_var = ['fan_eff_mod', 'fan_flow_mod', 'LPC_eff_mod', 'LPC_flow_mod', 'HPC_eff_mod', 'HPC_flow_mod', 'HPT_eff_mod', 'HPT_flow_mod', 'LPT_eff_mod', 'LPT_flow_mod']
A_var = ['RUL','unit', 'cycle', 'Fc', 'hs']
new_var = ['hpc_lpc_temp', 'hpt_hpc_temp', 'lpt_hpt_temp', 
           'lpc_fan_press', 'hpc_lpc_press', 'hpt_hpc_press', 'lpt_hpt_press', 
           'hpc_lpc_temp_ratio', 'hpt_hpc_temp_ratio', 'lpt_hpt_temp_ratio', 'lpc_fan_press_ratio', 
           'hpc_lpc_press_ratio', 'hpt_hpc_press_ratio', 'lpt_hpt_press_ratio', 'hpt_entry_temp_drop', 
           'lpt_entry_temp_drop', 'hpt_entry_press_ratio', 'fan_exit_press', 'hpc_exit_press', 'hpc_entry_temp']
use_columns = W_var + xs_var + X_v_var + new_var

# use 컬럼에 climb_ cruise_ 붙이기
climb_use_columns = use_columns.copy()  
cruise_use_columns = use_columns.copy()
word = 'climb_'
word2 = 'cruise_'
for i in range(0,len(use_columns)):
    climb_use_columns[i] = word+use_columns[i]
    cruise_use_columns[i] = word2+use_columns[i]

# Climb 데이터 생성
df_prp_climb = data_prp(df_climb,climb_use_columns,'climb_')
df_prp_climb.to_csv(f"df_prp_climb_{file_order}.csv")

# Cruise 데이터 생성
df_prp_cruise = data_prp(df_cruise, cruise_use_columns, 'cruise_')
df_prp_cruise.to_csv(f"df_prp_cruise_{file_order}.csv")

# 데이터 결합
df_prp_cruise = df_prp_cruise.drop(['cruise_flight_time(min)','cruise_unit','cruise_cycle','cruise_Fc','cruise_fileNo',
                                    'cruise_hpt_yn','cruise_lpt_yn','cruise_fan_yn',
                                    'cruise_hpc_yn','cruise_lpc_yn','cruise_RUL'],axis=1)
df_prp_f = pd.concat((df_prp_cruise, df_prp_climb), axis=1 )

# 컬럼명 변경
df_prp_f.rename(columns = {'climb_flight_time(min)':'flight_time(min)','climb_unit':'unit','climb_Fc':'Fc','climb_fileNo':'fileNo'
                           ,'climb_cycle':'cycle','climb_hpt_yn':'hpt_yn'
                           ,'climb_lpt_yn':'lpt_yn','climb_fan_yn':'fan_yn'
                           ,'climb_hpc_yn':'hpc_yn','climb_lpc_yn':'lpc_yn'
                           ,'climb_rul':'rul'},inplace=True)

# 1개 사이클이전, 부품건강상태
df_prp_f['hpt_yn_shift1'] = df_prp_f['hpt_yn'].shift(1)
df_prp_f['lpt_yn_shift1'] = df_prp_f['lpt_yn'].shift(1)
df_prp_f['fan_yn_shift1'] = df_prp_f['fan_yn'].shift(1)
df_prp_f['hpc_yn_shift1'] = df_prp_f['hpc_yn'].shift(1)
df_prp_f['lpc_yn_shift1'] = df_prp_f['lpc_yn'].shift(1)
df_prp_f.fillna(0, inplace=True)
df_prp_f['hpt_yn_shift1'].loc[df_prp_f['cycle']==1] = 0
df_prp_f['lpt_yn_shift1'].loc[df_prp_f['cycle']==1] = 0
df_prp_f['fan_yn_shift1'].loc[df_prp_f['cycle']==1] = 0
df_prp_f['hpc_yn_shift1'].loc[df_prp_f['cycle']==1] = 0
df_prp_f['lpc_yn_shift1'].loc[df_prp_f['cycle']==1] = 0

df_prp_f.to_csv(f"df_prp_f_{file_order}.csv")

del(df_prp_f)
del(df_PVL)
del(df_all)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['failure_mode'].loc[(df['file_name'] == 'N-CMAPSS_DS02-006') & (df['unit'] == 2)] =  'hpt'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['failure_mode'].loc[(df['file_name'] == 'N-CMAPSS_DS02-006') & (df['unit'] == 5)] =  'hpt'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['failure_mode'].loc[(df['file_name'] == 'N-CMAPSS_DS02-006') & (df['unit'] == 10)] = 'hpt'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the docu

0/994
1/994
2/994
3/994
4/994
5/994
6/994
7/994
8/994
9/994
10/994
11/994
12/994
13/994
14/994
15/994
16/994
17/994
18/994
19/994
20/994
21/994
22/994
23/994
24/994
25/994
26/994
27/994
28/994
29/994
30/994
31/994
32/994
33/994
34/994
35/994
36/994
37/994
38/994
39/994
40/994
41/994
42/994
43/994
44/994
45/994
46/994
47/994
48/994
49/994
50/994
51/994
52/994
53/994
54/994
55/994
56/994
57/994
58/994
59/994
60/994
61/994
62/994
63/994
64/994
65/994
66/994
67/994
68/994
69/994
70/994
71/994
72/994
73/994
74/994
75/994
76/994
77/994
78/994
79/994
80/994
81/994
82/994
83/994
84/994
85/994
86/994
87/994
88/994
89/994
90/994
91/994
92/994
93/994
94/994
95/994
96/994
97/994
98/994
99/994
100/994
101/994
102/994
103/994
104/994
105/994
106/994
107/994
108/994
109/994
110/994
111/994
112/994
113/994
114/994
115/994
116/994
117/994
118/994
119/994
120/994
121/994
122/994
123/994
124/994
125/994
126/994
127/994
128/994
129/994
130/994
131/994
132/994
133/994
134/994
135/994
136/994
137/994
138/99

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_prp_f['hpt_yn_shift1'].loc[df_prp_f['cycle']==1] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_prp_f['lpt_yn_shift1'].loc[df_prp_f['cycle']==1] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_prp_f['fan_yn_shift1'].loc[df_prp_f['cycle']==1] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  