Import the libraries required for the script

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import norm
from sklearn.preprocessing import StandardScaler
from scipy import stats
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
from numpy.polynomial import polynomial as npp
from scipy.stats import linregress

Read the CSV files

In [None]:
# Load the three raw CSV exports (PA, BP1, BP2) into their own frames.
df_pa, df_bp1, df_bp2 = (
    pd.read_csv(csv_name)
    for csv_name in (
        'VMI_Data_PA_V02_AK6.csv',
        'VMI_Data_BP1_V02_AK6.csv',
        'VMI_Data_BP2_V02_AK6.csv',
    )
)

Check the columns in the data

In [None]:
# Show the column index of each frame, in PA, BP1, BP2 order.
for frame in (df_pa, df_bp1, df_bp2):
    print(frame.columns)

Parse the DATE_TIME_STAMP column into datetimes

In [None]:
# Convert the DATE_TIME_STAMP strings to datetimes using one explicit format
# (month/day/year, 24-hour time with fractional seconds).
for frame in (df_pa, df_bp1, df_bp2):
    frame['DATE_TIME_STAMP'] = pd.to_datetime(
        frame['DATE_TIME_STAMP'], format="%m/%d/%Y %H:%M:%S.%f"
    )

Check for missing data, then drop rows with missing values

In [None]:
# For each frame, print the ten columns with the most missing values
# (count and percentage of rows).
for frame in (df_pa, df_bp1, df_bp2):
    null_mask = frame.isnull()
    total = null_mask.sum().sort_values(ascending=False)
    percent = (null_mask.sum() / null_mask.count() * 100).sort_values(ascending=False)
    missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
    print(missing_data.head(10))

# Remove every row that has at least one missing value.
df_pa, df_bp1, df_bp2 = df_pa.dropna(), df_bp1.dropna(), df_bp2.dropna()

In [None]:
# Print the missing-value summary again (top twenty columns per frame).
for frame in (df_pa, df_bp1, df_bp2):
    null_mask = frame.isnull()
    total = null_mask.sum().sort_values(ascending=False)
    percent = (null_mask.sum() / null_mask.count() * 100).sort_values(ascending=False)
    missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
    print(missing_data.head(20))

In [None]:
# Order every frame chronologically; the shift/rolling features computed later
# are positional and therefore depend on this row order.
df_pa, df_bp1, df_bp2 = (
    frame.sort_values(by='DATE_TIME_STAMP')
    for frame in (df_pa, df_bp1, df_bp2)
)

Add an ID column for the slope calculations
(probably not needed)

In [None]:
# Prepend a 1-based running 'ID' column to each frame (in place).
for frame in (df_pa, df_bp1, df_bp2):
    frame.insert(loc=0, column='ID', value=range(1, len(frame) + 1))

In [None]:
# reset_index returns a NEW frame; without assigning it back the call has no
# effect. Rebind each name so the frames carry a clean 0..n-1 index after the
# earlier dropna/sort_values steps.
df_pa = df_pa.reset_index(drop=True)
df_bp1 = df_bp1.reset_index(drop=True)
df_bp2 = df_bp2.reset_index(drop=True)

In [None]:

def polyfit(x):
    """Return the slope of a degree-1 least-squares fit of ``x`` against 0..len(x)-1.

    ``numpy.polynomial.polynomial.polyfit`` returns coefficients ordered from
    the lowest degree to the highest, so index 0 is the intercept and index 1
    is the slope. The slope is what the rolling-trend features need, matching
    ``linearregress``.
    """
    positions = list(range(len(x)))
    intercept_and_slope = npp.polyfit(positions, x, 1)
    return intercept_and_slope[1]

def linearregress(x):
    """Return the slope of a simple linear regression of ``x`` on 0..len(x)-1."""
    positions = list(range(len(x)))
    fit = linregress(positions, x)
    # Only the slope is needed; the other regression statistics are discarded.
    return fit.slope


In [None]:
def add_splice_features(frame, positions=('LFT', 'MID', 'RHT'), windows=(5, 10, 20, 50)):
    """Add delta, lag, moving-average and rolling-slope columns to ``frame`` in place.

    For every position prefix ``P`` in ``positions`` the following columns are
    created, in this order:

    * ``P_SPLICE_DELTA``  - ``P_SPLICE_LENGTH`` minus the midpoint of
      ``SPLICE_LIMIT_LOW`` and ``SPLICE_LIMIT_HIGH``.
    * ``P_SPLICE_PREV``   - the delta of the previous row (``shift(1)``), so the
      rolling features below never include the current row's own measurement.
    * ``P_SPLICE_MA<w>``  - rolling mean of ``PREV`` for each window ``w``.
    * ``P_SPLICE_SLOPE<w>`` - rolling regression slope of ``PREV`` for each
      window ``w``, computed with ``linearregress`` defined earlier in this notebook.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain ``SPLICE_LIMIT_LOW``, ``SPLICE_LIMIT_HIGH`` and one
        ``<P>_SPLICE_LENGTH`` column per position. Rows are assumed to already
        be in the order the rolling windows should follow.
    positions : iterable of str
        Column-name prefixes to process.
    windows : iterable of int
        Rolling window sizes, in rows.

    Returns
    -------
    pandas.DataFrame
        The same ``frame`` object, for convenience.
    """
    # The target length is shared by all positions, so compute it once.
    limit_midpoint = (frame['SPLICE_LIMIT_LOW'] + frame['SPLICE_LIMIT_HIGH']) / 2

    for position in positions:
        prefix = position + '_SPLICE_'
        frame[prefix + 'DELTA'] = frame[prefix + 'LENGTH'] - limit_midpoint
        frame[prefix + 'PREV'] = frame[prefix + 'DELTA'].shift(1)
        lagged = frame[prefix + 'PREV']

        # All moving averages first, then all slopes, to keep the column order
        # of the original hand-written version.
        for window in windows:
            frame[prefix + 'MA' + str(window)] = (
                lagged.rolling(window=window, min_periods=1).mean()
            )
        for window in windows:
            frame[prefix + 'SLOPE' + str(window)] = (
                lagged.rolling(window=window, min_periods=1).apply(linearregress)
            )
    return frame


for frame in (df_pa, df_bp1, df_bp2):
    add_splice_features(frame)


In [None]:
# Delete the first 50 rows so that the moving averages and slopes are
# consistent: 50 is the largest rolling window used above, and earlier rows
# were computed from truncated windows.
WARMUP_ROWS = 50

# .copy() makes each result an independent frame rather than a slice of the
# original, so the column assignments in later cells do not operate on a
# view (which pandas flags with SettingWithCopyWarning).
df_pa = df_pa.iloc[WARMUP_ROWS:].copy()
df_bp1 = df_bp1.iloc[WARMUP_ROWS:].copy()
df_bp2 = df_bp2.iloc[WARMUP_ROWS:].copy()

In [None]:
# lft_bins = [df_pa['LFT_SPLICE_LENGTH'].min(),3.5, (3.5 + df_pa['LFT_SPLICE_LENGTH'].mean())/2,
#             df_pa['LFT_SPLICE_LENGTH'].mean(),(11.0 + df_pa['LFT_SPLICE_LENGTH'].mean())/2,
#             11.0,df_pa['LFT_SPLICE_LENGTH'].max()]

# mid_bins = [df_pa['MID_SPLICE_LENGTH'].min(),4.9, (4.9 + df_pa['MID_SPLICE_LENGTH'].mean())/2,
#             df_pa['MID_SPLICE_LENGTH'].mean(), (18.0 + df_pa['MID_SPLICE_LENGTH'].mean())/2,
#             18.0,df_pa['MID_SPLICE_LENGTH'].max()]

# rht_bins = [df_pa['RHT_SPLICE_LENGTH'].min(),3.5,(3.5 + df_pa['RHT_SPLICE_LENGTH'].mean())/2,
#             df_pa['RHT_SPLICE_LENGTH'].mean(),(11.0 + df_pa['RHT_SPLICE_LENGTH'].mean())/2,
#             11.0,df_pa['RHT_SPLICE_LENGTH'].max()]

def _grade_bin_edges(limit_low, limit_high):
    """Return the six bin edges that define the five splice grades.

    The span between ``limit_low`` and ``limit_high`` is split into three equal
    parts; everything below ``limit_low`` or above ``limit_high`` falls into the
    two outer bins.

    The outer edges are -inf / +inf rather than the min / max observed in
    ``df_pa``: data-derived edges leave BP1/BP2 values outside PA's range
    ungraded (NaN), and make ``pd.cut`` raise when PA's minimum is not below
    ``limit_low`` or its maximum not above ``limit_high`` (edges must be
    strictly increasing).
    """
    third = (limit_high - limit_low) / 3
    return [
        -np.inf,
        limit_low,
        (limit_low + third),
        (limit_high - third),
        limit_high,
        np.inf,
    ]


lft_bins = _grade_bin_edges(3.5, 11.0)

mid_bins = _grade_bin_edges(4.9, 18.0)

rht_bins = _grade_bin_edges(3.5, 11.0)

In [None]:
# Grade labels handed to pd.cut: one per interval, from the lowest bin to the highest.
bin_names = [
    'BadNarrow',
    'OKNarrow',
    'Good',
    'OKWide',
    'BadWide',
]

In [None]:
def add_splice_grades(frame):
    """Grade each splice position and derive an overall ``SPLICE_GRADE`` in place.

    ``LFT_SPLICE_GRADE``, ``MID_SPLICE_GRADE`` and ``RHT_SPLICE_GRADE`` are
    produced by binning the matching ``*_SPLICE_LENGTH`` column with
    ``lft_bins`` / ``mid_bins`` / ``rht_bins`` and labelling with ``bin_names``.

    ``SPLICE_GRADE`` starts as the left grade and is then downgraded by the
    middle and right grades, in that order:

    * an OK* grade replaces the overall grade only while it is still 'Good';
    * a Bad* grade replaces the overall grade unless it is already Bad*.

    The result is the worst of the three grades, with ties going to the first
    position (LFT, then MID, then RHT) that reached that severity.

    Fixes relative to the previous inline version:

    * ``||`` is not a Python operator (SyntaxError); element-wise OR is
      expressed with ``isin``.
    * ``SPLICE_GRADE != 'Bad'`` was always true because the labels are
      'BadNarrow' / 'BadWide'; the check now tests for those labels.
      NOTE(review): this assumes the intent was "do not overwrite an existing
      Bad grade" - confirm.
    * ``frame[col][mask] = ...`` is chained indexing; ``.loc`` is used instead.
    """
    ok_labels = ['OKNarrow', 'OKWide']
    bad_labels = ['BadNarrow', 'BadWide']

    position_bins = (('LFT', lft_bins), ('MID', mid_bins), ('RHT', rht_bins))
    for position, edges in position_bins:
        frame[position + '_SPLICE_GRADE'] = pd.cut(
            frame[position + '_SPLICE_LENGTH'],
            edges,
            labels=bin_names,
            include_lowest=True,
        )

    frame['SPLICE_GRADE'] = frame['LFT_SPLICE_GRADE']

    for position in ('MID', 'RHT'):
        candidate = frame[position + '_SPLICE_GRADE']

        to_ok = candidate.isin(ok_labels) & (frame['SPLICE_GRADE'] == 'Good')
        frame.loc[to_ok, 'SPLICE_GRADE'] = candidate[to_ok]

        # Re-read SPLICE_GRADE: the OK step above may have changed it.
        to_bad = candidate.isin(bad_labels) & ~frame['SPLICE_GRADE'].isin(bad_labels)
        frame.loc[to_bad, 'SPLICE_GRADE'] = candidate[to_bad]

    return frame


for frame in (df_pa, df_bp1, df_bp2):
    add_splice_grades(frame)


In [None]:
#df_pa['LFT_SPLICE_GRADE']
# one_hot=pd.get_dummies(df_pa['LFT_SPLICE_GRADE'])
# df_pa = df_pa.drop('LFT_SPLICE_GRADE', axis = 1)
# df_pa = df_pa.join(one_hot)

In [None]:
#df_pa.head
# Write each enriched frame next to its input, with a header row and without
# the index column.
for frame, out_name in (
    (df_pa, 'VMI_Data_PA_V02_AK6_mod01.csv'),
    (df_bp1, 'VMI_Data_BP1_V02_AK6_mod01.csv'),
    (df_bp2, 'VMI_Data_BP2_V02_AK6_mod01.csv'),
):
    frame.to_csv(out_name, header=True, index=False)