In [1]:
import pandas as pd
import itertools
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Turn off pandas' chained-assignment warning for the whole session.
# Stated rationale: only part of the data is needed, so 'measurements_df' is
# copied into 'measurements_subset'.
# NOTE(review): neither of those frames appears in the cells visible here --
# confirm this global switch is still required.
pd.set_option('mode.chained_assignment', None)

In [4]:
# Workbooks holding the DIC results; the paths are relative to the working directory.
MASTS_DIC_XLS = 'Data/Elastic Measurements Jul22/DIC Elastic Measurements JUL22/Masts/masts_DIC_results.xls'
WINGS_DIC_XLS = 'Data/Elastic Measurements Jul22/DIC Elastic Measurements JUL22/Wings/wings_DIC_results Moran.xls'

# Load masts first, then wings, each into its own raw DataFrame.
masts_dic_df = pd.read_excel(MASTS_DIC_XLS)
wings_dic_df = pd.read_excel(WINGS_DIC_XLS)

In [5]:
def calc_symetric(df, threshold=0.1):
    """Flag, per row, whether each measure's intercepts look symmetric.

    For each of the measures 'bend', 'twistFront' and 'twistBack', the gap
    between intercepts 1 and 2 is compared with the gap between intercepts
    3 and 4. A row is flagged symmetric for that measure when the two gaps
    differ by less than ``threshold``.

    The flags are written to ``df`` in place as three boolean columns:
    'is_symetric_bend', 'is_symetric_twist_front' and
    'is_symetric_twist_back'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain '<measure> intercepts_1' ... '<measure> intercepts_4'
        for every measure listed above.
    threshold : float, default 0.1
        Maximum allowed difference between the two gaps (strict ``<``).

    Returns
    -------
    None
        ``df`` is modified in place.

    Raises
    ------
    KeyError
        If a required intercept column is missing. All flags are computed
        before any column is added, so ``df`` is left untouched in that case.
    """
    flag_columns = {
        'bend': 'is_symetric_bend',
        'twistFront': 'is_symetric_twist_front',
        'twistBack': 'is_symetric_twist_back',
    }

    # Whole-column arithmetic replaces the per-row iterrows() loop.
    flags = {}
    for measure, flag_column in flag_columns.items():
        gap_12 = (df[f'{measure} intercepts_1'] - df[f'{measure} intercepts_2']).abs()
        gap_34 = (df[f'{measure} intercepts_3'] - df[f'{measure} intercepts_4']).abs()
        flags[flag_column] = (gap_12 - gap_34).abs() < threshold

    # Assign only after every flag was computed successfully.
    for flag_column, flag in flags.items():
        df[flag_column] = flag

# Add the is_symetric_* flag columns to the wings table (modified in place).
calc_symetric(wings_dic_df)
# TODO: the masts file has only 3 columns for each measure -- how should symmetry be checked there?

In [6]:
# Map every '<measure> <quantity>' feature name (e.g. 'bend slopes') to the
# wings columns whose name contains it. The same mapping is meant to serve
# both datasets.
# NOTE(review): the masts table appears to have only 3 columns per measure, so
# lists built from the wings columns may name columns the masts table lacks --
# confirm before reusing this mapping for masts.
A = ['bend', 'twistBack', 'twistFront']
B = ['slopes', 'intercepts']
features_list = {
    f'{measure} {quantity}': [
        column for column in wings_dic_df.columns
        if f'{measure} {quantity}' in column
    ]
    for measure, quantity in itertools.product(A, B)
}

In [7]:
def normalize_df(df):
    """Return a min-max scaled version of ``df`` indexed by 'itemName'.

    The 'itemName' column becomes the index, and every remaining column is
    passed through a default-configured ``MinMaxScaler``. The scaler is fitted
    on this frame alone. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an 'itemName' column; the other columns must be accepted
        by ``MinMaxScaler.fit_transform``.

    Returns
    -------
    pandas.DataFrame
        Scaled values with the original column names and 'itemName' as index.
    """
    indexed = df.set_index('itemName')
    scaled_values = MinMaxScaler().fit_transform(indexed)
    # fit_transform returns a bare array, so re-attach the labels.
    return pd.DataFrame(scaled_values, index=indexed.index, columns=indexed.columns)

In [12]:
# Normalize each table independently (each gets its own scaler fit).
norm_masts_dic_df = normalize_df(masts_dic_df)
norm_wings_dic_df = normalize_df(wings_dic_df)

# Print masts, a visual separator line, then wings.
print(
    norm_masts_dic_df,
    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@",
    norm_wings_dic_df,
    sep="\n",
)

          bend slopes_1  bend slopes_2  bend slopes_3  bend intercepts_1  \
itemName                                                                   
DP2            0.975761       0.366909       0.519142           0.775818   
DP3            0.999055       0.772565       0.818974           0.707621   
MM4            1.000000       0.739492       0.745969           0.686958   
NG1            0.988198       0.717063       0.695067           0.844903   
SK4            0.996985       0.794836       0.943074           0.760614   
ST2            0.979732       0.290169       0.400682           0.000000   
ST3            0.990535       0.748908       0.604264           0.879922   
TR3            0.970847       0.281714       0.156968           0.867653   
TR5            0.950744       0.000000       0.000000           1.000000   
TR6            0.000000       1.000000       1.000000           0.975064   
TV1            0.989676       0.556235       0.592863           0.742835   
YC1         

In [9]:
# TODO: find a way to use the symmetry flags when calculating the scores.
# TODO: decide how to handle outliers such as masts file -> TR6 -> 'bend slopes_1'
#       (e.g. use a different normalization scheme?).
def find_scores(df, features=None, scale=10):
    """Add one '<feature>_score' column per feature group to ``df`` in place.

    Each score is the row-wise mean of the group's columns multiplied by
    ``scale``. Missing values are skipped by the mean.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named in the feature groups.
    features : dict[str, list[str]] or None, default None
        Mapping of feature name to the columns averaged for it. When None,
        the module-level ``features_list`` is used. Pass an explicit mapping
        for a table whose columns differ from the ones ``features_list``
        was built from.
    scale : float, default 10
        Factor applied to the row mean.

    Returns
    -------
    None
        ``df`` is modified in place.

    Raises
    ------
    KeyError
        If a listed column is missing from ``df``. Score columns for groups
        processed before the failing one have already been added.
    """
    groups = features_list if features is None else features
    for key, columns in groups.items():
        # Vectorized row mean replaces the per-row iterrows() loop.
        df[f'{key}_score'] = df[columns].mean(axis=1) * scale

# Append the '<feature>_score' columns to the normalized wings table (in place), then show it.
find_scores(norm_wings_dic_df)
print(norm_wings_dic_df)

          bend slopes_1  bend slopes_2  bend slopes_3  bend slopes_4  \
itemName                                                               
DP2            0.474576       0.144672       0.473061       0.117890   
MM3            0.659969       0.515439       0.634003       0.607555   
MM4            0.444614       0.266712       0.334752       0.351196   
NG1            0.500593       0.340951       0.462963       0.441396   
NM1            0.000000       0.597290       0.000000       0.593629   
NM2            0.455417       0.211991       0.399988       0.179938   
SK3            0.535796       0.241127       0.503117       0.152317   
ST2            0.623792       0.503806       0.655743       0.675879   
ST3            0.212471       0.197529       0.262755       0.135784   
TR3            1.000000       1.000000       0.970789       1.000000   
TR4            0.715299       0.611970       0.726804       0.628955   
TR5            0.621474       0.448179       0.613004       0.34

In [10]:
# Show the final tables, then persist them with IPython's %store magic so
# another notebook can restore them with `%store -r`.
# NOTE(review): in the cells visible here only the wings table has had
# find_scores applied; the masts table is stored without score columns.
print(norm_wings_dic_df)
print(norm_masts_dic_df)

%store norm_wings_dic_df
%store norm_masts_dic_df

          bend slopes_1  bend slopes_2  bend slopes_3  bend slopes_4  \
itemName                                                               
DP2            0.474576       0.144672       0.473061       0.117890   
MM3            0.659969       0.515439       0.634003       0.607555   
MM4            0.444614       0.266712       0.334752       0.351196   
NG1            0.500593       0.340951       0.462963       0.441396   
NM1            0.000000       0.597290       0.000000       0.593629   
NM2            0.455417       0.211991       0.399988       0.179938   
SK3            0.535796       0.241127       0.503117       0.152317   
ST2            0.623792       0.503806       0.655743       0.675879   
ST3            0.212471       0.197529       0.262755       0.135784   
TR3            1.000000       1.000000       0.970789       1.000000   
TR4            0.715299       0.611970       0.726804       0.628955   
TR5            0.621474       0.448179       0.613004       0.34