In [2]:
from imblearn.over_sampling import SMOTE, BorderlineSMOTE, ADASYN
from imblearn.under_sampling import RandomUnderSampler, NearMiss, TomekLinks
import pandas as pd
import numpy as np

In [3]:
def resample_data(X_train, y_train, over_sampling=True, method=1,
               plot_path='/home/ar1/Desktop/plots/'):
    print('Initial dataset shape %s' % y_train["class"].value_counts())
    print("X train: " ,X_train)
    print("y train: " ,y_train)

    if over_sampling:
        if method == 1:
            sm = SMOTE(random_state=42)
            X_res, y_res = sm.fit_resample(X_train, y_train)
            print('Over Resampled train dataset shape %s' % y_res["class"].value_counts())
        elif method == 2:
            bsm = BorderlineSMOTE(random_state=42)
            X_res, y_res = bsm.fit_resample(X_train, y_train)
            print('Over Resampled train dataset shape %s' % y_res["class"].value_counts())
        elif method == 3:
            ad = ADASYN(random_state=42)
            X_res, y_res = ad.fit_resample(X_train, y_train)
            print('Over Resampled train dataset shape %s' % y_res["class"].value_counts())
        else:
            print("Wrong method number!")

    else:
        if method == 1:
            Rm = RandomUnderSampler(random_state=42)
            X_res, y_res = Rm.fit_resample(X_train, y_train)
            print('Over Resampled train dataset shape %s' % y_res["class"].value_counts())
        elif method == 2:
            Nm = NearMiss(version=3)
            X_res, y_res = Nm.fit_resample(X_train, y_train)
            print('Over Resampled train dataset shape %s' % y_res["class"].value_counts())
        elif method == 3:
            tml = TomekLinks()
            X_res, y_res = tml.fit_resample(X_train, y_train)
            print('Over Resampled train dataset shape %s' % y_res["class"].value_counts())
        else:
            print("Wrong method number!")

    print("New X train ", X_res)
    print("New y train shape", y_res)

    return X_res, y_res


In [31]:
df = pd.read_csv('6dof_disc_and_classification.csv')
df.drop('Unnamed: 0', inplace=True, axis=1)
df.keys()
ten = df.loc[df['Success_Rates'] == 10]
zero = df.loc[df['Success_Rates'] == 0]
new_df = pd.concat([ten.sample(10), zero.sample(20)]).reset_index()
new_df["class"] = np.where(new_df['Success_Rates'] > 0, 1, 0)

X = new_df.drop(columns=['Success_Rates', 'class'], axis=1)
y = new_df[['class']]
x_res,y_res = resample_data(X_train=X,y_train=y,over_sampling=True,method=1)

data_res = pd.concat([x_res,y_res],axis=1).reset_index(drop=True)
data_res.drop(columns=['index'], inplace=True)
new_df.drop(columns=['Success_Rates','index'], inplace=True)


Initial dataset shape 0    20
1    10
Name: class, dtype: int64
X train:      index  Joint2 type_pitch  Joint2 type_pris  Joint2 type_roll  \
0   97348                  0                 0                 1   
1   72113                  0                 1                 0   
2   44751                  0                 1                 0   
3   65827                  0                 1                 0   
4   89118                  0                 0                 1   
5   67795                  0                 1                 0   
6   73633                  0                 1                 0   
7   13879                  1                 0                 0   
8    9087                  1                 0                 0   
9   62956                  0                 1                 0   
10  30795                  1                 0                 0   
11  45540                  0                 1                 0   
12  67547                  0              

In [33]:
data_res

Unnamed: 0,Joint2 type_pitch,Joint2 type_pris,Joint2 type_roll,Joint3 type_pitch,Joint3 type_pris,Joint3 type_roll,Joint4 type_pitch,Joint4 type_pris,Joint4 type_roll,Joint5 type_pitch,...,Link4 length_0.7,Link5 length_0.1,Link5 length_0.3,Link5 length_0.5,Link5 length_0.7,Link6 length_0.1,Link6 length_0.3,Link6 length_0.5,Link6 length_0.7,class
0,0,0,1,1,0,0,1,0,0,0,...,0,1,0,0,0,0,1,0,0,1
1,0,1,0,1,0,0,0,0,1,1,...,0,1,0,0,0,0,1,0,0,1
2,0,1,0,1,0,0,1,0,0,0,...,1,0,1,0,0,0,1,0,0,1
3,0,1,0,1,0,0,1,0,0,1,...,0,1,0,0,0,1,0,0,0,1
4,0,0,1,0,1,0,0,1,0,1,...,1,0,1,0,0,1,0,0,0,1
5,0,1,0,0,1,0,1,0,0,1,...,0,1,0,0,0,1,0,0,0,1
6,0,1,0,0,0,1,0,1,0,1,...,0,1,0,0,0,0,1,0,0,1
7,1,0,0,0,0,1,0,1,0,0,...,0,1,0,0,0,0,1,0,0,1
8,1,0,0,0,1,0,1,0,0,1,...,0,0,0,0,1,0,1,0,0,1
9,0,1,0,1,0,0,1,0,0,0,...,0,0,0,1,0,1,0,0,0,1


In [43]:
tail = data_res.tail(1)

types = []
axis = []
lens = []
for index, row in tail.iterrows():
    i=2
    for i in range(2,7):
        print(i)
        ty = ['pitch','pris','roll']
        for t in ty:
            if row['Joint'+str(i)+' type_'+t] ==1:
                types.append(t)
        if i==2:
            ax = ['y','z']
        else:
            ax = ['x','y','z']
        for a in ax:
            if row['Joint'+str(i)+' axis_'+a] ==1:
                axis.append(a)
        ly = ['0.1','0.3','0.5','0.7']
        for l in ly:
            if row['Link'+str(i)+' length_'+l] ==1:
                lens.append(l)

print(types)
print(axis)
print(lens)
pd.set_option('display.max_columns', None)
tail

2
3
4
5
6
['pris', 'pitch', 'pitch', 'roll']
['y', 'x', 'y']
[]


Unnamed: 0,Joint2 type_pitch,Joint2 type_pris,Joint2 type_roll,Joint3 type_pitch,Joint3 type_pris,Joint3 type_roll,Joint4 type_pitch,Joint4 type_pris,Joint4 type_roll,Joint5 type_pitch,Joint5 type_pris,Joint5 type_roll,Joint6 type_pitch,Joint6 type_pris,Joint6 type_roll,Joint2 axis_y,Joint2 axis_z,Joint3 axis_x,Joint3 axis_y,Joint3 axis_z,Joint4 axis_x,Joint4 axis_y,Joint4 axis_z,Joint5 axis_x,Joint5 axis_y,Joint5 axis_z,Joint6 axis_x,Joint6 axis_y,Joint6 axis_z,Link2 length_0.1,Link2 length_0.3,Link2 length_0.5,Link2 length_0.7,Link3 length_0.1,Link3 length_0.3,Link3 length_0.5,Link3 length_0.7,Link4 length_0.1,Link4 length_0.3,Link4 length_0.5,Link4 length_0.7,Link5 length_0.1,Link5 length_0.3,Link5 length_0.5,Link5 length_0.7,Link6 length_0.1,Link6 length_0.3,Link6 length_0.5,Link6 length_0.7,class
39,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [13]:
temp = data_res.head(30)

In [23]:
new_x = pd.concat([new_df,temp]).drop_duplicates(keep=False).reset_index()
# new_x.drop(columns=['level_0', 'index'], inplace=True)
# new_x = pd.concat([x_res, X]).duplicated(keep=False)
print(new_x)

Empty DataFrame
Columns: [index, Joint2 type_pitch, Joint2 type_pris, Joint2 type_roll, Joint3 type_pitch, Joint3 type_pris, Joint3 type_roll, Joint4 type_pitch, Joint4 type_pris, Joint4 type_roll, Joint5 type_pitch, Joint5 type_pris, Joint5 type_roll, Joint6 type_pitch, Joint6 type_pris, Joint6 type_roll, Joint2 axis_y, Joint2 axis_z, Joint3 axis_x, Joint3 axis_y, Joint3 axis_z, Joint4 axis_x, Joint4 axis_y, Joint4 axis_z, Joint5 axis_x, Joint5 axis_y, Joint5 axis_z, Joint6 axis_x, Joint6 axis_y, Joint6 axis_z, Link2 length_0.1, Link2 length_0.3, Link2 length_0.5, Link2 length_0.7, Link3 length_0.1, Link3 length_0.3, Link3 length_0.5, Link3 length_0.7, Link4 length_0.1, Link4 length_0.3, Link4 length_0.5, Link4 length_0.7, Link5 length_0.1, Link5 length_0.3, Link5 length_0.5, Link5 length_0.7, Link6 length_0.1, Link6 length_0.3, Link6 length_0.5, Link6 length_0.7, class]
Index: []

[0 rows x 51 columns]
