In [1]:
import glob
import random
import sys
from itertools import chain
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, confusion_matrix

from thundersvm import SVC
from tqdm import tqdm

np.random.seed(0)
random.seed(0)

### TEST TRAIN INDEX GENERATION FOR FOLDS

In [2]:
attribute_map = [
   # {"Race": ["African", "Asian", "Indian", "Caucasian"]},
    {"skintype": ["type1", "type2", "type3", "type4", "type5", "type6"]},
    {"eye": ["normal", "narrow"]},
    {"haircolor": ["red", "black", "gray", "brown", "blonde"]},
    {"hairtype": ["straight", "wavy", "bald", "curly"]},
    {"lips": ["small", "big"]},
    {"nose": ["wide", "narrow"]},
]

In [3]:
vgg = pd.read_csv("/mnt/HDD/FaceDatasetCenter/metadata/VGGFace2_metadata_FDA.csv")
vgg_test = vgg[vgg.type == "test"]
vgg_test.head()

vgg_image = pd.read_csv(
    "/mnt/HDD/FaceDatasetCenter/metadata/VGGFace2_image_meta_test.csv"
)
vgg_image_test = vgg_image[vgg_image.type == "test"]
vgg_image_test = vgg_image_test.sort_values(by="file")
vgg_image_test.head()

Unnamed: 0,file,Class_ID,Sample_Num,Gender,Race,skintype,type,lips,nose,hairtype,haircolor,GenderM,eye
0,n000001/0001_01.jpg,n000001,424,m,Asian,type4,test,small,wide,bald,gray,m,narrow
1,n000001/0002_01.jpg,n000001,424,m,Asian,type4,test,small,wide,bald,gray,m,narrow
2,n000001/0003_01.jpg,n000001,424,m,Asian,type4,test,small,wide,bald,gray,m,narrow
3,n000001/0004_01.jpg,n000001,424,m,Asian,type4,test,small,wide,bald,gray,m,narrow
4,n000001/0005_01.jpg,n000001,424,m,Asian,type4,test,small,wide,bald,gray,m,narrow


In [4]:
NUM_FOLDS = 3
TEST_SAMPLE_SIZE = 50
folds_folder = Path("folds")
folds_folder.mkdir(exist_ok=True)


In [5]:
def generate_fold():
    
    all_folds = []
    for fold in range(0, NUM_FOLDS):
        print(TEST_SAMPLE_SIZE * NUM_FOLDS)
        print(f"Fold {fold+1}")
        class_folds = {"train": [], "test": []}
        for i, group in vgg_image_test.groupby("Class_ID"):
            num_samples = group.shape[0]
            test_mask = np.zeros(num_samples, dtype=np.bool)
            if TEST_SAMPLE_SIZE * NUM_FOLDS > num_samples:
                start = fold * TEST_SAMPLE_SIZE
                end = start + TEST_SAMPLE_SIZE
                ix = [i % num_samples for i in range(start, end)]
            #             print(f"ClassID: {i}, fold: {fold} - [{ix[0]}:{ix[-1]}]")
            else:
                class_fold_size = num_samples // NUM_FOLDS
                start = fold * class_fold_size
                end = start + class_fold_size
                ix = range(start, end)
            
            test_mask[ix] = True
            try:
                class_folds["test"].append(
                    group[test_mask].sample(n=TEST_SAMPLE_SIZE, random_state=0)
                )
            except:
                import pdb

                pdb.set_trace()
            class_folds["train"].append(group[~test_mask])
        all_folds.append(class_folds)
    return all_folds

In [6]:
all_folds = generate_fold()
print(len(all_folds))
for i, fold in enumerate(all_folds):
    train = pd.concat(fold["train"])
    test = pd.concat(fold["test"])
   
    train.to_parquet(folds_folder / f"fold_{i}_train.pq", compression="GZIP")
    test.to_parquet(folds_folder / f"fold_{i}_test.pq", compression="GZIP")

150
Fold 1
150
Fold 2
150
Fold 3
3


### Feature loading

In [21]:
features = np.load("features/vggface2_test_features.npy",allow_pickle=True)
path_arr = np.load("features/vggface2_test_paths.npy",allow_pickle=True)

In [22]:
meta = pd.DataFrame(path_arr, columns=["full_path"])
meta["file"] = meta.full_path.apply(lambda x: "/".join(Path(x).parts[-2:]))
labels = list(map(lambda x: str(x).split("/")[-2], path_arr))
le = preprocessing.LabelEncoder()
labels = le.fit_transform(labels)
meta["y_test"] = labels
meta = meta.merge(vgg_image_test,how='left',on='file')
meta.head()

Unnamed: 0,full_path,file,y_test,Class_ID,Sample_Num,Gender,Race,skintype,type,lips,nose,hairtype,haircolor,GenderM,eye
0,n000001/0001_01.jpg,n000001/0001_01.jpg,0,n000001,424,m,Asian,type4,test,small,wide,bald,gray,m,narrow
1,n000001/0002_01.jpg,n000001/0002_01.jpg,0,n000001,424,m,Asian,type4,test,small,wide,bald,gray,m,narrow
2,n000001/0003_01.jpg,n000001/0003_01.jpg,0,n000001,424,m,Asian,type4,test,small,wide,bald,gray,m,narrow
3,n000001/0004_01.jpg,n000001/0004_01.jpg,0,n000001,424,m,Asian,type4,test,small,wide,bald,gray,m,narrow
4,n000001/0005_01.jpg,n000001/0005_01.jpg,0,n000001,424,m,Asian,type4,test,small,wide,bald,gray,m,narrow


In [23]:
#Train CODE
def train(X,y):  
    all_predictions = []
    for i,fold in enumerate(range(0, NUM_FOLDS)):
        train_ixs = pd.read_parquet(folds_folder / f"fold_{i}_100_train.pq")
        test_ixs = pd.read_parquet(folds_folder / f"fold_{i}_100_test.pq")
        print(folds_folder / f"fold_{i}_train.pq")
        print(test_ixs.shape)
        print(meta[meta.file.isin(test_ixs.file)].index.shape)
        test_index = meta[meta.file.isin(test_ixs.file)].index
        train_index = meta[meta.file.isin(train_ixs.file)].index
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        print(X_test.shape,y_test.shape)
        print('SVM Training...')
        svm_model_linear = SVC(kernel="linear", C=1).fit(X_train, y_train)
        print("Starting prediction (The computer can be unresponsive during the prediction).")

        TEST_BATCH_SIZE = 1000
        preds = []
        ys=[]
        with tqdm(total=X_test.shape[0], file=sys.stdout) as pbar:
            for i in range(0, X_test.shape[0], TEST_BATCH_SIZE):
                X_test_batch = X_test[i : i + TEST_BATCH_SIZE]
                pred = svm_model_linear.predict(X_test_batch)
                preds.append(pred)
                # update tqdm
                pbar.set_description("Processed: %d" % (1 + i))
                pbar.update(TEST_BATCH_SIZE)
        all_predictions.append(preds)c
    return all_predictions
        

In [10]:
X = np.asarray(features, dtype=np.float32)
y = labels
pred_list = train(X,y)

folds/fold_0_train.pq
                   file Class_ID  Sample_Num Gender   Race skintype  type  \
45  n000001/0046_01.jpg  n000001         424      m  Asian    type4  test   
60  n000001/0061_01.jpg  n000001         424      m  Asian    type4  test   
7   n000001/0008_01.jpg  n000001         424      m  Asian    type4  test   
51  n000001/0052_01.jpg  n000001         424      m  Asian    type4  test   
66  n000001/0067_01.jpg  n000001         424      m  Asian    type4  test   

     lips  nose hairtype haircolor GenderM     eye  
45  small  wide     bald      gray       m  narrow  
60  small  wide     bald      gray       m  narrow  
7   small  wide     bald      gray       m  narrow  
51  small  wide     bald      gray       m  narrow  
66  small  wide     bald      gray       m  narrow  
(25000,)
(25000, 512) (25000,)
SVM Training...
Starting prediction (The computer can be unresponsive during the prediction).
Processed: 24001: 100%|██████████| 25000/25000 [09:26<00:00, 44.09it/s]


In [13]:
ap = np.asarray(pred_list)
ap = ap.reshape(ap.shape[0],ap.shape[1]*ap.shape[2])
ap.shape

(3, 25000)

In [20]:
# TEST CODE
result_dic = []
for i, fold in enumerate(range(0, 3)):
    train_ixs = pd.read_parquet(folds_folder / f"fold_{i}_train.pq")
    test_ixs = pd.read_parquet(folds_folder / f"fold_{i}_test.pq")

    print(meta.shape,test_ixs.index)
    meta_test = meta[meta.file.isin(test_ixs.file)]
    print(meta_test.shape)
    test_index = meta_test.index
    y_test = y[test_index]
 
    meta_test["y_pred"] = ap[i]
    print("Overall Accuracy:", accuracy_score(meta_test["y_test"], meta_test["y_pred"]))
    
    print("Group initilization!")
    for attr in attribute_map:
        # for one value of one attribute
        for key, value in attr.items():
            for val in value:
                subgroup = meta_test[meta_test[key] == val]
              
                score= accuracy_score(subgroup["y_test"], subgroup["y_pred"])
                print(
                    key,
                    val,
                    ":",
                    score,
                    subgroup.shape,
                    )
                    
                
                result_dic.append([key,val,score,subgroup.shape[0]])

(169396, 15) Int64Index([    45,     60,      7,     51,     66,     27,     71,     54,
               123,      8,
            ...
            169205, 169222, 169163, 169194, 169228, 169176, 169215, 169181,
            169191, 169186],
           dtype='int64', length=25000)
(25000, 15)
Overall Accuracy: 0.98764
Group initilization!
skintype type1 : 0.96625 (800, 16)
skintype type2 : 0.9903061224489796 (9800, 16)
skintype type3 : 0.9867289719626168 (10700, 16)
skintype type4 : 0.9870833333333333 (2400, 16)
skintype type5 : 0.9888888888888889 (900, 16)
skintype type6 : 0.99 (400, 16)
eye normal : 0.9878202247191011 (22250, 16)
eye narrow : 0.9861818181818182 (2750, 16)
haircolor red : 0.99 (200, 16)
haircolor black : 0.9899328859060402 (7450, 16)
haircolor gray : 0.993015873015873 (3150, 16)
haircolor brown : 0.9846783625730994 (8550, 16)
haircolor blonde : 0.9860176991150442 (5650, 16)
hairtype straight : 0.9875728155339806 (15450, 16)
hairtype wavy : 0.9869677419354839 (7750, 16)
ha

In [15]:
subgroup.shape

(14100, 16)

In [16]:
results = pd.DataFrame(result_dic,columns=['feature','category','acc','size'])
results["attribute_name"] = results["feature"] +'_'+ results["category"]
results = results.groupby('attribute_name').mean().sort_values(by='attribute_name')

results = results.reset_index()
results['attribute'] = results.attribute_name.apply(lambda x:x.split('_')[0])

total_size = results.groupby('attribute').sum()['size'][0]
print('total size',total_size)
results['Ratio']=results['size'].apply(lambda x: x/total_size)
results

total size 25000


Unnamed: 0,attribute_name,acc,size,attribute,Ratio
0,eye_narrow,0.952485,2750,eye,0.11
1,eye_normal,0.956959,22250,eye,0.89
2,haircolor_black,0.959776,7450,haircolor,0.298
3,haircolor_blonde,0.949676,5650,haircolor,0.226
4,haircolor_brown,0.953723,8550,haircolor,0.342
5,haircolor_gray,0.967937,3150,haircolor,0.126
6,haircolor_red,0.961667,200,haircolor,0.008
7,hairtype_bald,0.970952,700,hairtype,0.028
8,hairtype_curly,0.95,1100,hairtype,0.044
9,hairtype_straight,0.958813,15450,hairtype,0.618


In [29]:
results.to_csv('face_identifiacation_attribute_based_results.csv',index=False)

In [54]:
print("std:", np.std(results.acc.values,ddof=1) * 100)
print("bias:", (1-results.acc.values.min()) / (1-results.acc.values.max()))


std: 0.8587194162970204
bias: 2.4672131147540934


In [55]:
(1.0-results.acc.values.min())/(1-results.acc.values.max())

2.4672131147540934

In [56]:
1-results.acc.values.max()

0.0290476190476191

In [49]:
results

Unnamed: 0,attribute_name,acc,size,attribute,Ratio
0,eye_narrow,0.952485,2750,eye,0.11
1,eye_normal,0.956959,22250,eye,0.89
2,haircolor_black,0.959776,7450,haircolor,0.298
3,haircolor_blonde,0.949676,5650,haircolor,0.226
4,haircolor_brown,0.953723,8550,haircolor,0.342
5,haircolor_gray,0.967937,3150,haircolor,0.126
6,haircolor_red,0.961667,200,haircolor,0.008
7,hairtype_bald,0.970952,700,hairtype,0.028
8,hairtype_curly,0.95,1100,hairtype,0.044
9,hairtype_straight,0.958813,15450,hairtype,0.618


In [57]:
(1.0-results.acc.values.min()),(1-results.acc.values.max())

(0.07166666666666666, 0.0290476190476191)

In [60]:
# print(results[['feature_name','acc']].to_latex())
results["Ratio"] = results["Ratio"].apply(lambda x: f"{100*x:.2f}")
results["Acc"] = results["acc"].apply(lambda x: f"{100*x:.2f}")

results["attribute_name"] = results["attribute_name"].str.replace("skintype", "")
results["attribute_name"] = results["attribute_name"].str.replace("haircolor", "hair ")
results["attribute_name"] = results["attribute_name"].str.replace("hairtype", "hair ")
results["attribute_name"] = results["attribute_name"].str.title()
results["attribute_name"] = results["attribute_name"].apply(
    lambda x: " ".join(x.split("_")[::-1])
)
results = results.sort_values(by='Acc')
attribute_res = results[["attribute_name","Ratio", "Acc"]]

attribute_res = pd.concat(
    [
        attribute_res.iloc[:11].reset_index(drop=True),
        attribute_res.iloc[11:].reset_index(drop=True),
        
        
    ],
    axis=1,
    ignore_index=True,
)
attribute_res.columns = ["Attribute Category", "Ratio (%)","Accuracy (%)", "Attribute Category(%)", "Ratio","Accuracy(%)"]
attribute_res

Unnamed: 0,Attribute Category,Ratio (%),Accuracy (%),Attribute Category(%),Ratio,Accuracy(%)
0,Type1,3.2,92.83,Narrow Nose,56.4,95.71
1,Big Lips,27.4,94.8,Straight Hair,61.8,95.88
2,Blonde Hair,22.6,94.97,Small Lips,72.6,95.97
3,Curly Hair,4.4,95.0,Black Hair,29.8,95.98
4,Wavy Hair,31.0,95.14,Type6,1.6,96.0
5,Narrow Eye,11.0,95.25,Red Hair,0.8,96.17
6,Brown Hair,34.2,95.37,Type5,3.6,96.19
7,Wide Nose,43.6,95.57,Type4,9.6,96.26
8,Type3,42.8,95.64,Gray Hair,12.6,96.79
9,Type2,39.2,95.67,Bald Hair,2.8,97.1


In [52]:
print(
    attribute_res.to_latex(
        index=False, caption="Table Caption", label="tab:fi1", na_rep=""
    )
)

\begin{table}
\centering
\caption{Table Caption}
\label{tab:fi1}
\begin{tabular}{llllll}
\toprule
Attribute Category & Accuracy (\%) & Ratio (\%) & Attribute Category(\%) &  Ratio & Accuracy(\%) \\
\midrule
            Type1  &         3.2\% &     92.8\% &                Type2  &  39.2\% &       95.7\% \\
          Big Lips &        27.4\% &     94.8\% &        Straight Hair  &  61.8\% &       95.9\% \\
      Blonde Hair  &        22.6\% &     95.0\% &           Black Hair  &  29.8\% &       96.0\% \\
       Curly Hair  &         4.4\% &     95.0\% &            Small Lips &  72.6\% &       96.0\% \\
        Wavy Hair  &        31.0\% &     95.1\% &                Type6  &   1.6\% &       96.0\% \\
        Narrow Eye &        11.0\% &     95.2\% &             Red Hair  &   0.8\% &       96.2\% \\
       Brown Hair  &        34.2\% &     95.4\% &                Type5  &   3.6\% &       96.2\% \\
         Wide Nose &        43.6\% &     95.6\% &                Type4  &   9.6\% &       96.

In [20]:
print(type(all_predictions), type(y_s))
for y_test, y_pred in zip(y_s, all_predictions):
    print(type(y_pred), type(y_test))
    y_pred = np.array(list(chain(*y_pred)))
    print("Overall Accuracy:", accuracy_score(y_test, y_pred))

<class 'list'> <class 'list'>
<class 'list'> <class 'numpy.ndarray'>
Overall Accuracy: 0.98984
<class 'list'> <class 'numpy.ndarray'>
Overall Accuracy: 0.98184
<class 'list'> <class 'numpy.ndarray'>
Overall Accuracy: 0.96732
<class 'list'> <class 'numpy.ndarray'>
Overall Accuracy: 0.94552
<class 'list'> <class 'numpy.ndarray'>
Overall Accuracy: 0.93396


In [23]:
feature_arr = np.asarray(features, dtype=np.float32)
print(feature_arr[0][0], np.mean(feature_arr), np.std(feature_arr))
feature_arr = preprocessing.normalize(feature_arr)
print(feature_arr[0][0], np.mean(feature_arr), np.std(feature_arr))
labels = list(map(lambda x: x.split("/")[-2], path_arr))
le = preprocessing.LabelEncoder()
labels = le.fit_transform(labels)
X = feature_arr
y = labels

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

print("Data is ready!", X.shape, X.shape)

0.88497335 -0.0055923657 0.8592094
0.045853145 -0.00028270765 0.044193346
Data is ready! (169396, 512) (169396, 512)


In [4]:
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=0)
for train_index, test_index in sss.split(X, y):
    print("TRAIN:", type(train_index), "TEST:", type(test_index))
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    svm_model_linear = SVC(kernel="linear", C=20).fit(X_train, y_train)
    print("Training is completed.")

TRAIN: <class 'numpy.ndarray'> TEST: <class 'numpy.ndarray'>
Training is completed.


In [27]:
# import datetime
# timestr = datetime.datetime.now().isoformat().replace(":", ".")
# svm_model_file = f"svm_model_{timestr}"
# svm_model_linear.save_to_file(svm_model_file)
# print(f"Saved model to: {svm_model_file}")

In [28]:
import sys
from itertools import chain

from tqdm import tqdm

print("Starting prediction (The computer can be unresponsive during the prediction).")

TEST_BATCH_SIZE = 1000
preds = []
with tqdm(total=X_test.shape[0], file=sys.stdout) as pbar:
    for i in range(0, X_test.shape[0], TEST_BATCH_SIZE):
        X_test_batch = X_test[i : i + TEST_BATCH_SIZE]
        pred = svm_model_linear.predict(X_test_batch)
        preds.append(pred)
        # update tqdm
        pbar.set_description("Processed: %d" % (1 + i))
        pbar.update(TEST_BATCH_SIZE)

Starting prediction (The computer can be unresponsive during the prediction).
Processed: 24001: 100%|██████████| 25000/25000 [07:52<00:00, 52.89it/s]


In [29]:
y_pred = np.array(list(chain(*preds)))
print("Overall Accuracy:", accuracy_score(y_test, y_pred))

Overall Accuracy: 0.99148


In [57]:
test_ixs["y_pred"] = y_pred.astype(np.int)
test_ixs["y_test"] = y_test
test_ixs = test_ixs.rename(columns={"subject": "Class_ID"})
test_data = test_ixs.merge(vgg_test, on="Class_ID", how="left")

In [64]:
attribute_map = [
    {"skintype": ["type1", "type2", "type3", "type4", "type5", "type6"]},
    {"hairtype": ["straight", "wavy", "bald", "curly"]},
    {"haircolor": ["red", "black", "grey", "brown", "blonde"]},
    {"lips": ["small", "big"]},
    {"eye": ["normal", "narrow"]},
    {"nose": ["wide", "narrow"]},
]
print("Group initilization!")
for attr in attribute_map:
    # for one value of one attribute
    for key, value in attr.items():
        for val in value:
            subgroup = test_data[test_data[key] == val]
            print(
                key,
                val,
                ":",
                accuracy_score(subgroup["y_test"], subgroup["y_pred"]),
                subgroup.shape,
            )

Group initilization!
skintype type1 : 0.99875 (800, 19)
skintype type2 : 0.9933673469387755 (9800, 19)
skintype type3 : 0.9909345794392523 (10700, 19)
skintype type4 : 0.9816666666666667 (2400, 19)
skintype type5 : 0.9966666666666667 (900, 19)
skintype type6 : 0.9925 (400, 19)
hairtype straight : 0.9905501618122977 (15450, 19)
hairtype wavy : 0.9930322580645161 (7750, 19)
hairtype bald : 0.9914285714285714 (700, 19)
hairtype curly : 0.9936363636363637 (1100, 19)
haircolor red : 0.995 (200, 19)
haircolor black : 0.9914093959731544 (7450, 19)
haircolor grey : 0.9873015873015873 (3150, 19)
haircolor brown : 0.991812865497076 (8550, 19)
haircolor blonde : 0.9932743362831858 (5650, 19)
lips small : 0.9921763085399449 (18150, 19)
lips big : 0.9896350364963503 (6850, 19)
eye normal : 0.9918651685393258 (22250, 19)
eye narrow : 0.9883636363636363 (2750, 19)
nose wide : 0.9935779816513761 (10900, 19)
nose narrow : 0.9898581560283688 (14100, 19)


In [None]:
# y_pred = svm_model_linear.predict(X_test)

In [None]:
# features = np.load('features/unlearn_races_r50_feat_05ep.npz')
# meta_data = pd.read_csv('metadata/VGGFace2_200_Subjects_Test_Images.csv')

In [None]:
# features = np.load('../FeatureEncodingsRFW/senet50_ft_features.npy')
# train_ixs = pd.read_csv('../train_test_split/rfwtest_train_indexes.csv')
# test_ixs = pd.read_csv('../train_test_split/rfwtest_test_indexes.csv')

In [None]:
features = features["arr_0"]

In [None]:
feature_arr = np.asarray(features[:][:, :-1], dtype=np.float64)
path_arr = features[:][:, -1]

labels = list(map(lambda x: x.split("/")[0], path_arr))
le = preprocessing.LabelEncoder()
labels = le.fit_transform(labels)
X = pd.DataFrame(feature_arr)
y = pd.Series(labels)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
print("SVM!")

svm_model_linear = SVC(kernel="linear", C=1).fit(X_train, y_train)
y_pred = svm_model_linear.predict(X_test)
print("Overall Accuracy:", accuracy_score(y_test.values, y_pred))

print("Group initilization!")
test_pathes = path_arr[y_test.index.values]
for race in ["African", "Asian", "Caucasian", "Indian"]:
    for gender in ["m", "f"]:
        main_group = meta_data[
            (meta_data.race == race)
            & (meta_data.gender == gender)
            & (meta_data.generated_version == "original")
        ]
        group_file = main_group.filename.values
        indexes = []
        for el in group_file:
            loc = np.argwhere(test_pathes == el)
            if loc.size != 0:
                indexes.append(int(loc[0][0]))
        if len(indexes) > 0:
            indexes = np.asarray(indexes)
            print(race, gender)
            print(
                " accuracy:%d %.3f"
                % (
                    len(y_test.values[indexes]),
                    accuracy_score(y_test.values[indexes], y_pred[indexes]),
                )
            )

In [None]:
from sklearn.model_selection import GridSearchCV

feature_arr = np.asarray(features[:][:, :-1], dtype=np.float64)
path_arr = features[:][:, -1]

labels = list(map(lambda x: x.split("/")[0], path_arr))
le = preprocessing.LabelEncoder()
labels = le.fit_transform(labels)
X = pd.DataFrame(feature_arr)
y = pd.Series(labels)
param_grid = [
    {"C": [1, 10, 100, 1000], "kernel": ["linear"]},
    {"C": [1, 10, 100, 1000], "gamma": [0.001, 0.0001], "kernel": ["rbf"]},
]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
grid_search = GridSearchCV(SVC(), param_grid, cv=2)
svm_model = grid_search.fit(X_train, y_train)
print(grid_search.best_params_)

# y_pred = svm_model.predict(X_test)
# print('Overall Accuracy:',accuracy_score(y_test.values, y_pred))

# print('Group initilization!')
# test_pathes = path_arr[y_test.index.values]
# for race in ['African','Asian','Caucasian', 'Indian']:
#     for gender in ['f','m']:
#         main_group = meta_data[(meta_data.race == race) & (meta_data.gender== gender) & (meta_data.generated_version== 'original') ]
#         group_file = main_group.filename.values
#         indexes = []
#         for el in group_file:
#             loc = np.argwhere(test_pathes==el)
#             if loc.size != 0:
#                 indexes.append(int(loc[0][0]))
#         if len(indexes)>0:
#             indexes = np.asarray(indexes)
#             print(race,gender)
#             print(' accuracy:%d %.3f'%(len(y_test.values[indexes]), accuracy_score(y_test.values[indexes], y_pred[indexes])))

SVM!

Overall Accuracy: 0.5139621028636558

Group initilization!
African m
 accuracy:1551 0.454
African f
 accuracy:1953 0.473
Asian m
 accuracy:1610 0.496
Asian f
 accuracy:1516 0.383
Caucasian m
 accuracy:1797 0.559
Caucasian f
 accuracy:1959 0.590
Indian m
 accuracy:1964 0.615
Indian f
 accuracy:1688 0.498