# generate_paris.py

In [39]:

import os
import random
import argparse

In [40]:
class GeneratePairs:

    """
    Generate the pairs.txt file that is used for training face classifier when calling python `src/train_softmax.py`.
    Or others' python scripts that needs the file of pairs.txt.

    Doc Reference: http://vis-www.cs.umass.edu/lfw/README.txt
    """

    def __init__(self, data_dir, pairs_filepath, img_ext):
        """
        Parameter data_dir, is your data directory.
        Parameter pairs_filepath, where is the pairs.txt that belongs to.
        Parameter img_ext, is the image data extension for all of your image data.
        """
        self.data_dir = data_dir
        self.pairs_filepath = pairs_filepath
        self.img_ext = img_ext


    def generate(self):
        for i in range(10):
            self._generate_matches_pairs()
            self._generate_mismatches_pairs()


    def _generate_matches_pairs(self):
        """
        Generate all matches pairs
        """
        for name in os.listdir(self.data_dir):
            if name == ".DS_Store":
                continue

            a = []
            for file in os.listdir(self.data_dir + name):
                if file == ".DS_Store":
                    continue
                a.append(file)
#             print(a)
            with open(self.pairs_filepath, "a") as f:
                for i in range(3):
                    temp = random.choice(a).split("_") # This line may vary depending on how your images are named.
#                     print(temp)
                    w = temp[0]

                    l = random.choice(a).split("_")[1].lstrip("0").rstrip(self.img_ext)
                    
                    r = random.choice(a).split("_")[1].lstrip("0").rstrip(self.img_ext)
                    f.write(w + "\t" + l + "\t" + r + "\n")


    def _generate_mismatches_pairs(self):
        """
        Generate all mismatches pairs
        """
        for i, name in enumerate(os.listdir(self.data_dir)):
            if name == ".DS_Store":
                continue

            remaining = os.listdir(self.data_dir)
            remaining = [f_n for f_n in remaining if f_n != ".DS_Store"]
            # del remaining[i] # deletes the file from the list, so that it is not chosen again
            other_dir = random.choice(remaining)
            with open(self.pairs_filepath, "a") as f: 
                for i in range(3):
                    file1 = random.choice(os.listdir(self.data_dir + name))
#                     print('first', file1, name)
                    file2 = random.choice(os.listdir(self.data_dir + other_dir))
#                     print('second', file2, other_dir)
                    number_1 = file1.split("_")[1].lstrip("0").rstrip(self.img_ext)
                    number_2 = file2.split("_")[1].lstrip("0").rstrip(self.img_ext)
                    print(number_1, number_2)
                    # f.write(name + "\t" + file1.split("_")[2].lstrip("0").rstrip(self.img_ext) + "\n")
                    f.write(name + "\t" + number_1 + "\t" + other_dir + "\t" + number_2 + '\n')

In [41]:

data_dir = '../data/kface/kface_test_112x112_rename/'
pairs_filepath = '../data/kface/kface_pairs.txt'
img_ext = ".jpg"
generatePairs = GeneratePairs(data_dir, pairs_filepath, img_ext)
generatePairs.generate()

25 25
26 24
3 14
9 28
20 15
2 13
15 14
12 27
21 4
10 3
26 17
5 3
12 25
28 22
8 1
12 24
9 1
27 23
22 24
12 22
14 25
9 25
10 23
10 17
21 23
8 15
18 15
7 8
18 27
3 3
1 5
11 8
21 22
12 9
13 2
23 22
17 9
12 10
22 20
25 15
24 10
26 11
8 19
7 30
11 30
29 13
9 11
11 25
14 13
9 24
7 6
12 22
6 18
5 28
22 7
16 14
27 8
2 5
2 30
16 19
17 13
21 9
26 22
21 17
7 16
18 6
12 3
21 10
25 24
13 13
16 2
21 9
25 8
12 13
7 23
21 4
6 14
5 6
15 25
14 13
3 10
20 29
5 23
14 2
3 10
4 8
13 9
4 27
14 13
13 4
13 10
18 12
5 27
11 8
7 16
15 12
5 8
17 18
30 23
12 2
22 6
26 28
25 16
17 4
14 14
19 14
14 1
27 27
12 6
8 1
1 8
17 8
14 6
18 13
23 25
6 1
20 22
16 15
15 20
22 5
25 14
6 19
13 8
29 30
18 10
15 5
6 15
6 26
9 30
16 2
24 12
11 18
3 19
28 9
20 8
27 14
2 24
21 10
2 1
22 29
13 29
24 11
5 21
11 16
18 4
10 8
14 17
11 13
7 30
4 8
18 8
6 10
4 19
5 21
28 24
23 8
24 4
23 7
12 7
29 8
6 28
22 9
18 7
2 26
21 2
20 19
14 12
11 13
24 8
16 12
18 4
6 4
29 15
30 13
21 30
29 17
27 16
1 12
1 27
9 11
5 2
16 9
6 11
3 14
15 16
1 14
28 25


12 29
17 17
1 26
13 6
9 11
17 11
2 16
2 10
3 10
22 22
11 28
18 12
18 7
29 2
3 16
19 1
25 29
20 24
5 23
9 13
25 2
13 8
22 23
6 18
23 6
23 2
10 22
14 7
5 3
4 10
22 18
2 3
11 19
19 4
10 21
9 20
18 1
12 9
22 21
13 15
15 7
25 3
14 17
2 26
5 24
14 16
17 2
5 3
27 21
19 17
23 11
3 7
14 24
10 1
29 18
16 30
27 30
22 16
18 16
11 22
21 7
6 3
9 11
21 15
25 13
7 3
20 26
7 27
25 4
12 2
25 9
11 7
21 1
1 28
9 26
13 2
16 7
17 15
21 11
13 21
10 2
16 6
26 1
22 26
21 8
14 1
12 4
17 16
4 22
12 10
20 12
10 19
2 9
7 26
16 26
24 3
9 9
27 16
3 15
28 13
12 14
2 20
12 1
8 24
5 18
13 24
4 15
22 24
10 5
17 5
7 13
18 6
3 22
14 13
15 19
5 26
21 22
20 7
15 19
12 3
19 17
20 20
21 24
22 25
2 4
13 15
14 17
22 9
3 19
25 13
27 6
5 21
23 15
3 7
19 8
12 6
5 9
25 10
27 15
21 7
24 29
2 6
12 17
11 1
20 2
26 1
12 26
24 8
9 24
1 17
16 7
8 5
11 19
5 9
22 1
9 28
28 11
9 17
23 21
14 10
11 12
12 21
18 23
20 2
6 4
10 22
20 28
1 14
16 23
18 20
20 19
10 14
29 10
21 21
4 1
9 10
1 5
3 7
1 29
8 13
10 5
19 3
6 11
3 12
22 20
29 9
6 26
8 13
1

8 13
27 21
28 6
7 10
3 13
13 17
25 28
1 12
13 23
9 21
9 24
13 24
5 7
6 14
2 9
30 5
9 9
23 3
28 23
17 4
10 2
1 18
2 23
14 16
23 6
22 16
10 9
5 17
23 14
25 1
19 30
22 16
22 1
12 30
2 14
19 18
30 1
12 26
5 17
2 13
15 8
1 12
15 6
26 19
5 11
1 27
19 19
10 14
3 24
6 24
25 12
2 29
23 4
14 9
24 6
11 11
3 26
6 29
14 3
17 20
22 2
4 6
28 14
21 1
7 15
10 18
5 3
5 18
20 18
10 17
22 24
3 21
25 19
16 2
6 5
2 10
14 5
2 6
18 10
28 19
14 12
5 18
25 1
10 1
5 9
11 15
11 7
5 26
30 23
19 10
23 24
10 1
19 5
1 2
27 22
1 3
22 1
3 8
21 4
20 8
18 16
23 15
21 11
11 5
19 4
10 14
4 15
15 17
29 15
7 29
25 11
3 12
4 7
29 4
26 20
8 23
24 3
5 7
15 4
26 2
10 11
8 13
6 25
26 13
9 27
22 10
20 11
10 16
8 23
13 13
6 8
18 8
18 16
28 28
21 28
5 8
16 10
17 20
3 25
18 9
10 28
27 7
25 5
16 26
3 23
5 9
15 23
10 25
11 27
9 23
25 17
18 19
10 13
20 25
11 3
22 21
24 2
29 6
8 29
19 27
9 23
23 24
14 29
27 3
15 21
7 19
3 7
7 14
23 23
21 25
17 13
13 15
20 18
26 7
4 17
23 7
2 3
10 6
13 12
29 11
18 8
11 17
14 19
14 22
25 7
25 26
11 3
11 8


27 12
15 3
15 18
6 26
23 30
10 22
3 9
10 24
14 22
12 21
14 7
3 13
13 1
8 5
5 12
5 24
9 18
21 2
6 27
8 19
5 23
10 14
21 18
5 15
26 22
21 10
6 6
3 11
17 8
23 19
15 19
30 1
14 8
19 15
24 9
21 11
13 17
13 9
7 14
15 5
16 3
25 29
12 26
9 11
11 1
29 13
15 3
21 8
25 28
7 5
10 6
27 10
8 21
4 13
15 2
12 13
4 1
7 7
3 14
18 5
17 1
7 25
29 28
29 23
14 5
12 30
16 17
10 29
10 4
13 20
17 7
11 29
11 1
16 29
28 20
19 2
21 6
5 18
20 15
16 24
14 25
1 18
2 17
22 3
21 4
14 22
9 29
16 14
14 28
13 20
13 6
15 20
16 11
24 12
14 1
15 22
23 7
19 8
10 30
2 21
2 9
13 20
1 18
1 21
17 19
3 8
15 4
22 5
29 14
1 18
15 12
14 5
21 19
8 14
8 21
4 10
22 10
2 21
14 28
27 4
26 6
12 22
23 1
25 8
18 21
11 12
14 6
16 2
16 12
26 7
12 5
27 18
27 24
22 25
8 25
17 8
24 8
14 16
28 5
26 21
9 23
9 10
10 22
24 16
12 11
12 15
22 24
14 1
2 22
7 1
9 6
1 20
13 17
24 20
30 13
8 12
13 20
5 23
8 5
20 1
23 4
1 18
1 21
20 26
9 9
21 18
25 12
19 10
3 20
19 1
16 1
18 8
24 13
12 15
18 23
8 11
2 29
26 9
12 24
17 17
6 25
9 20
29 14
17 21
25 13
8 12
2 

22 11
18 8
14 21
9 8
27 26
22 28
20 15
3 12
9 14
8 1
10 23
11 3
5 22
23 15
13 17
25 9
18 22
29 20
12 6
1 29
14 12
16 15
17 19
7 18
2 22
10 10
22 6
5 5
2 20
27 15
4 11
11 13
19 20
19 7
6 25
21 30
27 25
27 3
6 2
2 9
15 5
13 7
21 24
15 30
3 3
4 20
13 26
19 1
8 9
22 20
4 12
13 19
3 18
3 24
1 14
13 25
25 2
10 30
16 28
10 16
3 11
15 16
11 11
16 26
5 3
30 7
27 27
12 15
2 29
26 20
9 27
7 24
15 3
10 18
3 3
6 9
23 16
3 5
26 11
18 8
20 14
11 15
25 16
6 6
20 14
10 20
14 17
16 11
16 12
15 22
14 24
1 13
10 11
15 13
20 3
11 22
10 22
18 20
9 3
26 26
25 17
9 11
7 4
10 5
3 17
18 21
3 1
19 16
10 10
1 25
8 17
18 3
13 26
24 9
7 10
24 5
16 28
5 15
8 13
15 2
18 4
17 19
1 9
2 7
2 5
9 22
22 21
6 10
4 16
19 9
19 5
8 3
13 27
7 17
21 15
9 4
16 1
19 2
1 23
26 13
26 23
20 1
17 19
12 1
2 8
19 16
2 17
10 26
14 17
20 15
12 11
26 24
17 7
14 19
8 12
9 12
1 25
5 9
21 10
5 19
20 15
22 1
14 19
12 7
16 15
22 26
2 8
29 22
11 11
18 20
27 21
19 11
25 21
15 10
4 29
9 5
4 4
29 6
4 5
17 14
9 23
18 5
11 24
13 27
22 2
15 2
22 4
1 3

17 28
6 4
6 1
4 16
13 11
17 20
11 25
19 29
19 16
20 23
25 18
11 23
25 24
15 9
20 2
28 20
8 4
8 14
25 9
30 16
19 18
19 14
26 10
24 27
4 24
12 15
22 2
7 13
26 7
6 11
13 9
19 19
11 2
15 24
27 29
12 18
23 15
17 20
22 21
4 11
11 19
4 5
9 18
18 29
15 15
14 16
7 17
28 18
8 22
20 4
10 13
15 18
14 8
24 22
28 5
13 27
30 17
1 1
10 22
28 4
14 24
18 10
23 23
15 13
9 7
1 6
5 25
18 10
24 22
1 9
20 18
8 22
7 2
12 24
8 26
10 6
2 21
20 9
16 16
12 22
9 3
1 9
20 22
5 14
23 20
15 14
2 4
11 22
1 6
25 18
17 27
12 11
24 1
23 13
25 29
2 14
9 1
21 28
21 9
8 11
23 1
9 6
25 8
1 5
29 12
3 16
27 15
2 4
6 9
12 27
16 7
13 14
3 14
17 27
23 20
1 24
13 5
22 7
14 15
16 7
27 4
16 26
2 2
5 6
15 25
10 1
18 19
24 13
6 18
2 11
28 21
12 8
13 3
19 13
25 14
7 29
7 11
2 10
18 6
9 6
23 16
21 8
28 26
7 10
12 24
23 17
22 18
11 3
25 7
19 13
13 16
26 1
15 16
19 14
8 7
9 21
17 11
10 24
3 7
2 15
20 7
22 17
13 1
22 17
20 25
6 2
28 9
15 24
18 6
27 21
5 24
29 1
9 18
13 1
2 4
8 2
2 30
17 5
7 6
23 7
25 13
17 11
20 2
20 26
5 5
30 12
5 9
3 8
1

28 2
5 2
18 4
10 5
18 14
9 20
9 1
16 12
21 5
17 8
6 4
20 15
13 18
8 7
12 24
2 9
21 6
28 8
30 26
13 3
10 27
15 27
16 28
10 20
1 7
24 16
3 16
25 9
5 2
24 6
2 19
6 21
3 3
25 10
28 17
27 13
19 27
3 17
5 20
1 1
8 15
1 20
4 7
24 28
29 23
11 13
27 7
5 10
6 25
17 15
11 10
19 15
12 15
28 10
4 16
17 6
22 22
12 9
19 25
1 4
9 23
1 2
3 27
24 23
13 5
6 5
3 16
8 18
17 7
27 3
26 18
23 26
25 22
18 28
1 26
20 13
16 18
5 15
6 12
25 20
19 8
14 7
13 3
13 9
23 28
26 4
16 25
28 17
21 24
24 21
18 27
24 2
24 18
5 4
1 3
9 27
14 10
27 1
14 11
29 23
26 5
6 1
18 2
24 10
17 23
7 10
1 16
2 10
13 11
9 3
12 3
16 30
10 23
10 30
14 23
17 4
8 22
8 5
15 14
10 12
18 17
16 8
6 18
28 13
3 25
5 18
2 26
3 13
19 11
14 5
6 9
16 5
28 7
20 19
6 6
9 12
2 7
24 21
6 15
27 25
19 26
1 9
25 13
1 23
15 9
10 24
18 20
27 29
30 24
6 13
16 22
16 11
18 8
1 30
14 6
3 4
16 23
15 16
29 19
25 7
11 9
19 6
6 5
8 4
7 15
5 13
26 16
30 23
9 16
3 28
13 18
18 13
14 13
20 5
6 15
22 20
13 6
16 14
15 5
11 9
19 11
18 8
20 20
5 16
8 9
11 13
9 23
11 22
15 23


3 9
20 20
16 22
16 13
24 25
9 26
13 30
11 23
4 28
3 5
9 10
8 23
7 7
20 26
19 26
25 1
2 3
28 3
12 21
10 10
11 19
19 5
15 1
2 9
27 5
4 3
1 11
4 2
22 8
29 28
20 10
22 13
17 15
21 19
17 29
4 24
25 1
8 28
27 10
25 14
6 14
7 24
18 11
18 17
19 13
3 27
10 14
25 9
28 16
18 23
17 12
23 16
27 28
26 25
24 19
5 12
11 30
24 5
18 27
16 22
12 6
15 3
29 18
26 27
14 24
13 29
22 5
24 28
18 5
19 10
17 15
14 26
20 17
4 27
27 18
23 21
26 16
24 17
4 13
18 17
6 25
20 1
3 26
13 9
8 5
28 2
11 6
22 6
9 9
6 7
4 13
9 10
1 1
18 16
18 16
22 6
29 5
24 3
26 15
8 11
19 16
22 1
28 8
8 21
23 3
10 26
17 8
22 19
15 16
2 8
27 12
17 15
20 1
6 7
25 19
2 3
30 2
30 26
19 25
20 23
1 4
28 23
12 26
15 23
7 15
10 24
25 10
21 6
11 8
8 18
13 4
20 15
19 23
8 18
17 8
28 2
19 13
4 20
7 14
7 3
10 17
6 24
11 15
22 21
25 28
13 25
13 8
12 7
14 4
17 2
16 11
22 19
1 6
21 28
12 8
5 20
4 3
4 21
11 13
14 6
27 4
10 10
13 6
7 15
3 25
12 25
13 11
15 6
13 12
13 10
11 8
12 13
15 17
4 26
12 20
4 17
26 6
24 24
2 3
27 24
22 3
23 25
23 4
19 10
13 27
9 24

In [42]:
with open(pairs_filepath, 'r') as f:
    data = f.read()

In [43]:
data

'19062421\t11\t10\n19062421\t2\t21\n19062421\t2\t16\n19062431\t27\t14\n19062431\t29\t24\n19062431\t17\t4\n19062521\t15\t3\n19062521\t13\t5\n19062521\t20\t7\n19062531\t2\t5\n19062531\t8\t21\n19062531\t25\t19\n19062542\t4\t23\n19062542\t5\t28\n19062542\t25\t12\n19062621\t17\t8\n19062621\t19\t18\n19062621\t8\t19\n19062622\t18\t8\n19062622\t3\t10\n19062622\t24\t13\n19062641\t26\t10\n19062641\t1\t25\n19062641\t13\t16\n19062722\t6\t10\n19062722\t23\t16\n19062722\t10\t10\n19062731\t16\t10\n19062731\t18\t10\n19062731\t17\t2\n19062732\t7\t12\n19062732\t13\t3\n19062732\t20\t15\n19062811\t11\t4\n19062811\t5\t2\n19062811\t3\t21\n19062831\t21\t8\n19062831\t8\t22\n19062831\t3\t5\n19062832\t25\t25\n19062832\t17\t25\n19062832\t18\t28\n19062841\t5\t1\n19062841\t16\t15\n19062841\t3\t1\n19062842\t21\t26\n19062842\t7\t25\n19062842\t28\t17\n19070121\t20\t18\n19070121\t12\t13\n19070121\t10\t1\n19070142\t9\t1\n19070142\t2\t14\n19070142\t2\t8\n19070212\t26\t25\n19070212\t25\t14\n19070212\t11\t5\n19070221\t24\

In [44]:
import pandas as pd
pairs_data = pd.read_csv(pairs_filepath, sep = '\t', names = ['1','2','3','4'])

In [45]:
pairs_data.head()

Unnamed: 0,1,2,3,4
0,19062421,11,10,
1,19062421,2,21,
2,19062421,2,16,
3,19062431,27,14,
4,19062431,29,24,


In [46]:
len(pairs_data)

24000