# Find combinations of images

In [11]:
import pandas as pd
import itertools

In [2]:
# Load the image-to-label mapping (previewed below: folder, file name, smoke).
df = pd.read_csv('mapping.csv')

In [6]:
# Preview the first five rows.
df[0:5]

Unnamed: 0,folder,file name,smoke
0,20170625-BBM-bm-n-mobo,1498413993_-02400.jpg,0
1,20170625-BBM-bm-n-mobo,1498414053_-02340.jpg,0
2,20170625-BBM-bm-n-mobo,1498414113_-02280.jpg,0
3,20170625-BBM-bm-n-mobo,1498414173_-02220.jpg,0
4,20170625-BBM-bm-n-mobo,1498414233_-02160.jpg,0


In [15]:
# Toy frame used to sanity-check the pairing function: four images, smoke
# flagged only in the second one.
test = pd.DataFrame(data={'file name': [1, 2, 3, 4], 'smoke': [0, 1, 0, 0]})
test

Unnamed: 0,file name,smoke
0,1,0
1,2,1
2,3,0
3,4,0


In [7]:
def apply_rules(smoke1, smoke2):
    """Label an ordered image pair from the smoke flags of its two images.

    Rules:
        (0, 0) -> 0
        (0, 1) -> 1
        (1, 1) -> 1

    Any other pair, notably (1, 0), has no rule and yields ``None``.
    """
    if smoke1 == 0:
        if smoke2 == 0:
            return 0
        if smoke2 == 1:
            return 1
        return None
    if smoke1 == 1 and smoke2 == 1:
        return 1
    # No rule matched (for example smoke in the first image only).
    return None

In [4]:
def find_combinations_one_camera(df, file_name_col, smoke_col):
    """Pair every image of ``df`` with every image (full Cartesian product).

    This version also emits self-pairs and both orderings of each pair, so
    the result has ``len(df) ** 2`` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to pair up (one camera at a time in this notebook).
    file_name_col : str
        Column holding the image identifier.
    smoke_col : str
        Column holding the smoke flag passed to ``apply_rules``.

    Returns
    -------
    pandas.DataFrame
        One row per ordered pair with columns ``image1``, ``image2`` and
        ``label``; an empty frame without columns when ``df`` has no rows.
    """
    # Read the columns positionally so that a non-default index (for example
    # a filtered frame that was not reset) cannot raise KeyError or select the
    # wrong rows, as label-based ``df[col][i]`` would.
    rows = list(zip(df[file_name_col].tolist(), df[smoke_col].tolist()))
    records = [
        {"image1": name1, "image2": name2, "label": apply_rules(smoke1, smoke2)}
        for name1, smoke1 in rows
        for name2, smoke2 in rows
    ]
    return pd.DataFrame(records)

`find_combinations_one_camera` is not the right function because it does not take unique combinations: it pairs every image with every image, including itself and both orderings of each pair. The second version is better. Thank you Jared for this!

In [8]:
def find_combinations_one_camera2(df, file_name_col, smoke_col):
    """Build every unique pair of images from ``df`` and label each pair.

    Pairs follow row order: ``image1`` always comes from an earlier row than
    ``image2``, and no image is paired with itself.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to pair up (one camera at a time in this notebook).
    file_name_col : str
        Column holding the image identifier.
    smoke_col : str
        Column holding the smoke flag passed to ``apply_rules``.

    Returns
    -------
    pandas.DataFrame
        One row per pair with columns ``image1``, ``image2`` and ``label``;
        an empty frame without columns when ``df`` has fewer than two rows.
    """
    # Pair the rows positionally rather than through index labels, so the
    # function also works on frames whose index is not 0..n-1 (for example a
    # filtered frame that was not reset).
    rows = list(zip(df[file_name_col].tolist(), df[smoke_col].tolist()))
    records = [
        {"image1": name1, "image2": name2, "label": apply_rules(smoke1, smoke2)}
        for (name1, smoke1), (name2, smoke2) in itertools.combinations(rows, 2)
    ]
    return pd.DataFrame(records)

In [16]:
# Try the pairing function on the toy frame.
combinations = find_combinations_one_camera2(test, 'file name', 'smoke')

In [17]:
combinations

Unnamed: 0,image1,image2,label
0,1,2,1.0
1,1,3,0.0
2,1,4,0.0
3,2,3,
4,2,4,
5,3,4,0.0


In [28]:
# Count the images in each camera folder.
camera = df.groupby('folder')['file name'].count().reset_index(name='images')
camera

Unnamed: 0,folder,images
0,20170625-BBM-bm-n-mobo,81
1,20170708-Whittier-syp-n-mobo-c,71
2,20171207-Lilac-rm-s-mobo,71
3,20180611-fallbrook-rm-w-mobo-c,77
4,20180614-Bridle-hp-n-mobo-c,81
5,20180614-Hope-wc-e-mobo-c,64
6,20180704-Benton-hp-n-mobo-c,61
7,20180706-West-lp-n-mobo-c,81
8,20180725-Cranston-hp-n-mobo-c,67
9,20190529-94Fire-lp-s-mobo-c,64


In [29]:
# Save the per-camera image counts.
camera.to_csv('camera.csv')

In [19]:
# Build the pair table one camera folder at a time, so that images from
# different folders are never paired with each other.
per_camera_pairs = [
    find_combinations_one_camera2(
        # Fresh 0..n-1 index, without keeping the old one as a column.
        df[df['folder'] == folder].reset_index(drop=True),
        'file name',
        'smoke',
    )
    for folder in camera['folder']
]
# Concatenate once instead of growing `final` inside the loop, which would
# re-copy all accumulated rows on every iteration.
final = (
    pd.concat(per_camera_pairs, ignore_index=True)
    if per_camera_pairs
    else pd.DataFrame()
)

In [21]:
final

Unnamed: 0,image1,image2,label
0,1498413993_-02400.jpg,1498414053_-02340.jpg,0
1,1498413993_-02400.jpg,1498414113_-02280.jpg,0
2,1498413993_-02400.jpg,1498414173_-02220.jpg,0
3,1498413993_-02400.jpg,1498414233_-02160.jpg,0
4,1498413993_-02400.jpg,1498414293_-02100.jpg,0
...,...,...,...
125923,1572720995.jpg,1572721115.jpg,0
125924,1572720995.jpg,1572721175.jpg,0
125925,1572721055.jpg,1572721115.jpg,0
125926,1572721055.jpg,1572721175.jpg,0


Missing values: we no longer have this issue now that only unique combinations are taken.

In [20]:
# Count missing values per column.
final.isnull().sum()

image1    0
image2    0
label     0
dtype: int64

What should we do with the combination smoke(t) and no_smoke(t-1)?  
We have 23043 labels in this case.

For now, I remove these cases from our database.

In [11]:
# Drop rows with a missing value; a missing label is what apply_rules
# produces for pairs it has no rule for.
final.dropna(inplace=True)

In [42]:
# Re-check that no missing values remain.
final.isnull().sum()

image1    0
image2    0
label     0
dtype: int64

In [23]:
# Class balance: number of pairs per label.
final.groupby('label').count()

Unnamed: 0_level_0,image1,image2
label,Unnamed: 1_level_1,Unnamed: 2_level_1
0,87962,87962
1,37966,37966


In [24]:
# Store the label column as 32-bit integers.
final['label'] = final['label'].astype('int32')

In [25]:
# NOTE(review): without drop=True the old index is kept as an extra 'index'
# column (visible in the preview below) — confirm that this is intended.
final.reset_index(inplace=True)

In [26]:
final

Unnamed: 0,index,image1,image2,label
0,0,1498413993_-02400.jpg,1498414053_-02340.jpg,0
1,1,1498413993_-02400.jpg,1498414113_-02280.jpg,0
2,2,1498413993_-02400.jpg,1498414173_-02220.jpg,0
3,3,1498413993_-02400.jpg,1498414233_-02160.jpg,0
4,4,1498413993_-02400.jpg,1498414293_-02100.jpg,0
...,...,...,...,...
125923,125923,1572720995.jpg,1572721115.jpg,0
125924,125924,1572720995.jpg,1572721175.jpg,0
125925,125925,1572721055.jpg,1572721115.jpg,0
125926,125926,1572721055.jpg,1572721175.jpg,0


In [27]:
# Write the labelled image pairs to disk.
final.to_csv('combinations.csv')