In [57]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier as knn
from sklearn.ensemble import BaggingClassifier as knn_bagging
from sklearn.linear_model import SGDClassifier as sgd
from sklearn.linear_model import LogisticRegression as logistic_reg
from sklearn.preprocessing import StandardScaler

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.svm import SVC
# 
import imblearn
plt.style.use("dark_background")


In [58]:
# Run configuration read by the cells below.

# Selects the older file-name schema (with `event_idx`) instead of the one with
# Hour/Minute/Seconds when the `name` column is split into metadata columns.
old_run = False

# Whether the file names of each split are parsed with a trailing `transform`
# field (passed as `has_tr` to `update_cols`).
has_tr_train = True
has_tr_test = False
has_tr_unc = False

# When true, a third ("uncertain") dataframe is loaded, scored and reported.
use_uncertain = True
# Number of embedding columns; they are addressed by the names '0' .. str(encoding_size - 1).
encoding_size = 512

# When true, splits that carry a `transform` column are reduced to rows with transform == 20.
run_on_raw = True
# `n_neighbors` for the simple KNN classifier; also part of its prediction column name.
knn_neighbors = 11

# Label used in the names of the exported CSVs and the results text file.
run_name = 'C23_C24_july_12_rerun'#'c23_june_21' #'c23_c24_june_14'
# Free-text provenance string written into each section header of the results file.
based_on = '12_07_redo-train/test/val' #'finetuning_14_06_23_c23_c24_no_prevs_replacement_pos_train/test_lowdecay_20.csv'

In [60]:
# Load the embedding tables for the train/val and test splits.
df_train = pd.read_csv('./out/shuffled_train_val.csv')
df_test = pd.read_csv('./out/shuffled_test.csv')

# Normalise the dataset-variant markers inside the image paths so the same image
# carries the same `name` in both splits. The patterns are literal text, so
# regex=False is passed explicitly: the default of `regex` differs between
# pandas versions and must not influence the result.
df_train['name'] = df_train['name'].str.replace('pos-upsampled-concat', 'pos-concat', regex=False)
df_test['name'] = (
    df_test['name']
    .str.replace('pos-concat-val', 'pos-concat', regex=False)
    .str.replace('pos-upsampled-concat', 'pos-concat', regex=False)
    .str.replace('dataset/C', 'dataset/all/C', regex=False)
)

# Drop every training row whose image also occurs in the test split (leak guard).
# `.copy()` makes the result an independent frame, because later cells add
# columns to it.
df_train = df_train[~df_train['name'].isin(df_test['name'])].copy()

if use_uncertain:
    # NOTE(review): this reads the same file as `df_test` and skips the name
    # normalisation above - confirm whether a dedicated "uncertain" CSV was meant.
    df_uncertain_test = pd.read_csv('./out/shuffled_test.csv')

# df_test = pd.concat([pd.read_csv('./negs_test.csv'), pd.read_csv('./out/07_07_upsfix_supcon_upsample_test.csv')], ignore_index=True)

In [61]:
# Show the first test file name to eyeball the path layout.
df_test['name'].iloc[0]

'/home/vanessa/Dev/DATASETS/C23_C24_pos-concat/poz/img_poz_1996_07_09_09_01_00_79.png'

In [None]:
# Show the first remaining training file name. Positional access is used because
# `df_train` was filtered with a boolean mask, so index label 0 may no longer exist.
df_train['name'].iloc[0]

In [None]:
# Names given to the '_'-separated pieces of each `name` value. The schema
# depends on `old_run`; the variant with transforms has one extra trailing field.
cols_to_extract_no_tr = (
    ['useless1', 'part_of_quake', 'Year', 'month', 'day', 'event_idx', 'frame']
    if old_run
    else ['useless1', 'useless2', 'useless3', 'part_of_quake', 'Year', 'month', 'day',
          'Hour', 'Minute', 'Seconds', 'frame']
)
cols_to_extract_has_tr = [*cols_to_extract_no_tr, 'transform']

# Embedding columns are named by their position: '0', '1', ..., str(encoding_size - 1).
emb_cols = [str(position) for position in range(encoding_size)]
# emb_cols = ['1', '3', '5', '7', '9', '11', '13', '15', '17']

In [None]:
def update_cols(df, has_tr):
    """Split each ``name`` value on '_' into metadata columns and cast the numeric ones.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string column ``name``. It is modified in place.
    has_tr : bool
        If true, the pieces are stored under ``cols_to_extract_has_tr`` (which
        ends with ``transform``); otherwise under ``cols_to_extract_no_tr``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the new columns; ``frame`` (and ``transform``
        when ``has_tr``) hold integers.
    """
    if has_tr:
        df[cols_to_extract_has_tr] = df['name'].str.split('_', expand=True)
        # Names with one piece fewer leave `transform` empty; those rows get '20'.
        # NOTE(review): 20 appears to denote the untransformed image (later cells
        # filter on transform == 20) - confirm.
        df.loc[df['transform'].isna(), 'transform'] = '20'
        # Raw strings: '\d' in a plain literal is an invalid escape sequence.
        df['transform'] = df['transform'].str.extract(r'(\d+)', expand=False)
        df['transform'] = df['transform'].astype(int)
    else:
        df[cols_to_extract_no_tr] = df['name'].str.split('_', expand=True)

    # Keep only the first run of digits of the frame field and cast it to int.
    df['frame'] = df['frame'].str.extract(r'(\d+)', expand=False)
    df['frame'] = df['frame'].astype(int)
    return df

def set_custom_preds(df, predcol=''):
    """Attach two group-level votes derived from the ``<predcol>_preds`` column.

    ``avg_<predcol>_preds`` is 1 when the mean of the predictions is strictly
    above 0.5, else 0. ``any_<predcol>_preds`` is 1 when at least one prediction
    is truthy, else 0. Both are broadcast to every row of ``df``.

    Returns the same ``df`` object, modified in place.
    """
    votes = df[predcol + '_preds']
    majority_vote = int(votes.mean() > 0.5)
    any_vote = int(bool(votes.any()))
    df[f'avg_{predcol}_preds'] = majority_vote
    df[f'any_{predcol}_preds'] = any_vote
    return df

def set_preds(df_train, df_test, df_uncertain, preds_train, preds_test, preds_uncertain, clstype):
    """Store one classifier's predictions as the column ``<clstype>_preds`` on each split.

    Parameters
    ----------
    df_train, df_test : pandas.DataFrame
        Frames that receive the column; modified in place.
    df_uncertain : pandas.DataFrame or None
        Optional third frame. It is written only when both it and
        ``preds_uncertain`` are provided.
    preds_train, preds_test, preds_uncertain : array-like or None
        Predictions aligned row-by-row with the matching frame.
    clstype : str
        Classifier label used as the column prefix.

    Returns
    -------
    None
    """
    pred_col = clstype + '_preds'
    df_train[pred_col] = preds_train
    df_test[pred_col] = preds_test
    # Decide from the arguments, not from a notebook-level flag: callers pass
    # None for both uncertain arguments when that split is disabled.
    if df_uncertain is not None and preds_uncertain is not None:
        df_uncertain[pred_col] = preds_uncertain

In [None]:
# Parse the file names of every split into metadata columns; the uncertain
# split is only processed (and otherwise set to None) when it is enabled.
df_train = update_cols(df_train, has_tr=has_tr_train)
df_test = update_cols(df_test, has_tr=has_tr_test)
df_uncertain_test = update_cols(df_uncertain_test, has_tr=has_tr_unc) if use_uncertain else None

In [None]:
# Number of training rows that are labelled positive and have transform == 20.
raw_positive_mask = (df_train['label'] == 1) & (df_train['transform'] == 20)
len(df_train.loc[raw_positive_mask])

c23: 11 train 4 test


c24: 25 train 7 test


In [None]:

if not old_run:
    # Combine the parsed date/time fields into a single `date` column per split.
    date_parts = ['Year', 'month', 'day', 'Hour', 'Minute', 'Seconds']

    df_train['date'] = pd.to_datetime(df_train[date_parts])
    df_test['date'] = pd.to_datetime(df_test[date_parts])

    if use_uncertain:
        df_uncertain_test['date'] = pd.to_datetime(df_uncertain_test[date_parts])

In [None]:
# With `run_on_raw`, every split that carries a `transform` column is reduced
# to the rows whose transform equals 20.
if run_on_raw:
    if has_tr_train:
        df_train = df_train.loc[df_train['transform'] == 20]
    if has_tr_test:
        df_test = df_test.loc[df_test['transform'] == 20]
    if use_uncertain and has_tr_unc:
        df_uncertain_test = df_uncertain_test.loc[df_uncertain_test['transform'] == 20]

# Without transforms in either train or test there is nothing but raw data,
# so the flag is forced on.
if not (has_tr_train or has_tr_test):
    run_on_raw = True

In [None]:
# df_train = df_train[df_train['Year'] >= '2011']
# df_test = df_test[df_test['Year'] >= '2011']

### SMOTE

In [None]:
# Standardise the embedding columns: statistics are learned on the training
# split only and then applied unchanged to the other splits.
scaler = StandardScaler().fit(df_train[emb_cols])
scaled_train = scaler.transform(df_train[emb_cols])
scaled_test = scaler.transform(df_test[emb_cols])
if use_uncertain:
    scaled_uncertain = scaler.transform(df_uncertain_test[emb_cols])

In [None]:
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline

# Rebalance the scaled training set: SMOTE over-sampling followed by random
# under-sampling, chained in an imblearn Pipeline.
# NOTE(review): neither sampler gets a random_state, so X / y (and every model
# fitted on them below) change from run to run - confirm whether that is wanted.
# NOTE(review): per the imbalanced-learn docs a float sampling_strategy is the
# target minority/majority ratio after resampling - confirm 0.2 / 0.75 are intended.
over = SMOTE(sampling_strategy=0.2)
under = RandomUnderSampler(sampling_strategy=0.75)

steps = [('o', over), ('u', under)]
pipeline = Pipeline(steps=steps)

# X, y are the resampled features/labels; the later classifier cells fit on them.
X, y = pipeline.fit_resample(scaled_train, df_train['label'])
# Class counts after resampling (label 0, then label 1).
print (len([x for i,x in enumerate(y) if x == 0]))
print (len([x for i,x in enumerate(y) if x == 1]))

# Polynomial-kernel SVC fitted on the resampled data, evaluated on the
# original (non-resampled) scaled splits.
svc1 = SVC(C=0.005, kernel='poly')#, class_weight='balanced')
svc1.fit(X, y)
preds_train_lr = svc1.predict(scaled_train)
preds_test_lr = svc1.predict(scaled_test)

preds_unc_lr = None
if use_uncertain:
    preds_unc_lr = svc1.predict(scaled_uncertain)

# NOTE(review): preds_y is not read anywhere in the visible notebook.
preds_y = svc1.predict(X)

# NOTE(review): these are SVC predictions but they are stored under the
# 'logistic_regression' label (the `_lr` variable names suggest a copy-paste);
# the "Logistic Reg" cell further down writes the same column again.
set_preds(df_train, df_test, df_uncertain_test, preds_train_lr, preds_test_lr, preds_unc_lr, f'logistic_regression')


print(classification_report(df_test['label'], preds_test_lr))
print(confusion_matrix(df_test['label'], preds_test_lr))
if use_uncertain:
    print(confusion_matrix(df_uncertain_test['label'], preds_unc_lr))
    print(classification_report(df_uncertain_test['label'], preds_unc_lr))

In [None]:
# svc1 = SVC(C=0.005, kernel='poly')#, class_weight='balanced')
# svc1.fit(scaled_train, df_train['label'])
# preds_train_lr = svc1.predict(scaled_train)
# preds_test_lr = svc1.predict(scaled_test)

# preds_unc_lr = None
# if use_uncertain:
#     preds_unc_lr = svc1.predict(scaled_uncertain)

# preds_y = svc1.predict(X)

# set_preds(df_train, df_test, df_uncertain_test, preds_train_lr, preds_test_lr, preds_unc_lr, f'logistic_regression')


# print(classification_report(df_test['label'], preds_test_lr))
# print(confusion_matrix(df_test['label'], preds_test_lr))
# if use_uncertain:
#     print(confusion_matrix(df_uncertain_test['label'], preds_unc_lr))
#     print(classification_report(df_uncertain_test['label'], preds_unc_lr))

In [None]:
# gb2 = gb_cols.remove('frame')

In [None]:
# df_test

In [None]:
# # df_test['consec_pred_count'] = 

# def set_len(df):
#     df['consec_pred_count'] = len(df)
#     return df

# df_test = df_test.sort_values(by=gb_cols+['frame'])
# consecutives = df_test['logistic_regression_preds'].diff().ne(0).cumsum()
# df_test = df_test.groupby(consecutives).apply(lambda df: set_len(df))

In [None]:
# df_test['final_preds'] = df_test['logistic_regression_preds']

In [None]:
# df_test.loc[(df_test['logistic_regression_preds'] == 1) & (df_test['consec_pred_count'] < 7),'final_preds'] = 0

In [None]:
# df_test[(df_test['final_preds'] > 0) & (df_test['label'] > 0)][gb_cols+['frame','consec_pred_count']].sort_values(by=['Year','month','frame'])

In [None]:
# print(confusion_matrix(df_test['label'], df_test['final_preds']))
# print(classification_report(df_test['label'], df_test['final_preds']))

In [None]:
# print(confusion_matrix(df_test['label'], df_test['logistic_regression_preds']))
# print(classification_report(df_test['label'], df_test['logistic_regression_preds']))

In [None]:
# fp_unc = df_uncertain_test[(df_uncertain_test['logistic_regression_preds']==1) & (df_uncertain_test['label'] == 0)]
# # g1 = fp_unc.groupby(['Year','month','day','Hour','Minute','Seconds'])
# for i,g in enumerate(g1.groups):
#     g1.get_group(g)
#     print(f"event: {'_'.join(str(i) for i in g)}, frames:")
#     print(grs[i].sort_values(by='frame')['frame'].tolist())

In [None]:
# fp = df_test[(df_test['logistic_regression_preds']==1) & (df_test['label'] == 0)]

In [None]:
# g1 = fp.groupby(['Year','month','day','Hour','Minute','Seconds'])

In [None]:
# for i,g in enumerate(g1.groups):
#     g1.get_group(g)
#     print(f"event: {'_'.join(str(i) for i in g)}, frames:")
#     print(grs[i].sort_values(by='frame')['frame'].tolist())

In [None]:
# p = model.predict(X)
# print(confusion_matrix(y, p))
# print(classification_report(y, p))

In [None]:
# from sklearn.svm import SVC

# svc1 = SVC(C=0.5, kernel='poly')#, class_weight='balanced')
# svc1 = svc1.fit(X, y)

# # t = df_test[df_test['transform'] == 20]
# # sctt = scaler.transform(t[emb_cols])

# preds_train = svc1.predict(scaled_train)
# preds = svc1.predict(scaled_test)#sctt)

# print(classification_report(df_test['label'], preds))
# print(confusion_matrix(df_test['label'], preds))

### KNN Simple

In [None]:
# for k in range(11,12):
# Plain k-nearest-neighbours classifier fitted on the resampled training set
# and evaluated on the original scaled splits.
model = knn(n_neighbors=knn_neighbors)
model.fit(X, y)

preds_train = model.predict(scaled_train)
preds = model.predict(scaled_test)

uncertain_preds = None
if use_uncertain:
    uncertain_preds = model.predict(scaled_uncertain)

knn_name = f'knn_{knn_neighbors}'
set_preds(df_train, df_test, df_uncertain_test, preds_train, preds, uncertain_preds, knn_name)

# Report on the column that was just written. The name is derived from
# `knn_neighbors` so the cell keeps working when that setting is not 11.
knn_pred_col = f'{knn_name}_preds'
print(confusion_matrix(df_test['label'], df_test[knn_pred_col]))
print(classification_report(df_test['label'], df_test[knn_pred_col]))

if use_uncertain:
    print(confusion_matrix(df_uncertain_test['label'], df_uncertain_test[knn_pred_col]))
    print(classification_report(df_uncertain_test['label'], df_uncertain_test[knn_pred_col]))

### KNN Bagging

In [None]:
# Bagging ensemble fitted on the resampled training set. `knn_bagging` is the
# import alias of sklearn's BaggingClassifier.
# NOTE(review): no base estimator is passed, so scikit-learn's default is used -
# per its docs that is a decision tree, not KNN; confirm whether a KNN base
# estimator was intended given the section title.
# NOTE(review): max_features=1 is an int; per the sklearn docs an int means
# "one feature per estimator", whereas 1.0 would mean all features - confirm.
model = knn_bagging(bootstrap=True,warm_start=True,bootstrap_features=True, max_features=1)
model.fit(X, y)

preds_train = model.predict(scaled_train)
preds = model.predict(scaled_test)

uncertain_preds = None
if use_uncertain:
    uncertain_preds = model.predict(scaled_uncertain)
    
# Stored as the 'knn_bagging_preds' column on each split.
set_preds(df_train, df_test, df_uncertain_test, preds_train, preds, uncertain_preds, f'knn_bagging')

In [None]:
# # set_preds(df_train, df_test, df_uncertain_test, preds_train, preds, uncertain_preds, f'knn_{knn_neighbors}')
# # print(confusion_matrix(df_train['label'], preds_train))
# # print(classification_report(df_train['label'], preds_train))

# # set_preds(df_train, df_test, df_uncertain_test, preds_train, preds, uncertain_preds, f'knn_{knn_neighbors}')
# print(confusion_matrix(df_test['label'], preds))
# print(classification_report(df_test['label'], preds))

# print("--------------------UNC --------------")

# print(confusion_matrix(df_uncertain_test['label'], uncertain_preds))
# print(classification_report(df_uncertain_test['label'], uncertain_preds))

### SVC

In [None]:
from sklearn.svm import SVC

# Fit one SVC per kernel on the resampled training set and store its
# predictions as 'svc_<kernel>_preds' on every split.
kernels = ['poly', 'rbf']

for kernel in kernels:
    svc1 = SVC(C=0.005, kernel=kernel)#, class_weight='balanced')
    svc1.fit(X, y)

    preds_train_svc = svc1.predict(scaled_train)
    preds_test_svc = svc1.predict(scaled_test)

    preds_unc_svc = None
    if use_uncertain:
        # Use the SVC fitted in this iteration; `model` still refers to the
        # classifier left over from an earlier cell.
        preds_unc_svc = svc1.predict(scaled_uncertain)

    set_preds(df_train, df_test, df_uncertain_test, preds_train_svc, preds_test_svc, preds_unc_svc, f'svc_{kernel}')


In [None]:
# Leak check: how many test rows share their `date` with some training row.
shared_date_mask = df_test['date'].isin(df_train['date'])
int(shared_date_mask.sum())

In [None]:
# Poly-kernel SVC on the test split: confusion matrix, then per-class report.
for metric_fn in (confusion_matrix, classification_report):
    print(metric_fn(df_test['label'], df_test['svc_poly_preds']))


In [None]:
# Poly-kernel SVC on the uncertain split. Guarded because `df_uncertain_test`
# is None when the uncertain split is disabled.
if use_uncertain:
    print(confusion_matrix(df_uncertain_test['label'], df_uncertain_test['svc_poly_preds']))
    print(classification_report(df_uncertain_test['label'], df_uncertain_test['svc_poly_preds']))


In [None]:
# Poly-kernel SVC on the training split: confusion matrix, then per-class report.
for metric_fn in (confusion_matrix, classification_report):
    print(metric_fn(df_train['label'], df_train['svc_poly_preds']))


In [None]:

# Poly-kernel SVC on the uncertain split (this cell repeats an earlier report).
# Guarded because `df_uncertain_test` is None when the uncertain split is disabled.
if use_uncertain:
    print(confusion_matrix(df_uncertain_test['label'], df_uncertain_test['svc_poly_preds']))
    print(classification_report(df_uncertain_test['label'], df_uncertain_test['svc_poly_preds']))


In [None]:
# fp = df_test[(df_test['svc_poly_preds']==1) & (df_test['label'] == 0)]
# g1 = fp .groupby(['Year','month','day','Hour','Minute','Seconds'])
# for i,g in enumerate(g1.groups):
#     gr = g1.get_group(g)
#     print(f"event: {'_'.join(str(i) for i in g)}, frames:")
#     print(gr.sort_values(by='frame')['frame'].tolist())

### Logistic Regression

In [None]:
# Logistic regression fitted on the resampled training set and evaluated on
# the original scaled splits.
lr = logistic_reg()#max_iter=200, warm_start=True, tol=1e-1,solver='newton-cg')#class_weight='balanced')
lr.fit(X, y)
preds_train_lr = lr.predict(scaled_train)
preds_test_lr = lr.predict(scaled_test)

preds_unc_lr = None
if use_uncertain:
    # Use the logistic-regression model fitted above; `model` still refers to
    # the classifier left over from an earlier cell.
    preds_unc_lr = lr.predict(scaled_uncertain)

set_preds(df_train, df_test, df_uncertain_test, preds_train_lr, preds_test_lr, preds_unc_lr, f'logistic_regression')

# Test-split metrics first, then training-split metrics.
print(classification_report(df_test['label'], preds_test_lr))
print(confusion_matrix(df_test['label'], preds_test_lr))

print(classification_report(df_train['label'], preds_train_lr))
print(confusion_matrix(df_train['label'], preds_train_lr))

### SGD

In [None]:
# SGD-trained linear classifier (`sgd` is the import alias of SGDClassifier),
# fitted on the resampled training set with class_weight='balanced'.
# NOTE(review): no random_state is set, so results may differ between runs.
model = sgd(class_weight='balanced')
model.fit(X, y)

# Predictions are made on the original (non-resampled) scaled splits.
preds_train = model.predict(scaled_train)
preds = model.predict(scaled_test)

uncertain_preds = None
if use_uncertain:
    uncertain_preds = model.predict(scaled_uncertain)

# Stored as the 'sgd_preds' column on each split.
set_preds(df_train, df_test, df_uncertain_test, preds_train, preds, uncertain_preds, f'sgd')

print(classification_report(df_test['label'],preds))
print(confusion_matrix(df_test['label'],preds))

### Avg & Any

In [None]:
# Columns that identify one frame of one event; rows sharing them are voted
# on together. Only the middle part depends on the file-name schema.
event_keys = ['event_idx'] if old_run else ['Hour', 'Minute', 'Seconds']
gb_cols = ['part_of_quake', 'Year', 'month', 'day', *event_keys, 'frame']

# Prefixes of the '<name>_preds' columns written by the classifier cells.
cls_types = [
    f'knn_{knn_neighbors}',
    'knn_bagging',
    'svc_poly',
    'svc_rbf',
    'logistic_regression',
    'sgd',
]

In [None]:


# Group-level "avg" and "any" votes are only computed when both train and test
# carry transforms and the run is not restricted to raw rows.
if has_tr_train and has_tr_test and not run_on_raw:
    for cls in cls_types:
        df_train = df_train.groupby(gb_cols).apply(lambda group: set_custom_preds(group, cls))
        df_test = df_test.groupby(gb_cols).apply(lambda group: set_custom_preds(group, cls))
        if use_uncertain:
            df_uncertain_test = df_uncertain_test.groupby(gb_cols).apply(lambda group: set_custom_preds(group, cls))

    # Prefixes of the vote columns: every 'any_*' name first, then every 'avg_*' name.
    any_names = [f'any_{classtype}' for classtype in cls_types]
    avg_names = [f'avg_{classtype}' for classtype in cls_types]
    cls_types_any_avg = any_names + avg_names

In [None]:
# Logistic-regression column on the test split: confusion matrix, then report.
for metric_fn in (confusion_matrix, classification_report):
    print(metric_fn(df_test['label'], df_test['logistic_regression_preds']))

In [None]:
# Persist every split, including all prediction columns, under the run label.
frames_to_export = [('train', df_train), ('test', df_test)]
if use_uncertain:
    frames_to_export.append(('uncertain', df_uncertain_test))

for split_label, split_frame in frames_to_export:
    split_frame.to_csv('./out/builtpreds/' + run_name + '_' + split_label + '.csv', index=False)

In [None]:
# Raw-only (transform == 20) views of each split; a view is only created for
# splits that carry a `transform` column.
if has_tr_train:
    df_train_raw = df_train.loc[df_train['transform'] == 20]
if has_tr_test:
    df_test_raw = df_test.loc[df_test['transform'] == 20]
if use_uncertain:
    if has_tr_unc:
        df_uncertain_raw = df_uncertain_test.loc[df_uncertain_test['transform'] == 20]

# Reports

In [None]:
# Echo the run label; it is part of the names of the exported CSVs and the results file.
run_name

In [None]:
if use_uncertain:
    # Poly-kernel SVC on the uncertain split: per-class report, then confusion matrix.
    print("\nUncertain:\n")
    for metric_fn in (classification_report, confusion_matrix):
        print(metric_fn(df_uncertain_test['label'], df_uncertain_test['svc_poly_preds']))


In [None]:
# Export the false positives of the poly-kernel SVC on the uncertain split:
# rows predicted positive whose label is 0. The label condition is required for
# the file to contain what its name says; without it true positives are
# exported as well. Guarded because the frame is None when the split is disabled.
if use_uncertain:
    unc_false_positive_mask = (
        (df_uncertain_test['svc_poly_preds'] == 1) & (df_uncertain_test['label'] == 0)
    )
    (
        df_uncertain_test.loc[unc_false_positive_mask, ['date', 'frame']]
        .sort_values(by=['date', 'frame'])
        .to_csv("false_positives_uncertain_12_july_smote.csv", index=False)
    )

In [None]:
# True positives of the poly-kernel SVC on the uncertain split (date and frame only).
unc_true_positive_mask = (df_uncertain_test['svc_poly_preds'] == 1) & (df_uncertain_test['label'] == 1)
df_uncertain_test.loc[unc_true_positive_mask, ['date', 'frame']]

In [None]:
import sys
import numpy as np

def _write_metrics(handle, frame, pred_col):
    """Append the classification report and the confusion matrix of ``frame[pred_col]``
    against ``frame['label']`` to the open text file ``handle``."""
    handle.write(classification_report(frame['label'], frame[pred_col]))
    handle.write(np.array2string(confusion_matrix(frame['label'], frame[pred_col])))


# One text file per run: a short header with the transform flags, then one
# section per classifier with Train / Test / (optional) Uncertain metrics.
with open(f"results_all_contrastive_{run_name}{'_raw' if run_on_raw else ''}.txt", 'w') as f:
    f.write(f"transforms train: {'true' if has_tr_train else 'false'}\n")
    f.write(f"transforms test: {'true' if has_tr_test else 'false'}\n")
    if use_uncertain:
        f.write(f"transforms unc: {'true' if has_tr_unc else 'false'}\n")

    # Per-row predictions of every classifier on the full splits.
    for cls in cls_types:
        f.write(f"\n------{cls} {'raw' if run_on_raw else ''} based on {based_on}------\n")

        f.write("\nTrain:\n")
        _write_metrics(f, df_train, f'{cls}_preds')
        f.write("\nTest:\n")
        _write_metrics(f, df_test, f'{cls}_preds')
        if use_uncertain:
            f.write("\nUncertain:\n")
            _write_metrics(f, df_uncertain_test, f'{cls}_preds')

    # Group-level any/avg votes, evaluated on the raw-only views of each split.
    if not run_on_raw:
        for cls in cls_types_any_avg:
            f.write(f"\n------{cls} {'raw' if run_on_raw else ''} based on {based_on}------\n")

            f.write("\nTrain:\n")
            _write_metrics(f, df_train_raw, f'{cls}_preds')
            f.write("\nTest:\n")
            _write_metrics(f, df_test_raw, f'{cls}_preds')
            if use_uncertain:
                f.write("\nUncertain:\n")
                _write_metrics(f, df_uncertain_raw, f'{cls}_preds')

# others

In [None]:
def set_avg_cols(df):
    """Add one ``avg_<i>`` column per embedding dimension to ``df``.

    For each dimension the value is the midpoint between the mean over rows
    with ``transform != 20`` and the mean over rows with ``transform == 20``;
    it is broadcast to every row. Returns the same ``df``, modified in place.
    """
    transformed_rows = df['transform'] != 20
    raw_rows = df['transform'] == 20
    for dim in range(encoding_size):
        source_col = f'{dim}'
        transformed_mean = df.loc[transformed_rows, source_col].mean()
        raw_mean = df.loc[raw_rows, source_col].mean()
        df[f'avg_{dim}'] = (transformed_mean + raw_mean) / 2
    return df

In [None]:
# x_cp = df_train.copy()
# x_cp = x_cp.groupby(gb_cols).apply(lambda df: set_avg_cols(df))

In [None]:
# Averaged-embedding columns for the test split, computed per `gb_cols` group
# on a copy so `df_test` itself is left untouched.
x_test_cp = df_test.copy().groupby(gb_cols).apply(set_avg_cols)

3 5 7 9 10 13 15 16

In [None]:
# Rows whose averaged value for dimension 19 could not be computed (NaN).
missing_avg_mask = x_test_cp['avg_19'].isna()
x_test_cp.loc[missing_avg_mask]

In [None]:
# `x_cp` is the training-side counterpart of `x_test_cp`. It is built here so
# that the cell runs on a fresh kernel instead of relying on a variable that no
# active cell defines.
x_cp = df_train.copy().groupby(gb_cols).apply(set_avg_cols)

# NOTE: this rebinds `scaler`, `scaled_train` and `scaled_test` from the main
# pipeline above; re-run the earlier cells before reusing them.
scaler = StandardScaler()

# Keep only the raw rows (transform == 20) of each split.
x_cp_raw = x_cp[x_cp['transform']==20]
x_test_cp_raw = x_test_cp[x_test_cp['transform']==20]

avg_emb_cols = [f'avg_{x}' for x in range(encoding_size)]

# Statistics are learned on the training rows and applied to the test rows.
scaled_train = scaler.fit_transform(x_cp_raw[avg_emb_cols])
scaled_test = scaler.transform(x_test_cp_raw[avg_emb_cols])

In [None]:
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline

# Same resample-then-classify experiment as the main pipeline, but on the
# averaged embeddings.
# NOTE(review): this cell rebinds X, y, model, preds_train and preds, which the
# main-pipeline cells also use - running cells out of order will mix the two.
# NOTE(review): no random_state is set on the samplers, so results vary per run.
over = SMOTE(sampling_strategy=0.1)
under = RandomUnderSampler(sampling_strategy=0.5)

steps = [('o', over), ('u', under)]
pipeline = Pipeline(steps=steps)

X, y = pipeline.fit_resample(scaled_train, x_cp_raw['label'])
# Class counts after resampling (label 0, then label 1).
print (len([x for i,x in enumerate(y) if x == 0]))
print (len([x for i,x in enumerate(y) if x == 1]))

# for k in range(11,12):
# n_neighbors is fixed to 3 here rather than taken from `knn_neighbors`.
model = knn(n_neighbors=3)
model.fit(X, y)

preds_train = model.predict(scaled_train)
preds = model.predict(scaled_test)

# set_preds(df_train, df_test, df_uncertain_test, preds_train, preds, uncertain_preds, f'knn_{knn_neighbors}')
# Metrics are computed against the raw rows of the averaged test frame.
print(confusion_matrix(x_test_cp_raw['label'], preds))
print(classification_report(x_test_cp_raw['label'], preds))

In [None]:
# print("------------------------AVG CERTAIN:------------------------")
# print(classification_report(df_test['label'], df_test['avg_preds']))
# print(confusion_matrix(df_test['label'], df_test['avg_preds']))

# print("------------------------AVG UNCERTAIN:------------------------")
# print(classification_report(df_uncertain_test['label'], df_uncertain_test['avg_preds']))
# print(confusion_matrix(df_uncertain_test['label'], df_uncertain_test['avg_preds']))

In [None]:
# print("------------------------ANY CERTAIN:------------------------")
# print(classification_report(df_test['label'], df_test['any_preds']))
# print(confusion_matrix(df_test['label'], df_test['any_preds']))

# print("------------------------ANY UNCERTAIN:------------------------")
# print(classification_report(df_uncertain_test['label'], df_uncertain_test['any_preds']))
# print(confusion_matrix(df_uncertain_test['label'], df_uncertain_test['any_preds']))

In [None]:
# print("------------------------ANY RAW CERTAIN:------------------------")
# print(classification_report(df_test_raw['label'], df_test_raw['any_preds']))
# print(confusion_matrix(df_test_raw['label'], df_test_raw['any_preds']))

# print("------------------------ANY RAW UNCERTAIN:------------------------")
# print(classification_report(df_uncertain_raw['label'], df_uncertain_raw['any_preds']))
# print(confusion_matrix(df_uncertain_raw['label'], df_uncertain_raw['any_preds']))

In [None]:
# df_test[(df_test['any_preds']==1) & (df_test['label']==1) & (df_test['Year']=='1996')].sort_values(by=['transform','frame'])[['transform','label','preds','any_preds','avg_preds','frame']]

In [None]:
# df_test_raw[(df_test_raw['any_preds']==1) & (df_test_raw['label']==0)].sort_values(by=['Year','month','frame'])

In [None]:
# pred_pos = df_test[(df_test['preds']==1) & (df_test['label']==1)]['name'].tolist()
# pred_pos.sort()
# # pred_pos

In [None]:
# pred_wrong = df_test_raw[(df_test_raw['preds']==1) & (df_test_raw['label']==0)].sort_values(by=['Year','month','frame'])['name'].tolist()
# pred_wrong

### False positives (detects SQ where there is none)

In [None]:
# Rows of the raw uncertain split predicted positive ('any_preds' == 1) whose label is 0.
# NOTE(review): `df_uncertain_raw` is only assigned when `use_uncertain and has_tr_unc`,
# and no visible cell creates a column named exactly 'any_preds' (the vote columns
# are named 'any_<classifier>_preds') - confirm which classifier is meant.
pred_wrong_uncertain = df_uncertain_raw[(df_uncertain_raw['any_preds']==1) & (df_uncertain_raw['label']==0)].sort_values(by=['Year','month','frame'])[['Year', 'month','day','Hour','Minute','Seconds','frame']]
pred_wrong_uncertain

### False negatives (fails to detect SQ)

In [None]:
# Rows of the raw uncertain split predicted negative ('any_preds' == 0) whose label is 1.
# NOTE(review): `df_uncertain_raw` is only assigned when `use_uncertain and has_tr_unc`,
# and no visible cell creates a column named exactly 'any_preds' - confirm the column.
pred_wrong_uncertain = df_uncertain_raw[(df_uncertain_raw['any_preds']==0) & (df_uncertain_raw['label']==1)].sort_values(by=['Year','month','frame'])[['Year', 'month','day','Hour','Minute','Seconds','frame']]
pred_wrong_uncertain

### True positives (detects SQ correctly)

In [None]:
# Rows of the raw uncertain split predicted positive ('any_preds' == 1) whose label is 1.
# NOTE(review): the result is stored in `pred_wrong_uncertain` although these are
# correct predictions; the name is reused from the two cells above.
# NOTE(review): `df_uncertain_raw` is only assigned when `use_uncertain and has_tr_unc`,
# and no visible cell creates a column named exactly 'any_preds' - confirm the column.
pred_wrong_uncertain = df_uncertain_raw[(df_uncertain_raw['any_preds']==1) & (df_uncertain_raw['label']==1)].sort_values(by=['Year','month','frame'])[['Year', 'month','day','Hour','Minute','Seconds','frame']]
pred_wrong_uncertain