In [1]:
import pickle
def calculate_wer(reference, hypothesis):
	ref_words = reference.split()
	hyp_words = hypothesis.split()
	# Counting the number of substitutions, deletions, and insertions
	substitutions = sum(1 for ref, hyp in zip(ref_words, hyp_words) if ref != hyp)
	deletions = len(ref_words) - len(hyp_words)
	insertions = len(hyp_words) - len(ref_words)
	# Total number of words in the reference text
	total_words = len(ref_words)
	# Calculating the Word Error Rate (WER)
	wer = (substitutions + deletions + insertions) / total_words
	return wer

def calculate_wer_from_files(reference_file, hypothesis_file):
    with open(reference_file, 'rb') as ref_file:
        references = pickle.load(ref_file)
    with open(hypothesis_file, 'rb') as hyp_file:
        hypothesiss = pickle.load(hyp_file)
    if len(hypothesiss) != 100 :
        hypothesiss = hypothesiss[0]
    
    error = [calculate_wer(reference, hypothesis) for reference, hypothesis in zip(references, hypothesiss)]
    return error

In [2]:
import glob 
import pickle
import pandas as pd
water_marked_files = sorted(glob.glob('Dataset/Watermarked/en/*/*'))
attacked_files = sorted(glob.glob('Dataset/Attacked/*/*/*'))

In [3]:
d= {}
for wfile in water_marked_files:
    for afile in attacked_files:
        key = afile.split('/')[2]+' '+ afile.split('/')[-2]  + ' '
        if 'rephrased'  in afile or 'dipper' in afile:
           key+= "".join(afile.split('/')[-1].split('_')[-1]) 
        key = key.lower().replace('.pkl' , '').replace('dipper' , 'rephrased').replace('sirrephrased' , 'rephrased').title()
        key = tuple(key.split(' '))
        if ('sir' in wfile.lower() and 'sir' in afile.lower()):
             d[key] = calculate_wer_from_files(wfile, afile)
        elif('semantics' not in wfile.lower() and 'semantics' not in afile.lower()):
               d[key] = calculate_wer_from_files(wfile, afile)
        else:
                d[key] = calculate_wer_from_files(wfile, afile)
      

In [4]:
df = pd.DataFrame(d).describe()
df = df.sort_index(axis = 1)

In [5]:
styled_df = df.style.set_table_styles([
    {'selector': 'th.level0',
     'props': [('text-align', 'center')]
    },
    {'selector': 'th',
     'props': [('background-color', '#f4f4f9'), ('color', 'black')]
    },
    {'selector': 'td',
     'props': [('background-color', '#f9f9f9')]
    },
    # Add horizontal lines after each row
    {'selector': 'tr',
     'props': [('border-bottom', '1px solid black')]
    },
     {'selector': 'td',
     'props': [('border-bottom', '1px solid black')]
    }
], overwrite=True)

In [6]:
styled_df

Unnamed: 0_level_0,Normaltranslation,Normaltranslation,Normaltranslation,Paraphraed_Pivottranslation,Paraphraed_Pivottranslation,Paraphraed_Pivottranslation,Paraphrased_Normaltranslation,Paraphrased_Normaltranslation,Paraphrased_Normaltranslation,Pivottranslation,Pivottranslation,Pivottranslation,Recursivepara,Recursivepara,Recursivepara,Recursivepara,Recursivepara,Recursivepara,Recursivepara,Recursivepara,Recursivepara,Recursivepara,Recursivepara,Recursivepara,Recursivepara,Recursivepara,Recursivepara,Translation_Paraphrased,Translation_Paraphrased,Translation_Paraphrased,Translation_Paraphrased,Translation_Paraphrased,Translation_Paraphrased,Translation_Paraphrased,Translation_Paraphrased,Translation_Paraphrased,Translation_Paraphrased,Translation_Paraphrased,Translation_Paraphrased,Translation_Paraphrased,Translation_Paraphrased,Translation_Paraphrased
Unnamed: 0_level_1,Kwg,Semantics,Sir,Kwg,Semantics,Sir,Kwg,Semantics,Sir,Kwg,Semantics,Sir,Kwg,Kwg,Kwg,Kwg,Kwg,Semantics,Semantics,Semantics,Semantics,Semantics,Sir,Sir,Sir,Sir,Sir,Kwg,Kwg,Kwg,Kwg,Kwg,Semantics,Semantics,Semantics,Semantics,Semantics,Sir,Sir,Sir,Sir,Sir
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Rephrased0,Rephrased1,Rephrased2,Rephrased3,Rephrased4,Rephrased0,Rephrased1,Rephrased2,Rephrased3,Rephrased4,Rephrased0,Rephrased1,Rephrased2,Rephrased3,Rephrased4,Rephrased0,Rephrased1,Rephrased2,Rephrased3,Rephrased4,Rephrased0,Rephrased1,Rephrased2,Rephrased3,Rephrased4,Rephrased0,Rephrased1,Rephrased2,Rephrased3,Rephrased4
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.509325,0.500989,0.364616,0.419878,0.42236,0.401771,0.511323,0.503679,0.3688,0.414078,0.421901,0.395714,0.486053,0.441264,0.407404,0.381314,0.357723,0.455314,0.385146,0.342145,0.309242,0.286088,0.345031,0.319735,0.295381,0.277451,0.259165,0.475217,0.427193,0.391452,0.36743,0.34762,0.440675,0.367375,0.325221,0.295358,0.272534,0.338743,0.309645,0.287254,0.266663,0.250289
std,0.214982,0.226645,0.280185,0.22942,0.226867,0.211096,0.229098,0.23017,0.279948,0.2197,0.219969,0.2069,0.222866,0.227531,0.226309,0.229715,0.230729,0.230668,0.237615,0.235423,0.22567,0.217648,0.280975,0.269185,0.252109,0.240366,0.227999,0.225257,0.229592,0.226463,0.227273,0.229188,0.236036,0.232527,0.227409,0.217529,0.206412,0.2764,0.261494,0.249573,0.235618,0.22194
min,0.112676,0.112676,0.045375,0.130019,0.113346,0.139535,0.108652,0.101974,0.031414,0.116635,0.095064,0.152824,0.126761,0.122736,0.120724,0.10664,0.10664,0.104628,0.078695,0.049904,0.040307,0.042226,0.034904,0.022181,0.031423,0.044266,0.027726,0.108652,0.112676,0.110664,0.092555,0.094567,0.101754,0.06947,0.03263,0.03263,0.03263,0.022556,0.038394,0.036649,0.038394,0.022181
25%,0.367724,0.378507,0.13413,0.265894,0.273646,0.275061,0.366123,0.376755,0.135812,0.269174,0.284349,0.269886,0.341562,0.294895,0.258753,0.23535,0.207056,0.305763,0.239281,0.198738,0.178431,0.160536,0.112578,0.114074,0.106796,0.094185,0.089228,0.323779,0.281147,0.244689,0.224192,0.207435,0.292935,0.234994,0.194788,0.170645,0.150742,0.121361,0.109201,0.102064,0.091127,0.090986
50%,0.430924,0.424534,0.291722,0.314814,0.317161,0.315299,0.414946,0.418194,0.291404,0.32821,0.318768,0.306851,0.39448,0.347246,0.317651,0.292514,0.272348,0.392344,0.325104,0.267401,0.23495,0.206737,0.268229,0.245269,0.225601,0.214713,0.201337,0.385813,0.327335,0.297949,0.270881,0.258774,0.373559,0.294006,0.254831,0.219474,0.20345,0.255897,0.239602,0.221844,0.196598,0.187485
75%,0.642473,0.594196,0.540166,0.485725,0.498993,0.45562,0.647288,0.598753,0.544735,0.464549,0.50512,0.457444,0.582949,0.550427,0.477121,0.452899,0.434508,0.56237,0.42491,0.40985,0.353429,0.315263,0.495656,0.401405,0.380979,0.363833,0.321128,0.582586,0.515523,0.463851,0.436682,0.416313,0.518009,0.394019,0.369621,0.328541,0.297462,0.487077,0.392068,0.361286,0.337994,0.313095
max,0.992366,1.0,1.0,0.992366,1.0,0.983471,0.992063,0.991736,1.0,0.993333,0.994152,0.992366,1.0,1.0,0.993289,1.0,1.0,1.0,0.991736,1.0,0.992063,0.991379,1.0,1.0,0.989051,1.0,0.994152,1.0,1.0,1.0,0.992063,1.0,1.0,0.984127,1.0,1.0,0.956897,1.0,0.9967,1.0,1.0,1.0


In [7]:
def edit_distance(str1, str2):
    len1 = len(str1)
    len2 = len(str2)
    print(len1, len2)
    
    # Create a table to store results of subproblems
    dp = [[0] * (len2 + 1) for _ in range(len1 + 1)]
    
    # Fill dp[][] in bottom up manner
    for i in range(len1 + 1):
        for j in range(len2 + 1):
            # If first string is empty, only option is to
            # insert all characters of second string
            if i == 0:
                dp[i][j] = j
                
            # If second string is empty, only option is to
            # remove all characters of first string
            elif j == 0:
                dp[i][j] = i
                
            # If last characters are same, ignore last char
            # and recur for remaining string
            elif str1[i - 1] == str2[j - 1]:
                dp[i][j] = dp[i - 1][j - 1]
                
            # If last character are different, consider all
            # possibilities and find minimum
            else:
                dp[i][j] = 1 + min(dp[i][j - 1],      # Insert
                                   dp[i - 1][j],      # Remove
                                   dp[i - 1][j - 1])  # Replace
    
    return dp[len1][len2] / len1


def calculate_edit_from_files(reference_file, hypothesis_file):
    with open(reference_file, 'rb') as ref_file:
        references = pickle.load(ref_file)
    with open(hypothesis_file, 'rb') as hyp_file:
        hypothesiss = pickle.load(hyp_file)
    if len(hypothesiss) != 100 :
        hypothesiss = hypothesiss[0]
    
    error = [edit_distance(reference, hypothesis) for reference, hypothesis in zip(references[:1], hypothesiss[:1])]
    return error


In [8]:
d= {}
for wfile in water_marked_files:
    for afile in attacked_files:
        key = afile.split('/')[2]+' '+ afile.split('/')[-2]  + ' '
        if 'rephrased'  in afile or 'dipper' in afile:
           key+= "".join(afile.split('/')[-1].split('_')[-1]) 
        key = key.lower().replace('.pkl' , '').replace('dipper' , 'rephrased').replace('sirrephrased' , 'rephrased').title()
        key = tuple(key.split(' '))
        if ('sir' in wfile.lower() and 'sir' in afile.lower()):
             d[key] = calculate_edit_from_files(wfile, afile)
        elif('semantics' not in wfile.lower() and 'semantics' not in afile.lower()):
               d[key] = calculate_edit_from_files(wfile, afile)
        else:
                d[key] = calculate_edit_from_files(wfile, afile)
      

1481 1439
1481 1459
1481 610
1481 1040
1481 1055
1481 906
1481 1394
1481 1433
1481 574
1481 1090
1481 1083
1481 916
1481 465
1481 411
1481 398
1481 399
1481 393
1481 1359
1481 1223
1481 1184
1481 1101
1481 1057
1481 1265
1481 894
1481 883
1481 811
1481 654
1481 480
1481 398
1481 392
1481 398
1481 384
1481 1345
1481 1171
1481 1192
1481 1119
1481 1080
1481 1275
1481 870
1481 822
1481 774
1481 640
1520 1439
1520 1459
1520 610
1520 1040
1520 1055
1520 906
1520 1394
1520 1433
1520 574
1520 1090
1520 1083
1520 916
1520 465
1520 411
1520 398
1520 399
1520 393
1520 1359
1520 1223
1520 1184
1520 1101
1520 1057
1520 1265
1520 894
1520 883
1520 811
1520 654
1520 480
1520 398
1520 392
1520 398
1520 384
1520 1345
1520 1171
1520 1192
1520 1119
1520 1080
1520 1275
1520 870
1520 822
1520 774
1520 640
2190 1439
2190 1459
2190 610
2190 1040
2190 1055
2190 906
2190 1394
2190 1433
2190 574
2190 1090
2190 1083
2190 916
2190 465
2190 411
2190 398
2190 399
2190 393
2190 1359
2190 1223
2190 1184
2190 1101
219

In [9]:
d

{('Normaltranslation', 'Kwg', ''): [0.5958904109589042],
 ('Normaltranslation', 'Semantics', ''): [0.5972602739726027],
 ('Normaltranslation', 'Sir', ''): [0.8022831050228311],
 ('Paraphraed_Pivottranslation', 'Kwg', ''): [0.6579908675799087],
 ('Paraphraed_Pivottranslation', 'Semantics', ''): [0.6570776255707762],
 ('Paraphraed_Pivottranslation', 'Sir', ''): [0.6831050228310502],
 ('Paraphrased_Normaltranslation', 'Kwg', ''): [0.6273972602739726],
 ('Paraphrased_Normaltranslation', 'Semantics', ''): [0.6132420091324201],
 ('Paraphrased_Normaltranslation', 'Sir', ''): [0.828310502283105],
 ('Pivottranslation', 'Kwg', ''): [0.6515981735159817],
 ('Pivottranslation', 'Semantics', ''): [0.6534246575342466],
 ('Pivottranslation', 'Sir', ''): [0.6831050228310502],
 ('Recursivepara', 'Sir', 'Rephrased0'): [0.8529680365296803],
 ('Recursivepara', 'Sir', 'Rephrased1'): [0.8721461187214612],
 ('Recursivepara', 'Sir', 'Rephrased2'): [0.8753424657534247],
 ('Recursivepara', 'Sir', 'Rephrased3'): 

In [7]:
from bert_score import score

def calculate_bert_score_from_files(reference_file, hypothesis_file):
    with open(reference_file, 'rb') as ref_file:
        references = pickle.load(ref_file)
    with open(hypothesis_file, 'rb') as hyp_file:
        hypothesiss = pickle.load(hyp_file)
    if len(hypothesiss) != 100 :
        hypothesiss = hypothesiss[0]
    error = []
    s = score(references, hypothesiss, lang="en", verbose=True)
    error.append((s[0].item(), s[1].item(), s[2].item()))
    return error

d= {}
for wfile in water_marked_files:
    for afile in attacked_files:
        key = afile.split('/')[2]+' '+ afile.split('/')[-2]  + ' '
        if 'rephrased'  in afile or 'dipper' in afile:
           key+= "".join(afile.split('/')[-1].split('_')[-1]) 
        key = key.lower().replace('.pkl' , '').replace('dipper' , 'rephrased').replace('sirrephrased' , 'rephrased').title()
        key = tuple(key.split(' '))
        if ('sir' in wfile.lower() and 'sir' in afile.lower()):
             d[key] = calculate_bert_score_from_files(wfile, afile)
        elif('semantics' not in wfile.lower() and 'semantics' not in afile.lower()):
               d[key] = calculate_bert_score_from_files(wfile, afile)
        else:
                d[key] = calculate_bert_score_from_files(wfile, afile)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [12]:
d

{('Normaltranslation',
  'Kwg',
  ''): [(tensor([0.8491]), tensor([0.8962]), tensor([0.8720]))],
 ('Normaltranslation',
  'Semantics',
  ''): [(tensor([0.8431]), tensor([0.8918]), tensor([0.8668]))],
 ('Normaltranslation',
  'Sir',
  ''): [(tensor([0.8158]), tensor([0.8164]), tensor([0.8161]))],
 ('Paraphraed_Pivottranslation',
  'Kwg',
  ''): [(tensor([0.8379]), tensor([0.9094]), tensor([0.8722]))],
 ('Paraphraed_Pivottranslation',
  'Semantics',
  ''): [(tensor([0.8340]), tensor([0.8927]), tensor([0.8624]))],
 ('Paraphraed_Pivottranslation',
  'Sir',
  ''): [(tensor([0.8339]), tensor([0.9054]), tensor([0.8682]))],
 ('Paraphrased_Normaltranslation',
  'Kwg',
  ''): [(tensor([0.8369]), tensor([0.8800]), tensor([0.8579]))],
 ('Paraphrased_Normaltranslation',
  'Semantics',
  ''): [(tensor([0.8327]), tensor([0.8760]), tensor([0.8538]))],
 ('Paraphrased_Normaltranslation',
  'Sir',
  ''): [(tensor([0.8085]), tensor([0.7948]), tensor([0.8016]))],
 ('Pivottranslation',
  'Kwg',
  ''): [(ten