## ENGLISH

In [1]:
import os
import csv

en_pairs = {}
path = 'hatemeter_data_dump'
for table in os.listdir(path):
    if '[EN]' in table:
        with open(path + os.sep + table) as f_in:
            data = csv.reader(f_in, delimiter='\t')
            header = next(data)
            if 'original' not in table:
                for row in data:
                    i, hs, cs, hstype, cstype = row
                    en_pairs[i] = (hs, cs, hstype.lower(), cstype.lower())
                    
print(len(en_pairs.keys()))

1225


#### Table with the distribution of counterspeech types in the dataset

In [6]:
from collections import Counter
from prettytable import PrettyTable
x = PrettyTable()

cstypes = [en_pairs[key][3].lower() for key in en_pairs]

cs_tags = {'negative': 0, 'denouncing': 0, 'facts': 0, 'question': 0, 'hypocrisy': 0, 'affiliation': 0, 'positive': 0, 'humor': 0, 'consequences': 0}

for t in cstypes:
    for tag in cs_tags.keys():
        if tag in t:
            cs_tags[tag] += 1
            
total = sum(cs_tags.values())

x.field_names = ['Type', 'Number', 'Percentage']
for tag in sorted(cs_tags, key = lambda x: cs_tags[x]):
    x.add_row([tag, cs_tags[tag], round(cs_tags[tag]/total*100)])
    
print(x)

+--------------+--------+------------+
|     Type     | Number | Percentage |
+--------------+--------+------------+
|   negative   |   1    |     0      |
| consequences |   7    |     0      |
| affiliation  |   18   |     1      |
|   positive   |   59   |     4      |
|    humor     |  121   |     8      |
|  hypocrisy   |  193   |     13     |
|   question   |  202   |     13     |
|  denouncing  |  289   |     19     |
|    facts     |  626   |     41     |
+--------------+--------+------------+


#### Table with the distribution of hate types in the dataset

In [4]:
hstypes = [en_pairs[key][2].lower() for key in en_pairs]

hs_tags = {'rapism': 0, 'culture': 0, 'crime': 0, 'economic': 0, 'terrorism': 0, 'islamization': 0, 'generic': 0, 'women': 0}

for t in hstypes:
    for tag in hs_tags.keys():
        if tag in t:
            hs_tags[tag] += 1
            
x = PrettyTable()
total = sum(hs_tags.values())

x.field_names = ['Type', 'Number', 'Percentage']
for tag in sorted(hs_tags, key = lambda x: hs_tags[x]):
    x.add_row([tag, hs_tags[tag], round(hs_tags[tag]/total*100)])
    
print(x)

+--------------+--------+------------+
|     Type     | Number | Percentage |
+--------------+--------+------------+
|   economic   |   54   |     4      |
|  terrorism   |   78   |     6      |
|    women     |   82   |     6      |
|    crime     |  142   |     10     |
| islamization |  160   |     11     |
|    rapism    |  228   |     16     |
|   generic    |  240   |     17     |
|   culture    |  423   |     30     |
+--------------+--------+------------+


#### Table with the distribution of counterspeech types across different hate types in the dataset

In [5]:
cross = {h: {} for h in hs_tags}
for i in en_pairs:
    for h in hs_tags:
        for c in cs_tags:
            if c in en_pairs[i][3] and h in en_pairs[i][2]:
                try:
                    cross[h][c] += 1
                except KeyError:
                    cross[h][c] = 1

x = PrettyTable()

x.field_names = ['Hate type', 'Counterspeech type', 'Number', 'Percentage for this hate type']
                    
for hs_type in cross:
    cs_types = cross[hs_type]
    total = sum(cs_types.values())
    for cs_type in sorted(cs_types, key=lambda z: cs_types[z]):
        x.add_row([hs_type, cs_type, cs_types[cs_type], round(cs_types[cs_type]/total*100)])

print(x)

+--------------+--------------------+--------+-------------------------------+
|  Hate type   | Counterspeech type | Number | Percentage for this hate type |
+--------------+--------------------+--------+-------------------------------+
|    rapism    |    consequences    |   1    |               0               |
|    rapism    |    affiliation     |   3    |               1               |
|    rapism    |       humor        |   7    |               2               |
|    rapism    |      positive      |   9    |               3               |
|    rapism    |     hypocrisy      |   34   |               12              |
|    rapism    |      question      |   39   |               13              |
|    rapism    |     denouncing     |   62   |               21              |
|    rapism    |       facts        |  137   |               47              |
|   culture    |    affiliation     |   3    |               1               |
|   culture    |      positive      |   28   |      

## ITALIAN

In [6]:
it_pairs = {}

for table in os.listdir(path):
    if 'IT' in table:
        with open(path + os.sep + table) as f_in:
            data = csv.reader(f_in, delimiter='\t')
            header = next(data)
            for row in data:
                try:
                    i, hs, cs, cstype, hstype = row
                    it_pairs[i] = (hs, cs, hstype, cstype)
                except ValueError:
                    i, hs, cs, cstype, hstype = row[:5]
                    it_pairs[i] = (hs, cs, hstype.lower(), cstype.lower())
                    
print(len(it_pairs.keys()))

577


#### Table with the distribution of counterspeech types in the dataset

In [7]:
x = PrettyTable()

cstypes = [it_pairs[key][3].lower() for key in it_pairs]

cs_tags = {'negative': 0, 'denouncing': 0, 'facts': 0, 'question': 0, 'hypocrisy': 0, 'affiliation': 0, 'positive': 0, 'humor': 0, 'consequences': 0}

for t in cstypes:
    for tag in cs_tags.keys():
        if tag in t:
            cs_tags[tag] += 1
            
total = sum(cs_tags.values())

x.field_names = ['Type', 'Number', 'Percentage']
for tag in sorted(cs_tags, key = lambda x: cs_tags[x]):
    x.add_row([tag, cs_tags[tag], round(cs_tags[tag]/total*100)])
    
print(x)

+--------------+--------+------------+
|     Type     | Number | Percentage |
+--------------+--------+------------+
| consequences |   0    |     0      |
|   negative   |   2    |     0      |
| affiliation  |   5    |     1      |
|   positive   |   18   |     3      |
|    humor     |   19   |     3      |
|  hypocrisy   |   44   |     7      |
|  denouncing  |   84   |     12     |
|   question   |  131   |     19     |
|    facts     |  369   |     55     |
+--------------+--------+------------+


#### Table with the distribution of hate types in the dataset

In [8]:
hstypes = [it_pairs[key][2].lower() for key in it_pairs]

hs_tags = {'terrorismo': 0, 'criminalità': 0, 'povertà': 0, 'islamizzazione': 0, 'donne': 0, 'generico': 0}

for t in hstypes:
    for tag in hs_tags.keys():
        if tag in t:
            hs_tags[tag] += 1
            
x = PrettyTable()
total = sum(hs_tags.values())

x.field_names = ['Type', 'Number', 'Percentage']
for tag in sorted(hs_tags, key = lambda x: hs_tags[x]):
    x.add_row([tag, hs_tags[tag], round(hs_tags[tag]/total*100)])
    
print(x)

+----------------+--------+------------+
|      Type      | Number | Percentage |
+----------------+--------+------------+
|    generico    |   15   |     3      |
|     donne      |   27   |     5      |
|  criminalità   |   74   |     12     |
|    povertà     |  100   |     17     |
| islamizzazione |  176   |     29     |
|   terrorismo   |  206   |     34     |
+----------------+--------+------------+


#### Conversion of Amnesty types into our types

In [10]:
converted = {}
conversion = {'terrorismo': 'terrorism', 'criminalità': 'crime', 'povertà': 'economics', 'islamizzazione': 'islamization', 'donne': 'women', 'generico': 'generic'}

for t in hstypes:
    for tag in hs_tags.keys():
        if tag in t:
            try:
                converted[conversion[tag]] += 1
            except KeyError:
                converted[conversion[tag]] = 1
            
x = PrettyTable()
total = sum(converted.values())

x.field_names = ['Type', 'Number', 'Percentage']
for tag in sorted(converted, key = lambda x: converted[x]):
    x.add_row([tag, converted[tag], round(converted[tag]/total*100)])
    
print(x)

+--------------+--------+------------+
|     Type     | Number | Percentage |
+--------------+--------+------------+
|   generic    |   15   |     3      |
|    women     |   27   |     5      |
|    crime     |   74   |     12     |
|  economics   |  100   |     17     |
| islamization |  176   |     29     |
|  terrorism   |  206   |     34     |
+--------------+--------+------------+


#### Table with the distribution of counterspeech types across hate types in the dataset

In [11]:
cross = {h: {} for h in hs_tags}
for i in it_pairs:
    for h in hs_tags:
        for c in cs_tags:
            if c in it_pairs[i][3] and h in it_pairs[i][2]:
                try:
                    cross[h][c] += 1
                except KeyError:
                    cross[h][c] = 1
                    
x = PrettyTable()

x.field_names = ['Hate type', 'Counterspeech type', 'Number', 'Percentage for this hate type']
                    
for hs_type in cross:
    cs_types = cross[hs_type]
    total = sum(cs_types.values())
    for cs_type in sorted(cs_types, key=lambda z: cs_types[z]):
        x.add_row([hs_type, cs_type, cs_types[cs_type], round(cs_types[cs_type]/total*100)])

print(x)

+----------------+--------------------+--------+-------------------------------+
|   Hate type    | Counterspeech type | Number | Percentage for this hate type |
+----------------+--------------------+--------+-------------------------------+
|   terrorismo   |      negative      |   1    |               0               |
|   terrorismo   |       humor        |   3    |               1               |
|   terrorismo   |      positive      |   4    |               2               |
|   terrorismo   |    affiliation     |   5    |               2               |
|   terrorismo   |     hypocrisy      |   7    |               3               |
|   terrorismo   |      question      |   31   |               13              |
|   terrorismo   |     denouncing     |   40   |               17              |
|   terrorismo   |       facts        |  144   |               61              |
|  criminalità   |      positive      |   2    |               2               |
|  criminalità   |       hum

## FRENCH

In [4]:
fr_pairs = {}
path = 'hatemeter_data_dump'
for table in os.listdir(path):
    if 'FR' in table:
        with open(path + os.sep + table) as f_in:
            data = csv.reader(f_in, delimiter='\t')
            header = next(data)
            for row in data:
                try:
                    hsfr, csfr, hsen, hstype, csen, cstype = row
                    if hstype != '':
                        current_hstype = hstype
                    fr_pairs[csfr] = (hsfr, csfr, hsen, csen, current_hstype.lower(), cstype.lower())
                except ValueError:
                    print(row)
                    
print(len(fr_pairs.keys()))

["L'islamophobie est un droit. Combattre l'islam par un devoir.", 'La liberté de conscience aussi.', 'Islamophobia is a right. Fighting Islam is']
[' a duty.', '', 'Also freedom of conscience.', 'humor']
['', "C'est quoi un voile islamique ? Encore une fois, il me faut du contexte !  Islam politique ? C'est à dire ?", '', '', 'What is an Islamic veil? Again, I need a context!']
[' Political Islam? What do you mean?', 'question']
['', 'Auparavant : " les femmes musulmanes portent un foulard car elles sont soumises"  Maintenant : " le voile islamique est la démonstrations d\'un islam politique"  Donc les femmes musulmanes n\'ont pas de libre choix... mais quand même! Elle font le choix de soutenir un islam politique! Trouvez l\'erreur...', '', '', 'Previously: "Muslim women wear the veil because they are subjected"']
[' Now: "The Islamic veil is the demonstration of a political Islam"']
[' So Muslim women have no choice ... once again! They make the choice to support a political Islam! F

In [5]:
print(fr_pairs)

{"L'islamophobie est un délit.": ('“L’islamophobie” c’est un moyen de nous retirer notre liberté d’expression.', "L'islamophobie est un délit.", 'Islamophobia  is a way to take away our freedom of expression.', 'Islamophobia is a crime.', 'generic', 'consequences'), 'La critique et le blasphémé sont deux choses différentes.': ('', 'La critique et le blasphémé sont deux choses différentes.', '', 'Critics and blasphemy are two different things.', 'generic', 'facts'), "c'est faux.": ('L’islam va à l’encontre des valeurs francaises.', "c'est faux.", 'Islam is against the french values.', "it's wrong.", 'generic', 'other'), 'Les musulmanes ne veulent pas interdire le blasphème.': ('', 'Les musulmanes ne veulent pas interdire le blasphème.', '', "Muslims don't want to ban blasphemy.", 'generic', 'facts'), "L'islamophobie est une maladie comme tout racisme.": ('', "L'islamophobie est une maladie comme tout racisme.", '', 'Islamophobia is a disease like any racism.', 'generic', 'facts'), "L'is

#### Table with the distribution of counterspeech types in the dataset

In [7]:
x = PrettyTable()

cstypes = [fr_pairs[key][5].lower() for key in fr_pairs]

cs_tags = {'negative': 0, 'denouncing': 0, 'facts': 0, 'question': 0, 'hypocrisy': 0, 'affiliation': 0, 'positive': 0, 'humor': 0, 'consequences': 0}

for t in cstypes:
    for tag in cs_tags.keys():
        if tag in t:
            cs_tags[tag] += 1
            
total = sum(cs_tags.values())

x.field_names = ['Type', 'Number', 'Percentage']
for tag in sorted(cs_tags, key = lambda x: cs_tags[x]):
    x.add_row([tag, cs_tags[tag], round(cs_tags[tag]/total*100)])
    
print(x)

+--------------+--------+------------+
|     Type     | Number | Percentage |
+--------------+--------+------------+
|   negative   |   2    |     0      |
| affiliation  |   3    |     1      |
| consequences |   5    |     1      |
|   question   |   26   |     6      |
|   positive   |   29   |     7      |
|  hypocrisy   |   48   |     12     |
|    humor     |   59   |     14     |
|  denouncing  |   77   |     19     |
|    facts     |  164   |     40     |
+--------------+--------+------------+


#### Table with the distribution of hate types in the dataset

In [8]:
hstypes = [fr_pairs[key][4].lower() for key in fr_pairs]

hs_tags = {'rapism': 0, 'culture': 0, 'crime': 0, 'economic': 0, 'terrorism': 0, 'islamization': 0, 'generic': 0, 'women': 0}

for t in hstypes:
    for tag in hs_tags.keys():
        if tag in t:
            hs_tags[tag] += 1
            
x = PrettyTable()
total = sum(hs_tags.values())

x.field_names = ['Type', 'Number', 'Percentage']
for tag in sorted(hs_tags, key = lambda x: hs_tags[x]):
    x.add_row([tag, hs_tags[tag], round(hs_tags[tag]/total*100)])
    
print(x)

+--------------+--------+------------+
|     Type     | Number | Percentage |
+--------------+--------+------------+
|    rapism    |   0    |     0      |
|   economic   |   8    |     2      |
|    crime     |   11   |     3      |
| islamization |   32   |     8      |
|  terrorism   |   49   |     13     |
|    women     |   80   |     21     |
|   culture    |   99   |     26     |
|   generic    |  109   |     28     |
+--------------+--------+------------+


#### Table with the distribution of counterspeech types across hate types in the dataset

In [9]:
cross = {h: {} for h in hs_tags}
for i in fr_pairs:
    for h in hs_tags:
        for c in cs_tags:
            if c in fr_pairs[i][5] and h in fr_pairs[i][4]:
                try:
                    cross[h][c] += 1
                except KeyError:
                    cross[h][c] = 1
                    
x = PrettyTable()

x.field_names = ['Hate type', 'Counterspeech type', 'Number', 'Percentage for this hate type']
                    
for hs_type in cross:
    cs_types = cross[hs_type]
    total = sum(cs_types.values())
    for cs_type in sorted(cs_types, key=lambda z: cs_types[z]):
        x.add_row([hs_type, cs_type, cs_types[cs_type], round(cs_types[cs_type]/total*100)])

print(x)

+--------------+--------------------+--------+-------------------------------+
|  Hate type   | Counterspeech type | Number | Percentage for this hate type |
+--------------+--------------------+--------+-------------------------------+
|   culture    |      negative      |   1    |               1               |
|   culture    |    consequences    |   1    |               1               |
|   culture    |      question      |   7    |               6               |
|   culture    |     hypocrisy      |   7    |               6               |
|   culture    |      positive      |   12   |               11              |
|   culture    |       humor        |   16   |               14              |
|   culture    |     denouncing     |   23   |               20              |
|   culture    |       facts        |   46   |               41              |
|    crime     |     hypocrisy      |   1    |               6               |
|    crime     |    consequences    |   1    |      