# Test counters code

Try out code for disinfo countermeasures before it gets used online

In [139]:
# Try object code
import pandas as pd
import numpy as np
import os
from sklearn.feature_extraction.text import CountVectorizer


class Counter:
    def __init__(self, infile = 'CountersPlaybook_MASTER.xlsx'):
        
        # Load metadata from counters excelfile
        # FIXIT: Ungodly hack = please fix
        xlsx = pd.ExcelFile(infile)
        allamitts = xlsx.parse(['AMITT_objects'])
        dfa = allamitts['AMITT_objects']
        self.dftactics = dfa[3:15].copy()
        self.dfresponses = dfa[18:25].copy()
        self.dfactors = dfa[28:36].copy()
        self.dftechniques = dfa[39:100].copy()

        # Get counters data
        self.dfcounters = pd.read_excel(infile, sheet_name='Countermeasures')
        
        # Create cross-tables
        crossidtechs = self.splitcol(dfcounters[['ID', 'Techniques']], 
                                     'Techniques', 'Techs', '\n')
        crossidtechs = crossidtechs[crossidtechs['Techs'].notnull()]
        crossidtechs['TID'] = crossidtechs['Techs'].str.split(' ').str[0]
        crossidtechs.drop('Techs', axis=1, inplace=True)
        self.idtechnique = crossidtechs
        
        crossidres = self.splitcol(dfcounters[['ID', 'Resources needed']], 
                                   'Resources needed', 'Res', ',')
        crossidres = crossidres[crossidres['Res'].notnull()]
        self.idresource = crossidres

        
    def analyse_counter_text(self, col='Title'):
        # Analyse text in counter descriptions
        alltext = (' ').join(dfcounters[col].to_list()).lower()
        count_vect = CountVectorizer(stop_words='english')
        word_counts = count_vect.fit_transform([alltext])
        dfw = pd.DataFrame(word_counts.A, columns=count_vect.get_feature_names()).transpose()
        dfw.columns = ['count']
        dfw = dfw.sort_values(by='count', ascending=False)
        return(dfw)   

    
    def splitcol(self, df, col, newcol, divider=','):
        # Thanks https://stackoverflow.com/questions/17116814/pandas-how-do-i-split-text-in-a-column-into-multiple-rows?noredirect=1
        return (df.join(df[col]
                        .str.split(divider, expand=True).stack()
                        .reset_index(drop=True,level=1)
                        .rename(newcol)).drop(col, axis=1))

    
    # Print list of counters for each square of the COA matrix
    # Write HTML version of framework diagram to markdown file
    def write_coacounts_markdown(self, outfile = '../coacounts.md'):

        coacounts = pd.pivot_table(self.dfcounters[['Tactic', 'Response',
                                                    'ID']], index='Response', columns='Tactic', aggfunc=len, fill_value=0)

        html = '''# AMITT Courses of Action matrix:

    <table border="1">
    <tr>
    <td> </td>
    '''
        #Table heading = Tactic names
        for col in coacounts.columns.get_level_values(1):
            tid = self.create_tactic_file(col)
            html += '<td><a href="tactics/{0}counters.md">{1}</a></td>\n'.format(
                tid, col)
        html += '</tr><tr>\n'

        # number of counters per response type
        for response, counts in coacounts.iterrows(): 
            html += '<td>{}</td>\n'.format(response)
            for val in counts.values:
                html += '<td>{}</td>\n'.format(val)
            html += '</tr>\n<tr>\n'
        
        # Total per tactic
        html += '<td>TOTALS</td>\n'
        for val in coacounts.sum().values:
                html += '<td>{}</td>\n'.format(val)
        html += '</tr>\n</table>\n'           

        with open(outfile, 'w') as f:
            f.write(html)
            print('updated {}'.format(outfile))
        return

    def create_tactic_file(self, tname):
        if not os.path.exists('../tactics'):
            os.makedirs('../tactics')

        tid = tname[:tname.find(' ')]
        html = '''# Tactic {} counters\n\n'''.format(tname)
        
        for resp, counters in counter.dfcounters[dfcounters['Tactic'] == tname].groupby('Response'):
            html += '\n## {}\n'.format(resp)
            
            for c in counters.iterrows():
                html += '* {}: {} (needs {})\n'.format(c[1]['ID'], c[1]['Title'],
                                                    c[1]['Resources needed'])
#             print('Response: {}\n{}'.format(resp,
#                 counters[['ID', 'Title']]))#, 'Resources needed']]))
        datafile = '../tactics/{}_counter.md'.format(tname)
        print('Writing {}'.format(datafile))
        with open(datafile, 'w') as f:
            f.write(html)
            f.close()
        return(tid)

            
def make_object_dict(df):
    return(pd.Series(df.name.values,index=df.Id).to_dict())

counter = Counter()
counter.write_coacounts_markdown()
counter.dfcounters

Writing ../tactics/ALL_counter.md
Writing ../tactics/TA01 Strategic Planning_counter.md
Writing ../tactics/TA02 Objective Planning_counter.md
Writing ../tactics/TA03 Develop People_counter.md
Writing ../tactics/TA04 Develop Networks_counter.md
Writing ../tactics/TA05 Microtargeting_counter.md
Writing ../tactics/TA06 Develop Content_counter.md
Writing ../tactics/TA07 Channel Selection_counter.md
Writing ../tactics/TA08 Pump Priming_counter.md
Writing ../tactics/TA09 Exposure_counter.md
Writing ../tactics/TA10 Go Physical_counter.md
Writing ../tactics/TA11 Persistence_counter.md
Writing ../tactics/TA12 Measure Effectiveness_counter.md
updated ../coacounts.md


Unnamed: 0,ID,metatechnique,Title,Details,Playbook(s),Resources needed,How found,References,Incidents,Tactic,Response,Techniques
0,C00001,,Better models of info spread up the layers,,,,2019-11-workshop,,,ALL,ALL,
1,C00002,,Full spectrum analytics,,,data_scientist,2019-11-workshop,,,ALL,ALL,
2,C00003,,How can we safeguard against extremists using ...,,,,2019-11-workshop,,,ALL,ALL,
3,C00004,,Managing like a chronic disease,,,,2019-11-workshop,,,ALL,ALL,
4,C00005,,"Policy: makers, terminology, elements: a) broa...",,,,2019-11-workshop,,,ALL,ALL,
5,C00006,,Charge for social media,No corresponding AMITT technique.,,platform_admin:socialmedia,2019-11-workshop,,,TA01 Strategic Planning,D2 Deny,
6,C00007,,Create framework for BetterBusinessBureau (BBB...,No corresponding AMITT technique.,,,2019-11-workshop,,,TA01 Strategic Planning,D2 Deny,
7,C00008,,Create shared fact-checking database,Snopes is best-known example,,factcheckers,2019-11-workshop\n2019-11-search,,"I00049,I00050",TA01 Strategic Planning,D2 Deny,TA01 - Strategic Planning\nTA06 - Develop Cont...
8,C00009,resilience,Educate high profile influencers on best pract...,,,"influencers,educators",2019-11-workshop,,,TA01 Strategic Planning,D2 Deny,TA08 - Pump Priming\nT0010 - Cultivate ignoran...
9,C00010,,Enhanced privacy regulation for social media,No corresponding AMITT technique.,,government:policymakers,2019-11-workshop,,,TA01 Strategic Planning,D2 Deny,


In [124]:
for col in coacounts.columns.get_level_values(1):
    tid = col[:col.find(' ')]
    print('TID: {}'.format(tid))
    for resp, counters in counter.dfcounters[dfcounters['Tactic'] == col].groupby('Response'):
        print('Response: {}\n{}'.format(resp,
            counters[['ID', 'Title']]))#, 'Resources needed']]))

TID: AL
Response: ALL
       ID                                              Title
0  C00001         Better models of info spread up the layers
1  C00002                            Full spectrum analytics
2  C00003  How can we safeguard against extremists using ...
3  C00004                    Managing like a chronic disease
4  C00005  Policy: makers, terminology, elements: a) broa...
TID: TA01
Response: D2 Deny
        ID                                              Title
5   C00006                            Charge for social media
6   C00007  Create framework for BetterBusinessBureau (BBB...
7   C00008               Create shared fact-checking database
8   C00009  Educate high profile influencers on best pract...
9   C00010       Enhanced privacy regulation for social media
10  C00011        Media literacy. Games to identify fake news
11  C00012                                Platform regulation
12  C00013  Rating framework for news - full transcripts, ...
13  C00014        Real-tim

In [135]:
c[1]['ID']

'C00149'

In [59]:
#pd.pivot_table(counter.dfcounters, index=['ID', 'Tactic'], values=['Response'], aggfunc=lambda x: x['Response'].describe().values[1])
xx = pd.pivot_table(counter.idtechnique, index=['TID', 'ID'], aggfunc=len)
counter.idtechnique.groupby('TID').groups #Gives indices of counters

{'All': Int64Index([20, 37, 61], dtype='int64'),
 'T0001': Int64Index([19], dtype='int64'),
 'T0002': Int64Index([21, 25, 30, 54, 86], dtype='int64'),
 'T0003': Int64Index([30], dtype='int64'),
 'T0004': Int64Index([41], dtype='int64'),
 'T0005': Int64Index([35, 133], dtype='int64'),
 'T0006': Int64Index([7, 13, 22, 23, 24, 30, 86], dtype='int64'),
 'T0007': Int64Index([11, 35, 38, 54, 131, 132], dtype='int64'),
 'T0008': Int64Index([7, 13, 69, 73], dtype='int64'),
 'T0009': Int64Index([7, 13, 39, 131], dtype='int64'),
 'T0010': Int64Index([8, 91, 133], dtype='int64'),
 'T0011': Int64Index([44, 52, 131, 132], dtype='int64'),
 'T0012': Int64Index([48, 49, 50, 51, 143], dtype='int64'),
 'T0013': Int64Index([7, 13], dtype='int64'),
 'T0014': Int64Index([7, 11, 13, 69, 131], dtype='int64'),
 'T0015': Int64Index([54, 65, 69, 86, 142], dtype='int64'),
 'T0016': Int64Index([68], dtype='int64'),
 'T0017': Int64Index([35, 66, 67, 69, 86, 91, 131, 132], dtype='int64'),
 'T0018': Int64Index([62, 

In [111]:
z

Unnamed: 0,ID,metatechnique,Title,Details,Playbook(s),Resources needed,How found,References,Incidents,Tactic,Response,Techniques
145,C00148,data pollution,Add random links to network graphs,If creators are using network analysis to dete...,,platform_algorithms,2019-11-workshop,,,TA12 Measure Effectiveness,D4 Degrade,
146,C00149,data pollution,Poison the monitoring & evaluation data,,,,2019-11-workshop,,,TA12 Measure Effectiveness,D4 Degrade,TA12 - Measure Effectiveness\nT0020 - Trial co...


In [42]:
counter.dftechniques[['Id', 'name']].merge(tc.reset_index(), left_on='Id', right_on='TID')

Unnamed: 0,Id,name,TID,ID,0
0,T0001,"5Ds (dismiss, distort, distract, dismay, divide)",T0001,C00020,1
1,T0002,Facilitate State Propaganda,T0002,C00022,1
2,T0002,Facilitate State Propaganda,T0002,C00026,1
3,T0002,Facilitate State Propaganda,T0002,C00031,1
4,T0002,Facilitate State Propaganda,T0002,C00055,1
5,T0002,Facilitate State Propaganda,T0002,C00088,1
6,T0003,Leverage Existing Narratives,T0003,C00031,1
7,T0004,Competing Narratives,T0004,C00042,1
8,T0005,Center of Gravity Analysis,T0005,C00036,1
9,T0005,Center of Gravity Analysis,T0005,C00136,1


In [44]:
# Get counters per technique
pd.pivot_table(counter.idtechnique, index=['TID', 'ID'], aggfunc=len).sort_index()

TID             ID    
All             C00021    1
                C00038    1
                C00062    1
T0001           C00020    1
T0002           C00022    1
                C00026    1
                C00031    1
                C00055    1
                C00088    1
T0003           C00031    1
T0004           C00042    1
T0005           C00036    1
                C00136    1
T0006           C00008    1
                C00014    1
                C00023    1
                C00024    1
                C00025    1
                C00031    1
                C00088    1
T0007           C00012    1
                C00036    1
                C00039    1
                C00055    1
                C00133    1
                C00135    1
T0008           C00008    1
                C00014    1
                C00070    1
                C00074    1
                         ..
TA09            C00019    1
                C00028    1
                C00085    1
                C00086   

In [33]:
# Create cross-tables
counter.idresource['Res'].value_counts().sort_index().reset_index()

Unnamed: 0,index,Res
0,DHS,1
1,NGO,1
2,activists,1
3,adtech,1
4,civil_society,1
5,community_groups,1
6,data_scientist,4
7,developers,2
8,educators,7
9,factcheckers,2


In [8]:
dfcounters['metatechnique'].value_counts()

resilience          13
friction            10
data pollution       5
diversion            5
daylight             4
cleaning             3
countermessaging     1
dilution             1
Name: metatechnique, dtype: int64