In [2]:
import pandas as pd
from pandas.api.types import is_string_dtype
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
import math
import re


# Folder holding the unzipped consultation exports, and the New Competition Tool
# contributions file relative to that folder.
base = './EU DSA, DMA and consultations/Unzipped files/'
fp = 'contributions-New_Competition_Tool/contributions-New_Competition_Tool.csv'

# The file is parsed with ';' as the field separator and decoded as Windows-1252.
NCT_contributions = pd.read_csv(
    ''.join((base, fp)),
    sep=';',
    encoding='windows-1252',
)

In [3]:
# Columns whose name starts, after at most seven leading characters, with
# "Please explain" are follow-ups to the column immediately before them.
# Append that preceding column's name in square brackets so each follow-up
# can be told apart by its own label.
pat = re.compile('^.{0,7}Please explain')
column_names = NCT_contributions.columns.tolist()

for index,column_name in enumerate(NCT_contributions.columns):

    # The first column has no predecessor; index-1 would silently wrap
    # around to the last column of the frame.
    if index == 0 or not pat.search(column_name):
        continue

    # Read from the working list on purpose: if the preceding column was
    # itself renamed in this pass, its new label is the one that is quoted.
    suffix = ' [' + column_names[index-1] + ']'

    # Re-running the cell must not append the same suffix a second time.
    if not column_name.endswith(suffix):
        column_names[index] = column_name + suffix

NCT_contributions.columns = column_names

In [4]:
# Build one regex alternation that matches "interoperability" even when any two
# adjacent characters differ: each variant replaces a two-character window of
# the keyword with the wildcards '..'. Two related word stems are added verbatim.
letters = list('interoperability')
length = len(letters)

keyword = ''.join(letters)
fuzzy_variants = []

for window_start in range(length):

    before = keyword[:window_start]
    after = keyword[window_start + 2:]
    fuzzy_variants.append(before + '..' + after)

pats = '|'.join(fuzzy_variants + ['interoperable','interoperate'])

# Columns that identify a respondent.
additional_cols = ['Reference','Organisation name','Country']

# Keep only rows with an organisation name, then only those whose Language is
# 'English', and finally order the result by organisation name.
has_organisation = NCT_contributions['Organisation name'].notna()
not_anonymous = NCT_contributions[has_organisation]

is_english = not_anonymous['Language'].eq('English')
in_english = not_anonymous[is_english]

filtered_NCT_contributions = in_english.sort_values(by='Organisation name')

In [5]:
# Find the full label of the "other market scenarios" question from a fragment
# of its text; the first column whose name matches is used.
question_pattern = re.compile('.*Please indicate which are these other market scenarios that in your view qualify')
matching_columns = [col for col in filtered_NCT_contributions.columns if question_pattern.search(col)]
interop_question = matching_columns[0]

# Organisation name plus the answer to that question, for respondents who answered it.
name_ratings = filtered_NCT_contributions[['Organisation name',interop_question]].dropna(subset=[interop_question])

# Display only the answers that match the interoperability patterns.
mentions_interop = name_ratings[interop_question].str.contains(pats,regex=True)
name_ratings[mentions_interop].reset_index(drop=True)

Unnamed: 0,Organisation name,7.3. Please indicate which are these other market scenarios that in your view qualify as structural competition problems and rate them according to their importance from 0 to 4 (0 = no knowledge/no experience; 1 = no importance/no relevance; 2 = somewhat important; 3 = important; 4 = very important).
0,A1 Telekom Austria Group,In coordination with our response to the DSA c...
1,Access Now Europe,Other market scenarios that qualify as structu...
2,ETNO (European Telecommunications Network Oper...,In coordination with our response to the DSA c...
3,GSMA,In coordination with our response to the DSA c...
4,ORANGE,Other scenarios qualifying as structural compe...


In [7]:
# For every respondent type: collect the answers that match the
# interoperability patterns, write them to a Word document with the matching
# words in bold, save one document per type, and record how often each
# question is the first matching one in a response.
question_frequencies = []

# Compiled once; the same alternation is tested against every token.
highlight_pattern = re.compile(pats)

# Raw string: '\W' inside a normal literal is an invalid escape sequence.
# The capture group makes split() return the separators as well, so the
# original text is rebuilt exactly, run by run.
token_splitter = re.compile(r'(\W)')

for user_type,contributions in filtered_NCT_contributions.groupby('User type'):

    all_interoperability = []

    for index,row in contributions.iterrows():

        # True for each cell of this response that matches; cells that are
        # not strings count as "no match" through na=False.
        test = row.str.contains(pats,na=False)

        if test.any():

            # Identifying fields first, then every matching (column, value) pair.
            as_list = [(col,row[col]) for col in additional_cols]
            as_list.extend(row[test].items())

            all_interoperability.append(as_list)

    length = len(all_interoperability)

    as_doc = Document()

    style = as_doc.styles['Normal']
    font = style.font
    font.name = 'Calibri'

    # NOTE(review): the title says "DSA consultation" while the output file
    # name below says "NCT" - confirm which label is intended.
    title = 'All responses mentioning "interoperability" (DSA consultation) - '
    title += user_type
    title += ' (n='+str(length)+')'

    as_doc.add_heading(title,0)

    interop_questions = []

    for element in all_interoperability:

        # element[0:3] are the identifying fields, so element[3] is the first
        # matching (question, answer) pair of this response.
        # NOTE(review): only that first question is counted per response -
        # confirm this is the intended frequency definition.
        interop_question = element[3][0]
        interop_questions.append(interop_question)

        reference,organisation_name,country = element[0:3]

        # str() so that a missing Country (NaN) or a non-string Reference
        # cannot raise TypeError during concatenation.
        heading_2 = str(reference[1])
        heading_2 += ' - '+str(organisation_name[1])
        heading_2 += ' ('+str(country[1])+')'

        as_doc.add_heading(heading_2,level=2)

        for question,content in element[3:]:

            as_doc.add_heading(question,level=3)

            # One Word paragraph per line of the answer.
            for text in content.splitlines():

                # Start from an empty paragraph so that the first token goes
                # through the same highlight check as all the others.
                paragraph = as_doc.add_paragraph()

                for word in token_splitter.split(text):

                    # split() yields empty strings next to adjacent
                    # separators; they carry no text.
                    if not word:
                        continue

                    run = paragraph.add_run(word)

                    if highlight_pattern.search(word):
                        run.bold = True

                paragraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY

    # Stored under the original user type, before '/' is replaced for the file name.
    frequencies = user_type,pd.Series(interop_questions,dtype='object').value_counts()
    question_frequencies.append(frequencies)

    fp = './EU DSA, DMA and consultations/Filtered contributions - NOT FOR ANNOTATING/'
    # '/' would be read as a path separator in the file name.
    user_type = user_type.replace('/','+')
    print(user_type,len(all_interoperability))
    fp += 'NCT Interoperability responses - '+user_type+'.docx'

    as_doc.save(fp)

Academic+Research Institution 1
Business Association 10
Company+Business organisation 13
Consumer Organisation 1
NGO (Non-governmental organisation) 6
Other 2
Public authority 1
Trade Union 0


In [40]:
# Show the (user type, question counts) pair stored at position 2 of question_frequencies.
question_frequencies[2]

('Company/Business organisation',
 6.2. Please indicate which are these other market features/elements that can be a source or part of the reasons for a structural competition problem in a given market and rate them according to their importance from 0 to 4 (0 = no knowledge/no experience; 1 = no importance/no relevance; 2 = somewhat important; 3 = important; 4 = very important).                                                                                                                                                                                                                                                                                                                                                                                  3
 9.1. Please explain your answer. If you replied yes, please also indicate the type of intervention that would be needed. [9. Do you think that there is a need for the Commission to be able to intervene in situations where structural competition 