In [None]:
import json
import re

# Sector names used both as lookup keys into the sectoral-analysis data and as
# subsection headings; the report follows this order.
sectors_list = [
    "Shelter",
    "Food Security",
    "Agriculture",
    "Education",
    "Health",
    "Logistics",
    "Protection",
    "Livelihoods",
    "Wash",
    "Nutrition",
]

In [None]:
def order_dict(x):
    """Drop placeholder entries and move the "General Overview" entry to the front.

    Entries whose key stringifies to "[]" or whose value stringifies to "{}"
    are discarded. Then:

    * if a "General Overview" key exists, it is placed first and the remaining
      entries keep their original order and types;
    * otherwise, if a "['General Overview']" key exists, it is placed first
      under the key "[General Overview]" (value untouched) and every other
      key and value is converted with ``str``;
    * otherwise the filtered mapping is returned as is.

    Args:
        x: mapping exposing ``.items()``.

    Returns:
        A new dict with the ordering described above.
    """
    plain_key = "General Overview"
    list_key = "['General Overview']"

    # Filter out the empty placeholders first.
    kept = {}
    for key, val in x.items():
        if str(key) == "[]" or str(val) == "{}":
            continue
        kept[key] = val

    if plain_key in kept:
        ordered = {plain_key: kept[plain_key]}
        for key, val in kept.items():
            if key != plain_key:
                ordered[key] = val
        return ordered

    if list_key in kept:
        # The overview value is kept untouched; everything else is stringified.
        ordered = {"[General Overview]": kept[list_key]}
        for key, val in kept.items():
            if key != list_key:
                ordered[str(key)] = str(val)
        return ordered

    return kept

In [None]:
def omit_punctuation(text):
    """Turn a raw key into a clean heading.

    Single quotes and square brackets are removed. If the result contains
    '->', only the segment that follows the first '->' (up to the next '->',
    if any) is returned; otherwise the stripped text is returned unchanged.
    """
    stripped = text
    for unwanted in ("'", "[", "]"):
        stripped = stripped.replace(unwanted, '')

    segments = stripped.split('->')
    # More than one segment means at least one '->' separator was present.
    return segments[1] if len(segments) > 1 else stripped

def clean_characters(text):
    """Prepare a piece of report text for inclusion in a LaTeX document.

    Two transformations are applied, in this order:

    1. every '%' and '$' is prefixed with a backslash so LaTeX treats it as a
       literal character;
    2. a single whitespace character directly before '?', '.', '!' or '"' is
       removed when that punctuation mark is itself followed by whitespace or
       the end of the text (e.g. "done ." becomes "done.").

    Args:
        text: the string to clean.

    Returns:
        The cleaned string.
    """
    # The backslashes are written as '\\' on purpose: '\%' and '\$' are not
    # valid escape sequences in a normal string literal and trigger a
    # SyntaxWarning on recent Python versions.
    # NOTE(review): other LaTeX special characters (e.g. '&', '#', '_') are
    # not escaped here - confirm whether the input can contain them.
    latex_text = text.replace('%', '\\%').replace('$', '\\$')

    # Remove the stray whitespace in front of sentence punctuation.
    latex_text = re.sub(r'\s([?.!"](?:\s|$))', r'\1', latex_text)

    return latex_text

In [None]:
def get_overleaf_text_sectors(sectors_dict):
    """Build the LaTeX source for the sector-by-sector part of the report.

    For every sector in ``sectors_list`` a subsection is emitted, followed by
    three fixed unnumbered subsubsections. The entries of each subsubsection
    are ordered with ``order_dict`` and rendered as follows:

    * more than one entry: one paragraph heading per entry (key cleaned with
      ``omit_punctuation``) followed by the cleaned text;
    * exactly one entry: the cleaned text only, without a heading;
    * no entry: only the subsubsection heading is written.

    Args:
        sectors_dict: mapping indexed by sector name; each value is indexed by
            the three subsubsection titles and yields a mapping of entries.

    Returns:
        The concatenated LaTeX text as a single string.

    Raises:
        KeyError: if a sector from ``sectors_list`` or one of the three
            subsubsection titles is missing from ``sectors_dict``.
    """
    subsection_titles = (
        'Most affected population groups',
        'Key trends',
        'Needs, severity and linkages with other sectors',
    )

    # Fragments are collected and joined once at the end instead of growing a
    # string with repeated '+='.
    chunks = []

    for col in sectors_list:
        one_part = sectors_dict[col]
        # LaTeX commands are written with an explicit '\\': sequences such as
        # '\s' or '\p' are invalid escapes in a normal string literal.
        chunks.append('\\subsection{' + col + '}\n')

        for key_one_sub in subsection_titles:
            chunks.append('\\subsubsection*{' + key_one_sub + '}\n')

            dict_treated = order_dict(one_part[key_one_sub])

            if len(dict_treated) > 1:
                for key, value in dict_treated.items():
                    chunks.append('\\paragraph{' + omit_punctuation(key) + '}\n')
                    chunks.append(clean_characters(value))
                    chunks.append('\n \n')

            elif len(dict_treated) == 1:
                # A lone entry is written as plain text, without a heading.
                chunks.append(clean_characters(next(iter(dict_treated.values()))))
                chunks.append('\n \n')

    return ''.join(chunks)

In [None]:

# Load the full report. The context manager closes the file as soon as the
# JSON has been parsed, and the encoding is stated explicitly so the result
# does not depend on the platform default.
with open('full_report_05_20.json', "r", encoding="utf-8") as third_part_report:
    full_report = json.load(third_part_report)

# Sector-by-sector content of the report.
data_sectors = full_report['Sectoral Analysis']

print(get_overleaf_text_sectors(data_sectors))

In [None]:
# Disabled scratch code kept as a bare string literal: nothing in it is
# executed. It refers to `order_dict_new` and `first`, neither of which is
# defined in the visible cells.
"""final_str = ''
dict_treated = order_dict_new(first['Shocks and impact of the crisis']['Impact on systems and services']
)
                
for key, value in dict_treated.items(): 
    if str(key)!='[]':
        final_str += "\paragraph{" + omit_punctuation(key) + '}\n'
        final_str += clean_characters(value)
        final_str += '\n \n'"""

In [None]:
# Select the part of the loaded report that is passed to
# get_overleaf_text_first_part further down.
# NOTE(review): the lookup key itself contains literal double-quote characters;
# confirm that this matches the key as stored in the JSON file.
first_part = full_report['"Impact of the crisis and humanitarian conditions"']

In [None]:
def _render_first_part_entries(entries):
    """Render one group of entries: headed paragraphs, a lone text, or nothing."""
    if len(entries) > 1:
        return ''.join(
            '\\paragraph{' + omit_punctuation(key) + '}\n'
            + clean_characters(value)
            + '\n \n'
            for key, value in entries.items()
        )

    if len(entries) == 1:
        # A lone entry is written as plain text, without a heading.
        return clean_characters(next(iter(entries.values()))) + '\n \n'

    return ''


def get_overleaf_text_first_part(first_part_dict):
    """Build the LaTeX source for the first part of the report.

    Three subsections are written, in this order:

    1. "Context of the crisis": every entry (ordered with ``order_dict``)
       becomes a paragraph heading followed by its cleaned text.
    2. "Shocks and impact of the crisis": every key becomes a subsubsection;
       its entries are ordered with ``order_dict`` and rendered as headed
       paragraphs (several entries), plain text (one entry) or nothing.
    3. "Humanitarian conditions and severity of needs": same rendering as the
       previous subsection, except that the top-level mapping is itself
       passed through ``order_dict`` first.

    Args:
        first_part_dict: mapping indexed by the three subsection titles above.

    Returns:
        The concatenated LaTeX text as a single string.

    Raises:
        KeyError: if one of the three subsection titles is missing.
    """
    # Fragments are collected and joined once at the end. LaTeX commands are
    # written with an explicit '\\' because '\s' and '\p' are invalid escape
    # sequences in a normal string literal.
    chunks = ['\\subsection{Context of the crisis}\n']

    context = order_dict(first_part_dict['Context of the crisis'])
    for k, v in context.items():
        chunks.append('\\paragraph{' + omit_punctuation(k) + '}\n')
        chunks.append(clean_characters(v))
        chunks.append('\n \n')

    chunks.append('\\subsection{Shocks and impact of the crisis}\n')
    # NOTE(review): unlike the humanitarian-conditions mapping below, this
    # top-level mapping is not passed through order_dict - confirm intended.
    impact = first_part_dict['Shocks and impact of the crisis']
    for first_key, first_value in impact.items():
        chunks.append('\\subsubsection{' + first_key + '}\n')
        chunks.append(_render_first_part_entries(order_dict(first_value)))

    chunks.append('\\subsection{Humanitarian conditions and severity of needs}\n')
    hum_conditions = order_dict(first_part_dict['Humanitarian conditions and severity of needs'])
    for first_key, first_value in hum_conditions.items():
        chunks.append('\\subsubsection{' + first_key + '}\n')
        chunks.append(_render_first_part_entries(order_dict(first_value)))

    return ''.join(chunks)

# Print the LaTeX generated for the first part of the report.
print(get_overleaf_text_first_part(first_part))