# Batik Resilience Project: Report Generation

In [73]:
# initial imports
import datetime, time, json, re, random, os, time, csv, mediacloud.api
import locale
locale.setlocale(locale.LC_ALL, '')  # Use '' for auto, or force e.g. to 'en_US.UTF-8'
from datetime import datetime as dt, date, timedelta
import slug, csv, json
from yattag import Doc, indent

In [74]:
# shared data

# Solr date-range clause on publish_day, from 2017-01-01 to 2019-12-01.
timespan = "publish_day:[2017-01-01T00:00:00Z TO 2019-12-01T00:00:00Z]"

# NOTE(review): presumably Media Cloud collection ids for US media -- confirm.
us_media = ["34412234", "38379429"]

# Place label -> list of id strings (presumably Media Cloud collection ids -- confirm).
# The report loop matches each key verbatim against the `place` field of the
# batik-data files and strips double quotes from it for display, so the embedded
# quotes around "Saudi Arabia" are part of the key.
places = {
    "Sydney": ["34412282", "38378024"],
    "Chennai": ["34412118", "38379954"],
    "Florida": ["38379430"],
    "Alaska": ["38381315"],
    "Kenya": ["34412126", "38380260"],
    "Singapore": ["34412474"],
    "London": ["34412476", "38381111"],
    "Brazil": ["34412257", "38379250"],
    # NOTE(review): "34412204" is listed twice below -- confirm whether that is intended.
    "Islands": [
        "34412175",
        "34412204",
        "34412411",
        "38381481",
        "34412109",
        "34412204",
        "34412168",
        "34412399",
        "38380877",
        "34412137",
    ],
    '"Saudi Arabia"': ["34412050", "38380804"],
    # Places that are currently switched off:
    # 'Argentina': ['34412043', '38376412'],
    # 'Paris': ['34412146', '38379799'],
    # 'Amsterdam': ['34412382', '38380454'],
    # 'Rome': ['34412372', '38380117'],
    # 'Milan': ['34412372', '38380117'],
    # 'Madrid': ['34412356', '38002034'],
    # 'Athens': ['34412477', '38379845'],
    # 'Mexico': ['34412427', '38380322'],
    # 'Rotterdam': ['34412382', '38380454'],
}

# Topic name -> query string; each one is a proximity search ("~40") pairing
# the topic word(s) with "climate".
queries = {
    "security": '"security climate"~40',
    "resilience": '"resilience climate"~40',
    # The uneven spacing around OR is kept exactly as it was.
    "migration": (
        '(("migration climate"~40)'
        ' OR  ("migrant climate"~40)'
        ' OR  ("migrate climate"~40)'
        '  OR  ("immigration climate"~40)'
        '  OR  ("immigrate climate"~40))'
    ),
}

## Load Data Files

In [78]:
# read in the pre-computed data files
def _load_json(path):
    """Parse the JSON document stored at `path` and return the resulting object.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default text encoding.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


# The field names below are the ones the report cells read from each dataset.
# records with 'place', 'topic' and 'top_words'
word_data = _load_json('batik-data/batik-words.json')
# records with 'place', 'media_count' and 'top_media'
media_counts = _load_json('batik-data/batik-media-counts.json')
# records with 'place', 'topic' and 'top_themes'
theme_data = _load_json('batik-data/batik-themes.json')
# records with 'place' and one '<topic>-stories' entry per topic, each holding 'counts'
attention_data = _load_json('batik-data/batik-climate-attention.json')
# records with 'place' and 'collections'
collection_data = _load_json('batik-data/batik-collections.json')

## Helper Functions

In [76]:
# strptime pattern for the leading YYYY-MM-DD portion of a date string
SOLR_DATE_FORMAT = '%Y-%m-%d'


def solr_to_date(date_str):
    """Parse the first ten characters of `date_str` into a datetime.

    Everything after the tenth character (for example a time-of-day suffix)
    is ignored, so the result is a naive datetime at midnight of that day.

    Raises:
        ValueError: if the leading characters do not match SOLR_DATE_FORMAT.
    """
    day_part = date_str[0:10]
    return dt.strptime(day_part, SOLR_DATE_FORMAT)

def stat_card(title, number, subtitle):
    """Write one "stat" card (title, headline number, subtitle) into the report.

    Output goes through the module-level yattag `tag` and `text` callables, so
    this must only be called while they are bound to the document being built.

    Args:
        title: heading shown at the top of the card.
        number: headline value; converted with str() before being written.
        subtitle: muted caption shown under the value.
    """
    # column wrapper > card > card body, opened together and closed in reverse order
    with tag('div', klass="col"), tag('div', klass='card stat'), tag('div', klass='card-body'):
        with tag('h5', klass='card-title'):
            text(title)
        with tag('p', klass='card-text stat'):
            text(str(number))
        with tag('p', klass="card-subtitle text-muted"):
            text(subtitle)

# helper function to write a set of stats for a theme
def story_stat_card(theme_name, story_counts):
    """Write a stat card showing what share of climate stories mention a theme.

    Args:
        theme_name: theme label such as 'resilience'; it selects the
            '<theme_name>-stories' entry of `story_counts`.
        story_counts: mapping holding '<theme_name>-stories' and
            'climate-change-stories'; the values must be convertible with int().

    Raises:
        KeyError: if either count is missing from `story_counts`.
        ValueError: if a count cannot be converted to int.
    """
    theme_story_count = int(story_counts['{}-stories'.format(theme_name)])
    climate_story_count = int(story_counts['climate-change-stories'])
    # A place without any climate stories must not abort the whole report
    # with a ZeroDivisionError; report it as a 0% share instead.
    pct = theme_story_count / climate_story_count if climate_story_count else 0.0
    stat_card("Climate & {}".format(theme_name.capitalize()),
              "{:.0%}".format(pct),
              'stories about climate change also mention {}'.format(theme_name))

# helper function to write all parts of one section
def topic_section(place_name, topic_name, story_counts):
    """Write the complete report section for one climate sub-topic of one place.

    Output goes through the module-level yattag `doc`, `tag` and `text`
    objects. The section consists of a heading with a summary sentence, an
    attention-over-time line chart, a word cloud of the top words and a table
    of the top themes. Chart data is read from the module-level
    `attention_data`, `word_data` and `theme_data`.

    Args:
        place_name: place key exactly as stored in the `place` field of the
            data files; it is slugified only for the HTML element ids.
        topic_name: topic label such as 'resilience'; it selects the
            '<topic_name>-stories' entries.
        story_counts: mapping holding '<topic_name>-stories' and
            'climate-change-stories'; the values must be convertible with int().

    Raises:
        IndexError: if a data file has no record for this place / topic.
        KeyError: if an expected key is missing from a record or from
            `story_counts`.
    """
    topic_key = '{}-stories'.format(topic_name)
    topic_story_count = int(story_counts[topic_key])
    climate_story_count = int(story_counts['climate-change-stories'])
    # guard both ratios: a place may have no climate (or no topic) stories at all
    topic_share = topic_story_count / climate_story_count if climate_story_count else 0.0
    place_slug = slug.slug(place_name)

    # summary
    with tag('div', klass="row"):
        with tag('div', klass="col"):
            with tag('h2'):
                text('Climate and {}'.format(topic_name.capitalize()))
            with tag('p', klass='lead'):
                text('There were {:n} stories about climate and {}. This represents {:.2%} of the stories about climate in some way.'.format(
                    topic_story_count,
                    topic_name,
                    topic_share
                ))

    # attention over time chart
    div_id = "{}-{}-attention-chart".format(place_slug, topic_name)
    with tag('div', klass="row"):
        with tag('div', klass="col"):
            with tag('div', id=div_id, klass="linechart"):
                with tag('script'):
                    # Look the place up through the parameter, not through a
                    # loop variable that happens to exist at notebook level.
                    place_attention = [r for r in attention_data if r['place'] == place_name][0]
                    # NOTE(review): solr_to_date returns a naive datetime, so
                    # timestamp() interprets it in the machine's local timezone --
                    # confirm that is what the chart code expects.
                    data = [[solr_to_date(r['date']).timestamp()*1000, int(r['count'])]
                            for r in place_attention[topic_key]['counts']]
                    doc.asis('$(function() {{linechart("{} Stories", "{}", {}, 200)}})'.format(topic_name.capitalize(), div_id, json.dumps(data)))

    with tag('div', klass="row"):
        # word cloud
        # words left out of the cloud: the topic itself plus place-related terms
        excluded_words = {'brazilian', 'australian', topic_name, 'kenyan', 'kenyans', 'alaskans', 'india',
                          'pacific', 'saudi', 'arabia', 'singaporeans', 'australia'}
        top_words = [d for d in word_data if (d['place'] == place_name) and (d['topic'] == topic_name)][0]['top_words']
        top_words = [{'name': t['word'], 'weight': t['freq']} for t in top_words
                     if t['word'] not in excluded_words][:100]
        with tag('div', klass="col"):
            with tag('h3'):
                text('Top Words')
            with tag('p', klass='headline'):
                text('Here are the top 100 words used in articles about climate and {}. These are words used in sentences that also include those two terms.'.format(topic_name))
            chart_id = "{}-{}-top-words".format(place_slug, topic_name)
            with tag('div', id=chart_id, klass="wordcloud"):
                text('')
            with tag('script'):
                doc.asis('$(function() {{wordcloud("{}", {}, 300)}})'.format(
                    chart_id, json.dumps(top_words)))
        # top themes
        top_themes = [d for d in theme_data if (d['place'] == place_name) and (d['topic'] == topic_name)][0]['top_themes'][:7]
        with tag('div', klass="col"):
            with tag('h3'):
                text('Top Themes')
            with tag('p', klass='headline'):
                text('Here are the top "themes" detected by our computational model in stories about climate and {}. This model was trained on labels applied to New York Times stories over 2 decades.'.format(topic_name))
            # The table is a sibling of the paragraph: a <table> nested inside
            # a <p> is not valid HTML.
            with tag('table', klass='table'):
                with tag('thead'):
                    with tag('tr'):
                        with tag('th'):
                            text('Theme')
                        with tag('th'):
                            text('Percent of Stories')
                with tag('tbody'):
                    for theme in top_themes:
                        theme_share = int(theme['count']) / topic_story_count if topic_story_count else 0.0
                        with tag('tr'):
                            with tag('td'):
                                text(theme['name'])
                            with tag('td'):
                                text("{:.0%}".format(theme_share))
        


## Write Reports

In [82]:
# The coverage CSV is identical for every place, so parse it once up front
# rather than re-reading it on each pass through the loop. If a place occurs
# on several rows, the last row wins.
with open('batik-data/batik-climate-coverage.csv', 'r', newline='', encoding='utf-8') as csvfile:
    coverage_by_place = {row['place']: row for row in csv.DictReader(csvfile)}

# make sure the output folder is there before the first report is written
os.makedirs('reports', exist_ok=True)

# One HTML report per place.
# NOTE: `doc`, `tag` and `text` are deliberately (re)bound at notebook level on
# every pass because the helper functions write through them; keep the loop
# variable named `p`, since code outside this cell may read it as a global.
for p in places:
    doc, tag, text = Doc().tagtext()
    doc.asis('<!DOCTYPE html>')
    place_name = p.replace('"', '')  # display name: the key without embedded quotes
    place_slug = slug.slug(place_name)
    title = "{}: Climate in the News".format(place_name)
    place_media = [r for r in media_counts if r['place'] == p][0]
    place_media_count = int(place_media['media_count'])
    place_collections = [r for r in collection_data if r['place'] == p][0]['collections']
    story_counts = coverage_by_place.get(p, {})
    total_story_count = int(story_counts['total-stories'])
    climate_story_count = int(story_counts['climate-change-stories'])
    with tag('html', lang="en"):
        with tag('head'):
            doc.stag('meta', charset="utf-8")
            doc.stag('meta', name="viewport", content="width=device-width, initial-scale=1")
            doc.stag('link', rel="stylesheet", href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css", integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh", crossorigin="anonymous")
            doc.stag('link', rel="stylesheet", href="report.css")
            for script_src in ("https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js",
                               "https://code.highcharts.com/highcharts.js",
                               "https://code.highcharts.com/modules/wordcloud.js",
                               "report.js"):
                # empty text() so the tag is written as <script ...></script>
                with tag('script', src=script_src):
                    text()
            with tag('title'):
                text(title)
        with tag('body'):
            with tag('div', klass="container"):
                with tag('header'):
                    with tag('div', klass="row"):
                        with tag('div', klass="col"):
                            with tag('h1'):
                                text(title)
                # total count summary
                # place_media_count is a number of media sources (it also feeds
                # the "Total Media Sources" card), so it is worded as such here.
                content = "Using the Media Cloud database, we searched English-language sources published in {}. We gathered all stories published in 2017, 2018, and 2019 from {:n} media sources.  ".format(
                    place_name, place_media_count)
                # guard against a place with no stories at all
                pct = climate_story_count / total_story_count if total_story_count else 0.0
                content += 'This included {:n} total stories. {:.2%} mentioned "climate change" or "global warming" in some way ({:n} stories).'.format(
                    total_story_count, pct, climate_story_count)
                with tag('div', klass="row"):
                    with tag('div', klass="col"):
                        with tag('p', klass="lead"):
                            text(content)
                    # climate story pie chart
                    div_id = "{}-climate-stories-chart".format(place_slug)
                    with tag('div', klass="col"):
                        with tag('div', id=div_id, klass="piechart"):
                            text('')
                        with tag('script'):
                            data = [
                                {'name': 'Climate Change Stories', 'y': climate_story_count, 'color': '#300D28'},
                                {'name': 'Other Stories', 'y': total_story_count - climate_story_count, 'color': '#cccccc'},
                            ]
                            doc.asis('$(function() {{piechart("Climate Stories", "{}", {}, 300)}})'.format(div_id, json.dumps(data)))
                # attention over time chart
                div_id = "{}-climate-attention-chart".format(place_slug)
                with tag('div', klass="row"):
                    with tag('div', klass="col"):
                        with tag('div', id=div_id, klass="linechart"):
                            with tag('script'):
                                data = [r for r in attention_data if r['place'] == p][0]['climate-change-stories']['counts']
                                # NOTE(review): naive datetimes, so timestamp() uses the
                                # machine's local timezone -- confirm the chart expects that.
                                data = [[solr_to_date(r['date']).timestamp()*1000, int(r['count'])] for r in data]
                                doc.asis('$(function() {{linechart("Stories", "{}", {}, 200)}})'.format(div_id, json.dumps(data)))
                # story summary cards
                with tag('div', klass="row"):
                    for topic in ('resilience', 'security', 'migration'):
                        story_stat_card(topic, story_counts)
                # theme sections
                for topic in ('resilience', 'security', 'migration'):
                    topic_section(p, topic, story_counts)
                # source data section
                with tag('div', klass="row"):
                    with tag('div', klass="col"):
                        with tag('h2'):
                            text("Source Data")
                with tag('div', klass="row"):
                    with tag('div', klass="col"):
                        with tag('p', klass="lead"):
                            text("Media Cloud is an open-source platform for media analysis. It stores more than 1.5 billion media stories published since 2010 around the world, and ingests more than 600,000 new stories each day. Media Cloud is a joint project of the MIT Center for Civic Media and the Harvard Berkman Klein Center for Internet and Society.")
                with tag('div', klass="row"):
                    with tag('div', klass="col"):
                        with tag('div', klass="row"):
                            with tag('div', klass="col"):
                                with tag('h3'):
                                    text("Geographic Collections")
                                with tag('p'):
                                    text("We analyzed media sources in {:n} different categories:".format(len(place_collections)))
                                with tag('ul'):
                                    for collection in place_collections:
                                        with tag('li'):
                                            text(collection['label'])
                        with tag('div', klass="row"):
                            stat_card('Total Media Sources', "{:n}".format(place_media_count), 'Media sources from {}'.format(place_name))
                        with tag('div', klass="row"):
                            # NOTE(review): this label and the paragraph below say 2019 while
                            # the summary speaks of 2017-2019 -- confirm which period applies.
                            stat_card('Total Stories', "{:n}".format(total_story_count), "English language stories in 2019")

                    with tag('div', klass="col"):
                        top_count = 20
                        with tag('h3'):
                            text("Top Media Sources")
                        with tag('p'):
                            text("Here are the top {:n} sources from {} in Media Cloud based on the number of articles they published in 2019.".format(top_count, place_name))
                        with tag('ul'):
                            for source in place_media['top_media'][:top_count]:
                                with tag('li'):
                                    text("{} ".format(source['name']))
                                    with tag("small"):
                                        text("- {:n} stories a day".format(int(source['num_stories_90'])))

    # The page declares charset="utf-8", so write it as UTF-8 regardless of the
    # platform's default encoding.
    with open('reports/batik-report-{}.html'.format(place_slug), 'w', encoding='utf-8') as f:
        f.write(indent(doc.getvalue()))
