# Batik Resilience Project 

In [9]:
import mediacloud, datetime, time, json, re, random, os, time, csv, mediacloud.api, operator
from datetime import date, timedelta
# Shared Media Cloud client used by the query cells below. The API key is read
# from the MC_API_KEY environment variable; if it is unset this line raises
# KeyError.
mc = mediacloud.api.AdminMediaCloud(os.environ['MC_API_KEY'])
# Bare expression (no assignment): evaluates to the installed client version,
# recorded as this cell's output.
mediacloud.__version__

'3.7.4'

In [10]:
# Date-range restriction passed as `solr_filter` to every story, tag and word
# count query in this notebook.
timespan = "publish_day:[2017-01-01T00:00:00Z TO 2019-12-01T00:00:00Z]"
# IDs joined into the `tags_id_media:(...)` clause for the US side of the
# length comparison.
us_media = ['34412234', '38379429']
# Maps each place to the IDs joined into that place's `tags_id_media:(...)`
# clause. The key itself is interpolated verbatim as the place search term in
# every query, which is why the multi-word place carries its own embedded
# double quotes.
places = {
    'Sydney': ['34412282', '38378024'],
    'Chennai': ['34412118', '38379954'],
    'Florida': ['38379430'],
    'Alaska': ['38381315'],
    'Kenya': ['34412126', '38380260'],
    'Singapore': ['34412474'],
    'London': ['34412476', '38381111'],
    'Brazil': ['34412257', '38379250'],
    # NOTE(review): '34412204' appears twice in this list — confirm whether the
    # repeat is a typo for a different ID or should simply be removed.
    'Islands': [ '34412175', '34412204', '34412411', '38381481', '34412109', '34412204', '34412168', '34412399', '38380877', '34412137' ],
    '"Saudi Arabia"': ['34412050', '38380804'],
    # The entries below are commented out and take no part in any query.
    # 'Argentina': ['34412043', '38376412'],
    # 'Paris': ['34412146', '38379799'],
    # 'Amsterdam': ['34412382', '38380454'],
    # 'Rome': ['34412372', '38380117'],
    # 'Milan': ['34412372', '38380117'],
    # 'Madrid': ['34412356', '38002034'],
    # 'Athens': ['34412477', '38379845'],
    #'Mexico': ['34412427', '38380322'],
    # 'Rotterdam': ['34412382', '38380454'],
    
}
# Maps a topic label to the query fragment that is AND-ed with the place term.
# The `"a b"~40` form is presumably proximity-search syntax (both words within
# 40 positions) — TODO confirm against the search backend's query docs.
queries = {
    'security': '"security climate"~40',
    'resilience': '"resilience climate"~40',
    'migration': '(("migration climate"~40) OR  ("migrant climate"~40) OR  ("migrate climate"~40)  OR  ("immigration climate"~40)  OR  ("immigrate climate"~40))'
}

### Compute Batik Length Data

In [262]:
# Build one row per (place, topic) pair with story counts from the place's own
# media collections and from the US collections: stories matching the topic
# query ("relevant") and all stories mentioning the place ("total").
length_data = []
relevant_template = "{} AND {} AND language:en AND tags_id_media:({})"
total_template = "{} AND language:en AND tags_id_media:({})"
us_media_clause = " ".join(us_media)
for p, m in places.items():
    local_media_clause = " ".join(m)
    # The totals depend only on the place, not on the topic, so they are
    # fetched once per place instead of once per (place, topic) pair.
    local_total = mc.storyCount(total_template.format(p, local_media_clause), solr_filter=timespan)['count']
    us_total = mc.storyCount(total_template.format(p, us_media_clause), solr_filter=timespan)['count']
    for t, q in queries.items():
        local_relevant = mc.storyCount(relevant_template.format(p, q, local_media_clause), solr_filter=timespan)['count']
        us_relevant = mc.storyCount(relevant_template.format(p, q, us_media_clause), solr_filter=timespan)['count']
        row = {
            'place': p,
            'topic': t,
            'local_relevant': local_relevant,
            'local_total': local_total,
            'us_relevant': us_relevant,
            'us_total': us_total,
            # Plain sums of the local and US figures.
            'combined_relevant': local_relevant + us_relevant,
            'combined_total': local_total + us_total,
        }
        length_data.append(row)

In [263]:
import csv
import json
# Persist the per-(place, topic) story counts as both JSON and CSV.
with open('batik-data/batik-length.json', 'w') as f:
    json.dump(length_data, f)
# newline='' hands line-ending control to the csv module, as its documentation
# requires; without it, platforms that translate '\n' get an extra blank line
# after every row.
with open('batik-data/batik-length.csv', 'w', newline='') as csvfile:
    length_writer = csv.DictWriter(csvfile, fieldnames=['place', 'topic', 'local_relevant', 'local_total',
                                                        'us_relevant', 'us_total', 'combined_relevant', 'combined_total'])
    length_writer.writeheader()
    length_writer.writerows(length_data)

### Compute Batik Themes

In [None]:
NYT_LABELS_TAG_SET_ID = 1963  # tag set that contains all of the descriptor tags
# For every (place, topic) pair, collect the tag counts within the descriptor
# tag set for stories matching the place, the topic query and the place's media.
theme_query = "{} AND {} AND language:en AND tags_id_media:({})"
theme_data = []
for place, media_ids in places.items():
    for topic, topic_query in queries.items():
        tag_counts = mc.storyTagCount(
            theme_query.format(place, topic_query, " ".join(media_ids)),
            solr_filter=timespan,
            tag_sets_id=NYT_LABELS_TAG_SET_ID,
        )
        themes = []
        for tag in tag_counts:
            # Each tag's 'description' field is stored under 'name'.
            themes.append({'count': tag['count'], 'name': tag['description'], 'tags_id': tag['tags_id']})
        theme_data.append({
            'place': place,
            'topic': topic,
            'top_themes': themes,
        })

In [None]:
import csv
import json
# Persist the themes as JSON, plus a CSV with one column per (place, topic)
# pair and one row per theme rank (first 20 ranks).
with open('batik-data/batik-themes.json', 'w') as f:
    json.dump(theme_data, f)
# newline='' hands line-ending control to the csv module, as its documentation
# requires.
with open('batik-data/batik-themes.csv', 'w', newline='') as csvfile:
    theme_writer = csv.writer(csvfile)
    theme_writer.writerow(["{} - {}".format(i['place'], i['topic']) for i in theme_data])
    for idx in range(0, 20):
        # Pad with an empty cell when a pair has fewer than idx + 1 themes.
        # Dropping the cell instead would shift every later column left and
        # put values under the wrong "place - topic" header.
        theme_writer.writerow([
            i['top_themes'][idx]['name'] if idx < len(i['top_themes']) else ''
            for i in theme_data
        ])

## Compute Climate Coverage Counts

In [260]:
# One row per place: story counts for general climate coverage, for each of
# the three topic queries, and for all stories mentioning the place.
climate_data = []
for place, media_ids in places.items():
    media_clause = " ".join(media_ids)

    climate_total = mc.storyCount(
        '{} AND ("climate change" OR "global warming") AND language:en AND tags_id_media:({})'.format(place, media_clause),
        solr_filter=timespan,
    )

    # Requests are issued in the order security, resilience, migration.
    topic_totals = {}
    for topic in ('security', 'resilience', 'migration'):
        topic_totals[topic] = mc.storyCount(
            '{} AND {} AND language:en AND tags_id_media:({})'.format(place, queries[topic], media_clause),
            solr_filter=timespan,
        )

    overall_total = mc.storyCount(
        "{} AND language:en AND tags_id_media:({})".format(place, media_clause),
        solr_filter=timespan,
    )

    climate_data.append({
        'place': place,
        'security-stories': topic_totals['security']['count'],
        'resilience-stories': topic_totals['resilience']['count'],
        'migration-stories': topic_totals['migration']['count'],
        'climate-change-stories': climate_total['count'],
        'total-stories': overall_total['count'],
    })

In [261]:
import csv
import json
# Persist the per-place climate coverage counts as both JSON and CSV.
with open('batik-data/batik-climate-coverage.json', 'w') as f:
    json.dump(climate_data, f)
# newline='' hands line-ending control to the csv module, as its documentation
# requires; without it, platforms that translate '\n' get an extra blank line
# after every row.
with open('batik-data/batik-climate-coverage.csv', 'w', newline='') as csvfile:
    coverage_writer = csv.DictWriter(csvfile, fieldnames=["place", "security-stories", "resilience-stories", "migration-stories", "climate-change-stories", "total-stories"])
    coverage_writer.writeheader()
    coverage_writer.writerows(climate_data)

## Compute Climate Coverage Over Time

In [264]:
# One entry per place holding the raw split-by-week storyCount responses for
# general climate coverage, each topic query, and all stories for the place.
climate_attention_data = []
weekly_options = {'solr_filter': timespan, 'split': True, 'split_period': 'week'}
topic_template = '{} AND {} AND language:en AND tags_id_media:({})'
for place, media_ids in places.items():
    media_clause = " ".join(media_ids)
    climate_weekly = mc.storyCount(
        '{} AND ("climate change" OR "global warming") AND language:en AND tags_id_media:({})'.format(place, media_clause),
        **weekly_options
    )
    security_weekly = mc.storyCount(topic_template.format(place, queries['security'], media_clause), **weekly_options)
    resilience_weekly = mc.storyCount(topic_template.format(place, queries['resilience'], media_clause), **weekly_options)
    migration_weekly = mc.storyCount(topic_template.format(place, queries['migration'], media_clause), **weekly_options)
    overall_weekly = mc.storyCount(
        "{} AND language:en AND tags_id_media:({})".format(place, media_clause),
        **weekly_options
    )
    # The responses are stored whole (not just a 'count' field).
    climate_attention_data.append({
        'place': place,
        'security-stories': security_weekly,
        'resilience-stories': resilience_weekly,
        'migration-stories': migration_weekly,
        'climate-change-stories': climate_weekly,
        'total-stories': overall_weekly,
    })

In [265]:
import csv
import json
# Serialize the weekly attention data first, then write it out in one call.
attention_json = json.dumps(climate_attention_data)
with open('batik-data/batik-climate-attention.json', 'w') as out_file:
    out_file.write(attention_json)


## Top Words for each Pair

In [183]:
# For every (place, topic) pair, fetch the top words of the matching stories,
# leaving out the place name itself and the word 'climate'.
word_data = []
word_query = "{} AND {} AND language:en AND tags_id_media:({})"
for p, m in places.items():
    # A place written as a quoted phrase (e.g. '"Saudi Arabia"') can never
    # equal a single term while it still carries its quotes, so strip them and
    # exclude each word of the name separately. Single-word places behave
    # exactly as before.
    # NOTE(review): assumes wordCount terms are single lowercase tokens — confirm.
    excluded_terms = set(p.replace('"', '').lower().split())
    excluded_terms.add('climate')
    media_clause = " ".join(m)
    for t, q in queries.items():
        top_words = mc.wordCount(word_query.format(p, q, media_clause), solr_filter=timespan)
        item = {
            'place': p,
            'topic': t,
            'top_words': [{'word': w['term'], 'freq': w['count']} for w in top_words if w['term'] not in excluded_terms]
        }
        word_data.append(item)

In [185]:
import csv
import json
# Persist the top words as JSON, plus a CSV with one column per (place, topic)
# pair and one row per word rank (first 40 ranks).
with open('batik-data/batik-words.json', 'w') as f:
    json.dump(word_data, f)
# newline='' hands line-ending control to the csv module, as its documentation
# requires.
with open('batik-data/batik-words.csv', 'w', newline='') as csvfile:
    word_writer = csv.writer(csvfile)
    word_writer.writerow(["{} - {}".format(i['place'], i['topic']) for i in word_data])
    for idx in range(0, 40):
        # Pad with an empty cell when a pair has fewer than idx + 1 words.
        # Dropping the cell instead would shift every later column left and
        # put values under the wrong "place - topic" header.
        word_writer.writerow([
            i['top_words'][idx]['word'] if idx < len(i['top_words']) else ''
            for i in word_data
        ])

## Media Source Counts

In [15]:
# For each place, page through the media sources of all its collections and
# record how many distinct sources there are, plus the 20 sources with the
# highest 'num_stories_90' value.
counts = []
for p, m in places.items():
    # Keyed by media_id so a source reached through more than one collection
    # is counted (and listed in top_media) only once.
    media_by_id = {}
    # dict.fromkeys drops repeated collection IDs while keeping their order,
    # so a collection listed twice for a place is not paged through twice.
    for tags_id in dict.fromkeys(m):
        last_id = 0
        while True:
            media_page = mc.mediaList(tags_id=tags_id, rows=100, last_media_id=last_id)
            if not media_page:
                # An empty page marks the end of this collection.
                break
            for media in media_page:
                media_by_id.setdefault(media['media_id'], media)
            # Resume the next page after the last media_id already seen.
            last_id = media_page[-1]['media_id']
    item = {
        'place': p,
        'media_count': len(media_by_id),
        'top_media': sorted(media_by_id.values(), key=operator.itemgetter('num_stories_90'), reverse=True)[:20]
    }
    counts.append(item)

In [16]:
import csv
import json
# Persist the full media-count records as JSON, and a CSV holding only the
# place and media_count columns.
with open('batik-data/batik-media-counts.json', 'w') as f:
    json.dump(counts, f)
# newline='' hands line-ending control to the csv module, as its documentation
# requires; without it, platforms that translate '\n' get an extra blank line
# after every row.
with open('batik-data/batik-media-counts.csv', 'w', newline='') as csvfile:
    # extrasaction='ignore' skips the 'top_media' key, which is not a CSV column.
    media_count_writer = csv.DictWriter(csvfile, fieldnames=["place", "media_count"], extrasaction='ignore')
    media_count_writer.writeheader()
    media_count_writer.writerows(counts)

## Collections

In [17]:
# Look up the tag record for every collection ID of every place.
collection_data = []
for place, collection_ids in places.items():
    tag_records = []
    for tags_id in collection_ids:
        tag_records.append(mc.tag(tags_id))
    collection_data.append({
        'place': place,
        'collections': tag_records,
    })

In [18]:
import csv
import json
# Serialize the collection records first, then write them out in one call.
collections_json = json.dumps(collection_data)
with open('batik-data/batik-collections.json', 'w') as out_file:
    out_file.write(collections_json)