## BERT Sentiment Analysis

In [24]:
import pandas as pd
import spacy
from transformers import pipeline

In [25]:
data = pd.read_csv("data/refugee_coca_foranalysis.csv")

In [26]:
try:
    nlp = spacy.load("en")
except OSError:
    nlp = spacy.load("en_core_web_sm")

def sent_tokenize(word_list, model=nlp):
    doc = model(word_list)
    sentences = [sent.string.strip() for sent in doc.sents]
    return sentences

In [27]:
data['sentences'] = data['text'].apply(sent_tokenize)

In [28]:
#5-year periods
def get_period(year, startyr, endyr, n=5):
    period_start = []
    for i in range(startyr, endyr+1, n):
        period_start.append(i)
    for index, p in enumerate(period_start):
        if year >= p:
            period = index
            continue
        else:
            break
    return period  
    
data['period'] = data['year'].apply(lambda x: get_period(x, 1991, 2015, n=5) if x>=1991 else 0)

In [29]:
data.head()

Unnamed: 0,text_id,text,word_count,year,genre,subgen,source,title,publication_info,sentences,period
0,2018849,""" bums . "" that 's what radio havana called ...",2950,1990,MAG,124.0,Money,This is the land of opportunity.,"Vol. 19 Issue 8, p98, 8p, 1 chart, 3c, 4bw\r\n","["" bums ., "" that 's what radio havana called ...",0
1,2018850,section : investing expanding petrochemical ...,2514,1990,MAG,124.0,Money,Betting on regional booms.,"Vol. 19 Issue 8, p110, 5p, 1 chart, 2c\r\n",[section : investing expanding petrochemical ...,0
2,2019006,section : clothes that work american history ...,1667,1990,MAG,130.0,MotherEarth,The evolution of jeans.,"p60, 4p, 5c, 2bw\r\n","[section :, clothes that work american history...",0
3,2019061,section : movements from socialist to republi...,1754,1990,MAG,123.0,MotherJones,Serve the people.,"Vol. 15 Issue 5, p18, 3p, 1 illustration\r\n","[section :, movements from socialist to republ...",0
4,2019063,inside a dusty cement-block house with worn ...,6032,1990,MAG,123.0,MotherJones,No road to Tahuanti.,"Vol. 15 Issue 5, p36, 11p, 8bw\r\n",[inside a dusty cement-block house with worn l...,0


In [30]:
# Allocate a pipeline for sentiment-analysis
nlp_sentiment = pipeline('sentiment-analysis')

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=230.0, style=ProgressStyle(description_…




In [35]:
periods = data.period.unique()

sentences = {0: [], 1:[], 2:[], 3:[], 4:[]}
sentiment_over_period = {}
sentiment_scores = []
for p in periods:
    data_period = data[data['period'] == p]
    for sent in data_period['sentences'].sum():
        if 'refugee' in sent: 
            sentences[p].append(sent)
            sentiment = nlp_sentiment(sent)
            polarity = sentiment[0]['label']
            score = sentiment[0]['score']
            
            if polarity=='NEGATIVE':
                score = -score
            
            sentiment_scores.append(score)
    avg = sum(sentiment_scores) / len(sentiment_scores)
    sentiment_over_period[p] = avg

In [39]:
sentences[1]

['we have served incest and rape survivors , battered women , batterers , mentally ill and homeless women , refugees , alcoholics , drug addicts , the disabled , the elderly-and each other .',
 'welcomed many huguenot refugees escaping the religious wars in france in the latter half of the sixteenth century .',
 'moreover , the children in devils lake sent out a warm welcome to the 74 flood-refugee children attending the elementary schools .',
 'the joint government barely functions , the economy is weak , more than two million refugees are still unable to return home , nationalist parties remain in power , and individuals indicted by           as many bosnians say , life is stronger than politics .',
 "in the absence of a magical keystroke that would allow people mentally to adapt to a new alphabet           government , led by heydar aliyev , a former chief of the azerbaijani kgb and member of the soviet politburo , has turned its attention away from alphabets to more-pressing proble

In [37]:
sentiment_over_period

{0: -0.35246733745275916,
 1: -0.333858796295801,
 2: -0.3225823972272067,
 3: -0.3166105217395983,
 4: -0.3061428936151372}

## BERT Text Generation

In [40]:
from transformers import AutoModelWithLMHead, AutoTokenizer

In [41]:
def textgen_results(sequence):
    refugees_textgen = {}
    tokenizer_gpt = AutoTokenizer.from_pretrained("gpt2")
    model_gpt = AutoModelWithLMHead.from_pretrained("gpt2")

    input = tokenizer_gpt.encode(sequence, return_tensors="pt")
    generated = model_gpt.generate(input, max_length=50)
    resulting_string = tokenizer_gpt.decode(generated.tolist()[0])
    refugees_textgen['gpt'] = resulting_string

    for ideology in ['left','right']:
        tokenizer = AutoTokenizer.from_pretrained("bertresults/output_gpt_{}".format(ideology))
        model = AutoModelWithLMHead.from_pretrained("bertresults/output_gpt_{}".format(ideology))

        input = tokenizer.encode(sequence, return_tensors="pt")
        generated = model.generate(input, max_length=50, bos_token_id=1, pad_token_id=1, eos_token_ids=1)
        resulting_string = tokenizer.decode(generated.tolist()[0])
        refugees_textgen['all_{}'.format(ideology)] = resulting_string
        
        for period in range(0,5):
            tokenizer = AutoTokenizer.from_pretrained("bertresults/output_gpt_period{}_{}".format(period, ideology))
            model = AutoModelWithLMHead.from_pretrained("bertresults/output_gpt_period{}_{}".format(period, ideology))

            input = tokenizer.encode(sequence, return_tensors="pt")
            generated = model.generate(input, max_length=50, bos_token_id=1, pad_token_id=1, eos_token_ids=1)
            resulting_string = tokenizer.decode(generated.tolist()[0])

            refugees_textgen['period{}_{}'.format(period, ideology)] = resulting_string
    return refugees_textgen

In [42]:
refugees_textgen = textgen_results("Refugees are")
refugees_textgen

{'gpt': 'Refugees are not allowed to enter the country.\n\nThe government has said it will not allow refugees to enter the country.\n\nThe government has said it will not allow refugees to enter the country.\n\nThe government has said!',
 'all_left': 'Refugees are being held in camps in the middle east. the united states has been sending troops to the region. the united states has been sending troops to the region. and the united states is sending troops to the region. and the united"',
 'period0_left': 'Refugees are being held in camps in the middle east, and the u.n. has been trying to get them out of there.!mr-lehrer : well, i think that the u.n. has been trying"',
 'period1_left': 'Refugees are being held in camps in the north of the country. the government has said that they will be released if they are found guilty.  "" we\'re not going to let them go, "" said a senior government official."',
 'period2_left': 'Refugees are being held in the camp of the refugees, where they are be

In [43]:
textgen_results("Israeli refugees are")

{'gpt': 'Israeli refugees are being held in detention centers in Turkey, where they are being held in a detention center for the first time since the war began.\n\nThe Turkish government has said it will not allow the refugees to return to Turkey, and has!',
 'all_left': 'Israeli refugees are being held in camps in the middle east. the united states has been trying to get them to return to their homes. but the united states has not been able to do that.!mr-macneil : and the united"',
 'period0_left': 'Israeli refugees are being held in camps in the middle east. the united states has been trying to get them out of the camps. the united states has been trying to get them out of the camps. the united states has been trying to get them"',
 'period1_left': 'Israeli refugees are being held in camps in the north of the country.  "" we\'re not going to let them go, "" said a senior official in the camp.  "" we\'re going to take them to the border. """',
 'period2_left': 'Israeli refugees are b

In [44]:
textgen_results("Syrian refugees are")

{'gpt': 'Syrian refugees are being sent to Syria to fight for their country.\n\nThe Syrian government has been accused of using the refugees as human shields, and has been accused of using them as human shields in the past.\n\nThe UN refugee agency!',
 'all_left': 'Syrian refugees are being held in camps in the middle east. the united states has been trying to get the refugees to return to their homes in the middle east. but the united states has not been able to do that.!mr-macne"',
 'period0_left': 'Syrian refugees are being held in camps in the middle east. the united states has been sending troops to the camps to help them. the united states has also been sending troops to the camps to help the refugees. the united states has been sending troops"',
 'period1_left': 'Syrian refugees are being held in camps in the north of the country.              , the refugees are being held in camps in the north of the country.      "',
 'period2_left': 'Syrian refugees are being held in camps in