In [67]:
import tweepy
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import cnfg
from tqdm import tqdm
from pymongo import MongoClient
import json
import pandas as pd
import numpy as np
import nltk
from pprint import pprint
from gensim.parsing.preprocessing import remove_stopwords
from sklearn.feature_extraction.text import CountVectorizer
from gensim import corpora, models, similarities, matutils

import logging
from textblob import TextBlob
import gensim
import pyLDAvis
from pyLDAvis import gensim as gensimvis
import spacy

# Get updated document from database and create a dataframe

In [68]:
# Connect to monogodb server.  Make sure it's running first!
client = MongoClient('localhost', 27017)

client.list_database_names()

['admin',
 'climate_db',
 'climate_db2',
 'climate_db3',
 'config',
 'local',
 'wildlife_db']

In [69]:
# Connect to the database for the first time
climate_db = client['climate_db2']

# Create a collection
climate_collection = climate_db.tweets

In [70]:
res = climate_db['tweets'].find()
res_list = [item for item in res]
df = pd.DataFrame(res_list)
df.head()

Unnamed: 0,_id,created_at,favorite_count,retweet_count,screen_name,text
0,5be9d6bab28573cc45ea668a,2018-11-12 19:13:43,0,0,ndsandberg23,Climate Change: hold my beer! https://t.co/ZBx...
1,5be9d6bab28573cc45ea668b,2018-11-12 19:13:42,0,0,FresnoCountyGOP,"Gov Brown said that Climate Change, not forest..."
2,5be9d6bab28573cc45ea668c,2018-11-12 19:13:41,0,0,PatDeRocH,.@ExportDevCanada provided $10.4 billion in fi...
3,5be9d6bab28573cc45ea668d,2018-11-12 19:13:34,0,0,Grace75646541,In 16 years from now humans will be underwater...
4,5be9d6bab28573cc45ea668e,2018-11-12 19:13:32,0,0,Totalrecoverys1,Climate Change Is Fueling California's Wildfir...


In [71]:
df.shape

(16633, 6)

In [72]:
train_text = df.text[:10000].values



# Look at some tweets

In [73]:
for i in train_text[:10]:
    print(i.lower())
    

climate change: hold my beer! https://t.co/zbx6o5ejts
gov brown said that climate change, not forestry mgmt, is the cause of the fires. it seems more likely that forestr… https://t.co/mtgdnkq77x
.@exportdevcanada provided $10.4 billion in financing to oil &amp; gas companies last year. today, @envirodefence joins… https://t.co/jhbulrvaic
in 16 years from now humans will be underwater due to global warming and we will be so happy. what a exciting time… https://t.co/smnjxpovpc
climate change is fueling california's wildfires, scientists say | here &amp; now https://t.co/fbzreqqbdc #smartnews
debate moderators in this last election pushed candidates hard on #climatechange, especially in gubernatorial debat… https://t.co/zufw20yurd
what happens when renewable energy economically outperforms fossil fuels like coal? are there any arguments for coa… https://t.co/mczp7gun5e
the intensity of california’s forest fires can be directly related to global warming. the emotional sadness that wi… http

# Text Preprocessing

In [74]:
stopwords = nltk.corpus.stopwords.words('english')
newStopWords = ['https', 'climate', 'change', 'co', 'climatechange', 'thing', 'via', 'de', 'global warming', 'global', 'rt', \
               'warming', 'th', 'amp', 'environment']
stopwords.extend(newStopWords)

In [75]:
# Create a CountVectorizer for parsing/counting words
count_vectorizer = CountVectorizer(ngram_range=(1, 3),  
                                   stop_words=stopwords, token_pattern="\\b[a-z][a-z]+\\b")
count_vectorizer.fit(train_text)

CountVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 3), preprocessor=None,
        stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs',...nge', 'thing', 'via', 'de', 'global warming', 'global', 'rt', 'warming', 'th', 'amp', 'environment'],
        strip_accents=None, token_pattern='\\b[a-z][a-z]+\\b',
        tokenizer=None, vocabulary=None)

In [76]:
# Create the term-document matrix
# Transpose it so the terms are the rows
counts = count_vectorizer.transform(train_text).transpose()

In [77]:
# Convert sparse matrix of counts to a gensim corpus
corpus = matutils.Sparse2Corpus(counts)


In [78]:
id2word = dict((v, k) for k, v in count_vectorizer.vocabulary_.items())

In [79]:
len(id2word)

111171

# Perform LDA

In [80]:
# Create lda model (equivalent to "fit" in sklearn)
lda = models.ldamulticore.LdaMulticore(corpus=corpus, num_topics=15, minimum_probability=0.05, id2word=id2word, passes=10)



In [81]:
pprint(lda.print_topics())

[(0,
  '0.003*"help" + 0.003*"stop" + 0.002*"flooding" + 0.002*"deserts" + '
  '0.002*"would" + 0.002*"flooding deserts" + 0.002*"deserts help" + '
  '0.002*"help stop" + 0.002*"deserts help stop" + 0.002*"flooding deserts '
  'help"'),
 (1,
  '0.002*"california" + 0.002*"trump" + 0.002*"fires" + 0.001*"need" + '
  '0.001*"get" + 0.001*"fire" + 0.001*"today" + 0.001*"us" + 0.001*"sea" + '
  '0.001*"level"'),
 (2,
  '0.003*"big" + 0.003*"new" + 0.003*"big oil" + 0.003*"oil" + 0.003*"even" + '
  '0.003*"part" + 0.003*"study" + 0.003*"claims" + 0.003*"big oil claims" + '
  '0.003*"oil claims"'),
 (3,
  '0.004*"wildfires" + 0.004*"california" + 0.002*"california wildfires" + '
  '0.002*"us" + 0.002*"even" + 0.002*"scientists" + 0.002*"brown" + '
  '0.001*"politics" + 0.001*"trump" + 0.001*"new"'),
 (4,
  '0.007*"energy" + 0.007*"government" + 0.006*"department" + '
  '0.006*"protesters" + 0.006*"energy department" + 0.006*"blockade" + '
  '0.005*"protesters blockade" + 0.005*"government en

# Get top words and put them in a from which can be used for google search query

In [82]:
import re

topic_words = []

for i in lda.print_topics():
    new_row = []
    for j in re.split('\+',i[1]):
        k = re.sub(r"[^A-Za-z ]+", '', str(j))
        k = k.strip()
        k = re.sub(r" ",'+', str(k))
        new_row.append(k)
        
    topic_words.append(new_row)


In [83]:
import numpy.ma as ma

compressed_topics = []

for topic in np.array(topic_words):

    remove_list = []
    for word1 in topic:
        check = 0
        for word2 in topic:
            if word1 == word2:
                pass
            elif word1 in word2:
                check = 1
        remove_list.append(check)
    mx = ma.masked_array(topic, mask=remove_list)
    compressed_topics.append(list(mx.compressed()))
                

In [84]:
for i in compressed_topics:
    print(i)

['would', 'deserts+help+stop', 'flooding+deserts+help']
['california', 'trump', 'fires', 'need', 'get', 'today', 'us', 'sea', 'level']
['new', 'even', 'part', 'study', 'big+oil+claims']
['california+wildfires', 'us', 'even', 'scientists', 'brown', 'politics', 'trump', 'new']
['protesters+blockade', 'blockade+government', 'government+energy+department']
['trump', 'neil+young+loses', 'young+loses+home']
['jerry+brown+blames', 'california+wildfires', 'blames+california']
['neil+young', 'trump', 'home', 'losing', 'california+fires', 'criticises']
['real', 'new', 'bears', 'could', 'california', 'polar', 'temperature', 'carbon', 'many', 'energy']
['california', 'trump', 'blames', 'fires', 'protests+leads+arrests']
['california', 'fire', 'ran+republican+stronghold', 'democrat+ran']
['trump', 'california', 'people', 'wildfires', 'blames', 'plan', 'real']
['california', 'trump', 'forest', 'wildfires', 'president', 'new', 'management', 'say', 'sea']
['energy', 'blame', 'fossil+fuels', 'fires+hap

# Create url links for external resources

In [86]:
url_list = []

for i in compressed_topics:
    search_terms = "+".join(i)
    url = "http://www.google.com/search?q={}&btnI".format(search_terms)
    print(url)
    url_list.append(url)
    
    


http://www.google.com/search?q=would+deserts+help+stop+flooding+deserts+help&btnI
http://www.google.com/search?q=california+trump+fires+need+get+today+us+sea+level&btnI
http://www.google.com/search?q=new+even+part+study+big+oil+claims&btnI
http://www.google.com/search?q=california+wildfires+us+even+scientists+brown+politics+trump+new&btnI
http://www.google.com/search?q=protesters+blockade+blockade+government+government+energy+department&btnI
http://www.google.com/search?q=trump+neil+young+loses+young+loses+home&btnI
http://www.google.com/search?q=jerry+brown+blames+california+wildfires+blames+california&btnI
http://www.google.com/search?q=neil+young+trump+home+losing+california+fires+criticises&btnI
http://www.google.com/search?q=real+new+bears+could+california+polar+temperature+carbon+many+energy&btnI
http://www.google.com/search?q=california+trump+blames+fires+protests+leads+arrests&btnI
http://www.google.com/search?q=california+fire+ran+republican+stronghold+democrat+ran&btnI
http:/

# Jensen Shannon inequality

In [91]:
mdiff, annotation = lda.diff(lda, distance='jensen_shannon')

In [92]:
mdiff

array([[0.        , 0.80907799, 0.84923316, 0.83738829, 0.85821396,
        0.89331375, 0.89413322, 0.97178084, 0.82462331, 0.87619591,
        0.85786606, 0.83074138, 0.83262448, 0.85446972, 0.82479364],
       [0.80907799, 0.        , 0.83121388, 0.81822375, 0.85962288,
        0.85333438, 0.87962479, 0.90945448, 0.82438849, 0.85994976,
        0.84522181, 0.81242591, 0.80528339, 0.84015634, 0.80808558],
       [0.84923316, 0.83121388, 0.        , 0.86041654, 0.8929408 ,
        0.91393311, 0.93159406, 0.98356384, 0.85325712, 0.90575303,
        0.86560602, 0.86309918, 0.858615  , 0.87858732, 0.84536188],
       [0.83738829, 0.81822375, 0.86041654, 0.        , 0.8818932 ,
        0.88398522, 0.86356175, 0.95574487, 0.8475964 , 0.88649668,
        0.86349836, 0.84178394, 0.83369558, 0.85996186, 0.83048798],
       [0.85821396, 0.85962288, 0.8929408 , 0.8818932 , 0.        ,
        0.92515659, 0.93475861, 1.        , 0.86517598, 0.90285062,
        0.89451231, 0.88268644, 0.88385927, 

In [137]:
differences = []

for i,v in enumerate(mdiff):
    print(i)
    print(sum(v))
    differences.append(sum(v))

0
12.01445571225112
1
11.75606342891445
2
12.333174909578235
3
12.064734418114286
4
12.533740812795415
5
12.539674010825392
6
12.683112022101415
7
13.41698407467131
8
12.136429504651822
9
12.576126551798936
10
12.324674002417973
11
12.025170964818324
12
12.007973257689002
13
12.315494448582761
14
11.990239241449236


# Get tweets in each topic, for sentiment analysis per topic

In [94]:
corpus_topics = []

for i in range(0,len(corpus)):
    top_topics = lda.get_document_topics(corpus[i])
    top_topics.sort(key=lambda x: x[1], reverse=True)
    corpus_topics.append(top_topics[0][0])

In [95]:
clustered_corpus = list(zip(train_text,corpus_topics))

In [97]:
clustered_tweets = []

for i in range(0,20):
    temp_list = []
    for j in clustered_corpus:
        if i == j[1]:
            temp_list.append(j[0])
            
    clustered_tweets.append(temp_list)
    

# Sentiment Analysis

In [99]:
from textblob import TextBlob

In [100]:
average_polarity = []
average_subjectivity = []


for i in range(0,20):
    polarity = [] # Lies in range [-1,1]  Very negative sentiment to very positive
    subjectivity = [] # [0,1] Very objective statement to very subjective statement
    for current_tweet in tqdm(clustered_tweets[i]):
        text = TextBlob(current_tweet).sentiment

        polarity.append(text.polarity)
        subjectivity.append(text.subjectivity)
        
    average_polarity.append(np.mean(polarity))
    average_subjectivity.append(np.mean(subjectivity))


100%|██████████| 695/695 [00:00<00:00, 2055.33it/s]
100%|██████████| 501/501 [00:00<00:00, 2017.71it/s]
100%|██████████| 591/591 [00:00<00:00, 1905.76it/s]
100%|██████████| 625/625 [00:00<00:00, 2056.48it/s]
100%|██████████| 690/690 [00:00<00:00, 1937.06it/s]
100%|██████████| 677/677 [00:00<00:00, 1860.07it/s]
100%|██████████| 820/820 [00:00<00:00, 1846.85it/s]
100%|██████████| 869/869 [00:00<00:00, 1888.34it/s]
100%|██████████| 625/625 [00:00<00:00, 1856.85it/s]
100%|██████████| 782/782 [00:00<00:00, 1910.42it/s]
100%|██████████| 661/661 [00:00<00:00, 1798.47it/s]
100%|██████████| 622/622 [00:00<00:00, 1828.79it/s]
100%|██████████| 632/632 [00:00<00:00, 1750.49it/s]
100%|██████████| 636/636 [00:00<00:00, 1839.08it/s]
100%|██████████| 574/574 [00:00<00:00, 1816.32it/s]
0it [00:00, ?it/s]
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]


In [101]:
average_polarity

[0.04585297584443269,
 0.05585492315033233,
 0.11821968103735539,
 0.05050814602064602,
 0.03751016113516114,
 0.019643401793512576,
 0.04748294373027605,
 0.05443266989584596,
 0.04151953084415584,
 0.0356266936122275,
 0.04565104876422955,
 0.029615898979845928,
 0.032975253305356146,
 0.043571182482640816,
 0.024402216208346438,
 nan,
 nan,
 nan,
 nan,
 nan]

# Using Vader

In [147]:
clustered_tweets[0]

['https://t.co/J8n4Qr3z7A',
 'God the absolute tragic irony of an actor who’d starred in a film about enormous catastrophic climate change being… https://t.co/oGBGQW0Kih',
 'https://t.co/W0h7SiOQiO',
 'https://t.co/hCn1xt1z8f',
 'https://t.co/pTm4IFii5O',
 "Sooooo, the climate started the fires? Sounds like he's deflecting from CA not practicing land management in order… https://t.co/szetGXBF14",
 'Let the democrats keep this N mind &amp; root out every bit of this network of corruption. Root it ALL out, every Russi… https://t.co/P1Yktz9TJI',
 "Jean Paul Gaultier Drops Fur, Calls Industry 'Absolutely Deplorable' https://t.co/AT9Z0aoPrI #climate #organic… https://t.co/hunY0WkvOn",
 '“Our ultimate goal is for a low carbon sustainable future for all Africans and the shift from fossil fuels like coa… https://t.co/M3LTHG5GZG',
 '” If she’s right, maybe the sea change in public action we desperately need is closer than it seems. It would certa… https://t.co/PHHC4LvZKq',
 '"Sadly, far too muc

In [102]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyser = SentimentIntensityAnalyzer()

In [158]:
average_polarity = []

for i in range(0,20):
    polarity = [] # Lies in range [-1,1]  Very negative sentiment to very positive

    for current_tweet in tqdm(clustered_tweets[i]):
        pol = analyser.polarity_scores(current_tweet)
        polarity.append(pol)
        if pol['compound']<-0.85:
            print(pol['compound'])
            print(current_tweet)

    
    comp = []
    
    for j in polarity:
        comp.append(j['compound'])
        
    average_polarity.append(round(((np.mean(comp)+1)/2)*100,2))




100%|██████████| 695/695 [00:00<00:00, 3226.00it/s]
  0%|          | 0/501 [00:00<?, ?it/s]

-0.8652
What THE FUCK is up with this weather? I'm from Arizona, i'm freezing my nuts off in this shit. Days like this make… https://t.co/GnfHckLNza
-0.9022
Step 1: Delay, delay, delay.  Step 2: Blame The Devil. Step 3: Talk about global warming. My church is being run in… https://t.co/sxPAhFPYfU
-0.9118
31 dead, 100 injured, 6,400 homes destroyed and 1 in 13 Americans living under red flag fire alerts, yet… https://t.co/UHdbwNMy6r
-0.9153
Last week, Trump blamed the fire damage on “gross mismanagement” of forests in California and threatened to cut off… https://t.co/Ir0MLGc3n8


100%|██████████| 501/501 [00:00<00:00, 2598.98it/s]
  0%|          | 0/591 [00:00<?, ?it/s]

-0.8885
Correction: Billions will die if fossil fuels are banned. That's what these death cultists want. https://t.co/Tk6a8sqFQN
-0.9407
MOM! TRUMP DENIES GLOBAL WARMING BUT HES A DIRTY LAZY PIG (excuse 4 greed &amp; being filthy) https://t.co/FCyop8Yk9f
-0.9273
This season’s fires have been particularly destructive, but scientists and insurance companies fear the worst is ye… https://t.co/gxWvVMursk
-0.8619
idiot gov. brown blamed the fires on that bogus climate change crap! here we go with agenda 21! look up that one fo… https://t.co/7m83KBwIij
-0.8848
Where is #trump's science behind his claim? Sick and uncaring response! "31 dead and more than 200 missing as Calif… https://t.co/kCWRrvQpUf
-0.9153
Last week, Trump blamed fire damage on [California's] “gross mismanagement” of forests &amp; threatened to cut off fede… https://t.co/QPQagrEuKb
-0.906
Neil Young is among the many who lost his home. So sad to see the destruction and the lives lost. https://t.co/UELejpnCZh
-0.872
“Your lea

100%|██████████| 591/591 [00:00<00:00, 2780.82it/s]
 44%|████▍     | 278/625 [00:00<00:00, 2774.90it/s]

-0.875
How are people this stupid. The campfire becomes a giant destroying fire because of droughts caused by climate chan… https://t.co/pi8jjps8j7
-0.9022
That's all crap, it's is  your programs dumb ass: Jerry Brown Blames Climate Change for California Wildfires… https://t.co/OvUqfwMryB
-0.8513
This is just as dumb as the “Global Warming, my ass! It’s snowing!” argument. https://t.co/0b22RIfFTd
-0.8779
And Trump rants on behalf of the logging industry about poor foresting procedures: Wildfires kill 25 in California… https://t.co/iG9GFn9q1G


100%|██████████| 625/625 [00:00<00:00, 2770.24it/s]
 97%|█████████▋| 666/690 [00:00<00:00, 3100.24it/s]

-0.9022
That's all crap, it's is  your programs dumb ass: Jerry Brown Blames Climate Change for California Wildfires… https://t.co/OvUqfwMryB
-0.91
This World War I Battlefield Is a Haunting Reminder of the Environmental Costs of War https://t.co/BDN68ITRws… https://t.co/BFZDoniGG9
-0.8834
Lineup stupid people to eat up your gullible pile of crap lies for the day https://t.co/ksFMCofE1K
-0.872
Earlier this year California suffered the worst fire in the state's history because global warming has adversely af… https://t.co/Bbz0YDowBv
-0.8824
It's ironic that I've seen more media advertising apocalyptic, disaster films than I have media reporting on the da… https://t.co/B1unsk6Sgg
-0.886
Read this. It’s a frightening story about a government stealing, oppressing, and attempting to destroy its indigeno… https://t.co/vfgScc3kWU
-0.8797
10+ DEAD in #CaliforniaFires and the man who caused them through CRIMINAL forest mismanagement - far left Jerry Bro… https://t.co/AeYtouXy8d


100%|██████████| 690/690 [00:00<00:00, 3332.78it/s]
100%|██████████| 677/677 [00:00<00:00, 3222.67it/s]
  0%|          | 0/820 [00:00<?, ?it/s]

-0.8519
“Global warming, is the real thing‼️”

“You’re killing me❗️”

“Since 1970 mankind has killed 60% of all animals‼️”… https://t.co/Y0AwwhIcOf
-0.9001
Increased rate of wild fires that destroy homes and lives and leave trails of death and fear and loss is a direct r… https://t.co/NbNXPliVb6
-0.891
As the Southeast recovers from devastating hurricanes, California suffers from catastrophic wildfires.  The common… https://t.co/9un86FR6r4
-0.91
this is slavery and child abuse 

GET OFF FOSSIL FUELS - PGE should be fighting this https://t.co/Ro2QsxL8Sx
-0.8567
Trump also rejects climate change. He is ONE BIG LIE. #ONLY A COMPLETE IDIOT COULD SUPPORT HIM. HE IS PRO DEATH, PR… https://t.co/UZ2uozTb4I
-0.9438
Neil Young Loses Home in Woolsey Fire, Blasts Climate Change 'Denier' Trump 
SO SORRY YOU LOST YOUR HOME.  SAD.  LO… https://t.co/tNyMFq0Cyq
-0.8766
228 people still missing in Paradise as grim search for victims continues — Feeling So Bad &amp; Guilty for my part in… https://t.co/wm

100%|██████████| 820/820 [00:00<00:00, 2931.12it/s]
  0%|          | 0/869 [00:00<?, ?it/s]

-0.886
These Hellish fires in CA are beyond a Horror our weather has been unstable for the last years,it been Hotter &amp; Dry… https://t.co/nFtbgjQsyR
-0.8883
"In the face of worrying troubles like climate change, pollution, gun violence, and a highly charged atmosphere in… https://t.co/0qu3mkVhlp
-0.8632
Millions will die if fossil fuels *are* banned, you fucking buffoon.

Are people really this ignorant when it comes… https://t.co/u2GsDtwNPm
-0.9054
Seriously? How many people has nuclear war killed compared to abortion? 
Also, nice to see that sin didn’t make it… https://t.co/ytN3CuH2mV
-0.9246
😫 Bitch ass Republicans ordered the climate change probe to be destroyed and now it’s fucking up the Research


100%|██████████| 869/869 [00:00<00:00, 3007.91it/s]
100%|██████████| 625/625 [00:00<00:00, 3469.60it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

-0.9359
I'm so sick off this idiot Gov. Brown he's such a freaking moron. Trying to blame these fires on climate change.. L… https://t.co/QDWnB1cCV6
-0.9274
Neil Young used to oppose US war crimes &amp; terrorism, racism &amp; political corruption. That was many, many years ago. https://t.co/CFHTS0pYS4
-0.9468
This is cruel and irresponsible.

Using death and tragedy to falsely attack the Party that believes in &amp; educates o… https://t.co/DVqN62Ztc3
-0.9325
We are at war with Climate Change, but instead of fighting the enemy, we are aiding, abetting, ignoring, &amp; fueling… https://t.co/nfsy2h1phr
-0.8822
.@JerryBrownGov - THEN BAN FRACKING &amp; STOP ISSUING DRILLING PERMITS! Gov. Brown Blames Climate Change for Fires as… https://t.co/fuEjgzE2oQ
-0.9025
Gov Brown's failure caused California fire catastrophy.  He failed to manage forest and climate.  So blame others

https://t.co/WjzCsAaBxI
-0.8779
NRDC's Heglar: "Climate change is bad. Real bad. Imagine a terrible nightmare. Multi

100%|██████████| 782/782 [00:00<00:00, 3319.97it/s]
  0%|          | 0/661 [00:00<?, ?it/s]

-0.91
https://t.co/FRECTlFWw2  How many’ll die? How much of America will be destroyed to apocalyptic proportion before vo… https://t.co/oclBYPsDHQ
-0.8689
How does one man consistently say and do the wrong thing?  He is as dumb as a doorknob.  His stupid stance on clima… https://t.co/scjSp5kFDV
-0.8689
#Republicans Educate yourselves instead of blaming dead people and victims. It’s called climate change. Listen to t… https://t.co/sgJ9rDeZXp
-0.8689
#Republicans Educate yourselves instead of blaming dead people and victims. It’s called climate change. Listen to t… https://t.co/ejpRZGiTIF
-0.9217
Youre stupid. Educate yourself instead of blaming dead people and victims. It’s called climate change. Listen to th… https://t.co/rjBpgljrbs
-0.891
#Neil_Young criticises @realdonaldtrump lose home in California fires shame @Usatoday poorest #Lesotho  
 @debeers… https://t.co/3DRyZMlgbm
-0.8767
BBC News - Climate change protests leads to '22 arrests' over blockade

Arrested? For protesting? 

*h

100%|██████████| 661/661 [00:00<00:00, 2911.06it/s]
  0%|          | 0/622 [00:00<?, ?it/s]

-0.8553
Fire chief: climate change helped make California wildfires more devastating

Daryl Osby says fire in north of stat… https://t.co/YML1vLd5t9
-0.8826
Every day without action is another day that the U.S. government shrugs off the most devastating catastrophe in hum… https://t.co/nOgxlIamoz
-0.8947
It's UP! @POLITICO CA Playbook: TRUMP ATTACKS as FIRES RAGE -- BROWN warns climate change deniers contribute to 'tr… https://t.co/BXoMgHkAJd
-0.875
Every time I think about climate change I legit start to have a panic attack, this shit ain’t right man
-0.8834
"Can globalization and the fight against climate change co-exist? Or is the environment doomed to suffer as people… https://t.co/JvkAqiUByl


100%|██████████| 622/622 [00:00<00:00, 2606.06it/s]
  0%|          | 0/632 [00:00<?, ?it/s]

-0.9217
false enviro catastrophe. climate change terrorists and SOE money scam. a bit too obvious... now boys anyone with the stones to act on it ?
-0.9201
Irresponsible Waste Disposal

Irresponsible disposal of waste can cause many different environmental problems. It c… https://t.co/j3wTsY4O9Y
-0.8905
Climate change is real and is actively killing people. Our failure to act now means that even more will die before… https://t.co/POsi28UUwR
-0.9044
OMG People are so stupid!! What do they want for climate change??? Money. What the hell will that do??? #letshurtthepeoplemore
-0.8877
118 Currently on FIRE. Fire Chief States LEAVE before evacuation orders if fire threatens your area. Also states Tr… https://t.co/yUuYmQvn9h


100%|██████████| 632/632 [00:00<00:00, 2490.75it/s]
  0%|          | 0/636 [00:00<?, ?it/s]

-0.9468
#California suffered a devastating weekend as #wildfires raging in both the south and north of the state killed 31… https://t.co/n0uKJd94a1
-0.9092
CNN Meteorologist Explains Why President Trump is So Wrong About California&amp;#39;s Wildfires
TRUMP IS A DAMN IDIOT B… https://t.co/zeG66gvERz
-0.8832
Scientifcally and morally unacceptable that President Trump blames forest management on the most devastating forest… https://t.co/yexJlBrrYj
-0.8625
31 dead in devastating #California #wildfires. Here's what to know - TIME #disaster #climate #Woolseyfires… https://t.co/CSt0078RSN
-0.9468
California suffered a devastating weekend as wildfires raging in both the south and north of the state killed 31 an… https://t.co/4COgWb8Tri
-0.8938
CALIFORNIA FASCIST DICTATOR JERRY BROWNSHIRT SAYS 'CLIMATE CHANGE DENIERS' ARE PARTLY TO BLAME FOR CALIFORNIA WILDF… https://t.co/fd9KFkWGj7
-0.9041
“government’s failure to take action on climate change” is simply our failure. Stop coaching helplessnes

100%|██████████| 636/636 [00:00<00:00, 2697.01it/s]
  0%|          | 0/574 [00:00<?, ?it/s]

-0.8658
Trump blaming Liberal Californians for the fires is no crazier than the Left blaming every natural disaster on Global Warming.
-0.886
I believe if there were no hungry or poor in the world there'd be no war. I believe striving for that goal is the p… https://t.co/DAP0OGMOLm
-0.8543
Too bad natl embarrassment &amp; loss of stature in the world aren't worth money. @realdonaldtrump opens his mouth &amp; we… https://t.co/K3KJUP4XTm
-0.9001
New article about #climate from Guardian: You thought fake news was bad? Deep fakes are where truth goes to die https://t.co/C6zNEAlwVu
-0.9001
At least 29 dead in #CampFire, 2 dead in #WoolsyFire. Camp fire is 25% contained, Woolsey is 20%. Santa Anas expect… https://t.co/LfexgDsLKC
-0.9306
Pisses me off that people are blaming Trump for the fires... HOW THE FUCK IS IT HIS FAULT!?! NEWSFLASH: YOUR PRECIO… https://t.co/1cakcr5GtN
-0.8983
Climate Change is real and for dooming us all, F*~&gt; people who deny or don't care! 🤬

Plant trees, reduce w

100%|██████████| 574/574 [00:00<00:00, 2865.80it/s]
0it [00:00, ?it/s]
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]

-0.8553
She thinks your husband is a fucking dipshit for thinking climate change is fake and she also hates him for opening… https://t.co/MMFYuSFXHN
-0.9178
Crazy thought, but what with debt levels reaching crisis points across the globe, massive borrowing, insane consump… https://t.co/NWoYDefBhv
-0.8625
#climatechange @UN_HRC government in Chile attacks with repression with physical and psychological violence against… https://t.co/gpSQIlvcXa
-0.8689
OMG and the "Rosa Parks is too politically correct" but NO ONE gave a shit when there were hundreds of Ecclestone/T… https://t.co/kAHaDYPpdg
-0.9168
TAKE A FEW MORE YARDS, THEY GOT A LOT OF FOLKS TO IGNORE FACTS ABOUT FAKE CLIMATE CHANGE, FRAUD! VOTE FOR ACTS BEFO… https://t.co/q2KLzkjmXA
-0.8748
Jerry Brown Blames Climate Change for California Wildfires. Talk about ignoring science! All those dead trees you r… https://t.co/xLJJXkN2YZ
-0.8903
You fucking, moronic, racist pig. No help for California because the mismanaged their land use. He




In [105]:
no_tweets = []

for i in clustered_tweets:
    print(len(i))
    no_tweets.append(len(i))

695
501
591
625
690
677
820
869
625
782
661
622
632
636
574
0
0
0
0
0


# Scrape images using BeauituflSoup and crate a dictionary to store information for Flask app

In [134]:
from collections import defaultdict
from bs4 import BeautifulSoup
import urllib
import ssl
import os

climate_topics = []

ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

num = 1


for i,v in enumerate(compressed_topics):
    topic_dict = {}
    topic_dict['topic'] = [' '.join(k.split('+')) for k in v[0:5]]
    topic_dict['topic_no'] = num
    topic_dict['link'] = url_list[i]
    topic_dict['image'] = "/static/c"+str(num)+".jpg"
    topic_dict['no_tweets'] = no_tweets[num-1]
    topic_dict['sent'] = average_polarity[num-1]
    
    if average_polarity[num-1] > 50:
        topic_dict['sent_color'] = 'mediumseagreen'
    else:
        topic_dict['sent_color'] = 'orangered'
    
    climate_topics.append(topic_dict)
    
    if len(v)>5:
        print('hello')
        v = v[:6]
    
    #v.sort(key = len,reverse=True)
 
    
    query = "+".join(v[0:5])
    print(query)
    
    url = "http://www.bing.com/image/search?q=%s" % (query)
    uh= urllib.request.urlopen(url, context=ctx)
    html =uh.read().decode()
    
    try:
        soup = BeautifulSoup(html, 'html.parser')
        mydiv = soup.find("div", class_="img_cont hoff")
        link = mydiv.findChildren()[0]['src']
        
    except AttributeError:     
        
        query = "+".join(v[0:1])
        print(query)
        url = "http://www.bing.com/image/search?q=%s" % (query)
        uh= urllib.request.urlopen(url, context=ctx)
        html =uh.read().decode()
        soup = BeautifulSoup(html, 'html.parser')
        mydiv = soup.find("div", class_="img_cont hoff")
        link = mydiv.findChildren()[0]['src']
        

    
    fullfilename = os.path.join('/Users/nazim/Desktop/project_fletcher/flask_app/static', "c"+str(num)+".jpg")
    urllib.request.urlretrieve(str(link), fullfilename)
    num +=1


flooding+deserts+help+deserts+help+stop+would
hello
california+trump+fires+today+level
big+oil+claims+study+even+part+new
hello
california+wildfires+scientists+politics+brown+trump
government+energy+department+protesters+blockade+blockade+government
neil+young+loses+young+loses+home+trump
california+wildfires+jerry+brown+blames+blames+california
hello
california+fires+neil+young+criticises+losing+trump
california+fires
hello
temperature+california+carbon+energy+bears
protests+leads+arrests+california+blames+trump+fires
ran+republican+stronghold+democrat+ran+california+fire
hello
california+wildfires+people+blames+trump
hello
california+management+wildfires+president+forest
california+fires+fires+happening+fossil+fuels+energy+blame
california+fires
hello
ordinary+people+researchers+california+according+every


In [135]:
climate_topics

[{'topic': ['flooding deserts help', 'deserts help stop', 'would'],
  'topic_no': 1,
  'link': 'http://www.google.com/search?q=would+deserts+help+stop+flooding+deserts+help&btnI',
  'image': '/static/c1.jpg',
  'no_tweets': 695,
  'sent': 52.28,
  'sent_color': 'mediumseagreen'},
 {'topic': ['california', 'trump', 'fires', 'today', 'level'],
  'topic_no': 2,
  'link': 'http://www.google.com/search?q=california+trump+fires+need+get+today+us+sea+level&btnI',
  'image': '/static/c2.jpg',
  'no_tweets': 501,
  'sent': 49.32,
  'sent_color': 'orangered'},
 {'topic': ['big oil claims', 'study', 'even', 'part', 'new'],
  'topic_no': 3,
  'link': 'http://www.google.com/search?q=new+even+part+study+big+oil+claims&btnI',
  'image': '/static/c3.jpg',
  'no_tweets': 591,
  'sent': 53.03,
  'sent_color': 'mediumseagreen'},
 {'topic': ['california wildfires',
   'scientists',
   'politics',
   'brown',
   'trump'],
  'topic_no': 4,
  'link': 'http://www.google.com/search?q=california+wildfires+us+ev

# Pickle dictionary with information

In [136]:
import pickle

with open('climate.pkl', 'wb') as picklefile:
    pickle.dump(climate_topics, picklefile)
    
with open('climate_lda.pkl', 'wb') as picklefile:
    pickle.dump(lda, picklefile)
    