#### Simple Approach for Emotion Analysis

In this notebook, we apply a simple approach to capture the emotions expressed in tweets.
first, we clean the tweet by removing https links,hashtags, and mentions. then we remove stop words.
after that we use the NRC Emotion lexicon to look for emotion mapping for each word in a tweet and generate an emotion vector with 8 entries for each tweet, each entry in the vector represent a single emotion.

In [1]:
import pandas as pd
from textblob import TextBlob
import numpy as np
import  csv
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline 
import re
from nltk.corpus import stopwords
import nltk

In [14]:
## text cleaning

def clean_up_text(text):
    '''
    remove https links, hashtags and mentions
    '''
    hashtag = re.compile(r'[#]\w*')
    https = re.compile(r'https?:\/\/[a-zA-z0-9\/#%\.]+')
    mention = re.compile(r'[@]\w*')
    text = re.sub(hashtag, '', text)
    text = re.sub(https, '', text)
    text = re.sub(mention, '', text)
    return text


In [15]:
def remove_stop_words(text_list,stop_word_language):
    print("remove stop words start")
    c_t=[]
    for text in text_list:
        t=clean_up_text(text)
        c_t.append(t)
        
    nostopwords_lower_list=[]
    i=0
    for text in c_t:
        words_list=nltk.tokenize.word_tokenize(text)
        filtered_words = [word for word in words_list if word not in stopwords.words(stop_word_language)]
        filtered_words=' '.join(filtered_words)
        nostopwords_lower_list.append(filtered_words.lower())
        if (i%10000 == 0):
            print(i)
        i+=1
    return nostopwords_lower_list
    #do more text cleaning 
    
    
    
    

In [138]:
def emotion_analysis(tweet_file_path, language, stop_word_language):
    tweet=pd.read_csv(tweet_file_path)
    print("number of weets: ",len(tweet))
    #text_list=tweet['text'].tolist()
    #nostopwords_lower_list=remove_stop_words(text_list,stop_word_language)
    #tweet['cleaned_text']=nostopwords_lower_list
    
    
    nostopwords_lower_list=tweet['cleaned_text'].tolist()
    
    
    lec=pd.read_excel('NRC-Emotion-Lexicon-v0.92-InManyLanguages-web.xlsx')
    lec_language=lec[[language,'Positive','Negative','Anger','Anticipation','Disgust','Fear','Joy','Sadness','Surprise','Trust']]
    lec_language[language]=lec_language[language].str.lower()
    lec_language=lec_language.drop_duplicates(language)
    lec_language=lec_language.set_index([language])

    print("start emotion analysis")
    ee=0
    emotion_final_list=[]
    #y=1
    for text in nostopwords_lower_list:
        s=str(text).split(' ')
        #tweet=pd.DataFrame()
        emotion_list=[]
        for ss in s:
            if ss in lec_language.index:

            #row=lec_language.loc[lec_language[language] == ss]
                row=lec_language.loc[ss]


                ll=[row[0],row[1],row[2],row[3],
                    row[4],row[5],
                    row[6],row[7],row[8],row[9]]
                
                emotion_list.append(ll)

        #print("tweet: ", y)
        #y+=1
        #print(emotion_list)

        k=[sum(i) for i in zip(*emotion_list)]
        if(k==[]):
            emotion_final_list.append([0,0,0,0,0,0,0,0,0,0])
        else:
            emotion_final_list.append(k)
        ee+=1
        if (ee%100000 == 0):
            print(ee)
    
    tweet=pd.read_csv(tweet_file_path)
    tweet['cleaned_text']=nostopwords_lower_list
    
    headers=['Positive','Negative','Anger','Anticipation','Disgust','Fear','Joy','Sadness','Surprise','Trust']
    df = pd.DataFrame(emotion_final_list, columns=headers)
    
    
    tweet=pd.concat([tweet,df], axis=1)
    
    return tweet
    
    

# English Tweets

In [None]:
tweets_with_emptions=emotion_analysis('intermediate_data/en_cleaned.csv', 'English Word','english')

In [None]:
#tweets_with_emptions.to_csv('results/en_emotions.csv')

# French Tweets

In [None]:
tweets_with_emptions=emotion_analysis('intermediate_data/fr_cleaned.csv', 'French Translation (Google Translate)','french')

In [95]:
tweets_with_emptions.head()

Unnamed: 0,id,userId,createdAt,text,canton,language,cleaned_text,Positive,Negative,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust
0,9517199000.0,14393717.0,2010-02-23 08:02:57,Un peu de réconfort liquide en take away après...,VD,fr,un peu réconfort liquide take away après début...,1,0,0,0,0,0,0,0,0,1
1,9518015000.0,14393717.0,2010-02-23 08:40:13,Au charbon! (@ BCV St-François) http://4sq.com...,VD,fr,au charbon ! ( bcv st-françois ),0,0,0,0,0,0,0,0,0,0
2,9525122000.0,14465180.0,2010-02-23 13:20:45,C'est quoi un laptop geek? Un lapsus! :),VD,fr,c'est quoi laptop geek ? un lapsus ! : ),0,0,0,0,0,0,0,0,0,0
3,9567369000.0,6589882.0,2010-02-24 08:10:43,Dans le train pour Genève,VD,fr,dans train genève,0,0,0,0,0,0,0,0,0,0
4,9571348000.0,14393717.0,2010-02-24 11:06:29,"Argh, pas de phó! (@ Goûts d'Asie) http://4sq....",VD,fr,"argh , phó ! ( goûts d'asie )",0,0,0,0,0,0,0,0,0,0


In [97]:
tweets_with_emptions.to_csv('emotions_data/fr_final_emotion.csv',index=None)

# German Tweets

In [139]:
tweets_with_emotions=emotion_analysis('intermediate_data/de_cleaned.csv', 'German Translation (Google Translate)', 'german')

number of weets:  1402170


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


start emotion analysis
100000
200000
300000
400000
500000
600000
700000
800000
900000
1000000
1100000
1200000
1300000
1400000


In [143]:
tweets_with_emotions.tail(15)

Unnamed: 0,id,userId,createdAt,text,canton,language,cleaned_text,Positive,Negative,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust
1402155,7.768185e+17,1006063000.0,2016-09-16 16:22:26,@Jackobli @dockweiler69 also ich würde Dir den...,BS,de,dir vegi burger empfehlen . zur feier tages gu...,4,0,0,1,0,0,2,0,1,2
1402156,7.768188e+17,26943860.0,2016-09-16 16:23:33,Wochenendlektüre: Learnings aus 11 Content Mar...,BL,de,wochenendlektüre : learnings 11 content market...,0,0,0,0,0,0,0,0,0,0
1402157,7.768191e+17,330148300.0,2016-09-16 16:24:39,Cam takes Switzerland,BE,de,cam takes switzerland,0,0,0,0,0,0,0,0,0,0
1402158,7.768193e+17,135559100.0,2016-09-16 16:25:25,Da entdeckt grad wer Twitter ;) @albin1982 #di...,ZH,de,da entdeckt grad wer twitter ; ),1,0,0,0,0,0,0,0,0,0
1402159,7.7682e+17,7.354492e+17,2016-09-16 16:28:25,hehehehhehehe https://t.co/QvqpvctCbO,TI,de,hehehehhehehe,0,0,0,0,0,0,0,0,0,0
1402160,7.768202e+17,85606520.0,2016-09-16 16:29:14,"#Lineup #ZSCLHC \nSchäppi zurück, 1. Meistersc...",ZH,de,"schäppi zurück , 1 . meisterschaftsspiel heute...",0,1,0,0,1,1,0,1,0,0
1402161,7.768205e+17,179610200.0,2016-09-16 16:30:08,Wieviel kostet ein Heli-Transfer von Visp nach...,ZH,de,wieviel kostet heli-transfer visp st. gallen ?,0,0,0,0,0,0,0,0,0,0
1402162,7.768205e+17,7.666746e+17,2016-09-16 16:30:11,Dratini – beim Inseli bis 18:42:59 https://t.c...,LU,de,dratini – beim inseli 18:42:59,0,0,0,0,0,0,0,0,0,0
1402163,7.768205e+17,3132184000.0,2016-09-16 16:30:15,meine katze hält sich für ein Iltis du hasst s...,LU,de,katze hält iltis hasst getrigert,0,0,0,0,0,0,0,0,0,0
1402164,7.768212e+17,429334600.0,2016-09-16 16:32:58,@walliserstefi bi defür allwäg morn morge no c...,ZH,de,bi defür allwäg morn morge no churz wallis,0,0,0,0,0,0,0,0,0,0


In [145]:
tweets_with_emotions.to_csv('emotions_data/de_final_emotion.csv',index=None)

# Italian Tweets

In [146]:
tweets_with_emotions=emotion_analysis('intermediate_data/it_cleaned.csv', 'Italian Translation (Google Translate)', 'italian')

number of weets:  484415


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


start emotion analysis
100000
200000
300000
400000


In [153]:
tweets_with_emotions.tail(1000)

Unnamed: 0,id,userId,createdAt,text,canton,language,cleaned_text,Positive,Negative,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust
483415,7.754409e+17,5.336007e+08,2016-09-12 21:08:06,"@nickheizmann Oh, là, là! Für mich einen Bacar...",AG,it,"oh , là , là ! für mich einen bacardi cola , b...",0,0,0,0,0,0,0,0,0,0
483416,7.754414e+17,7.354492e+17,2016-09-12 21:10:14,"buonanotte\n\ncara per favore, fai ragionare C...",TI,it,"buonanotte cara favore , ragionare clarisse ...",0,0,0,0,0,0,0,0,0,0
483417,7.754438e+17,7.022333e+08,2016-09-12 21:19:36,#lenaturali antipatiche! Chi vi credete?Non pe...,TI,it,antipatiche ! chi credete ? non pensavo così a...,0,0,0,0,0,0,0,0,0,0
483418,7.754464e+17,4.806186e+08,2016-09-12 21:30:02,@ValeriaSirigu brutto da dire ma finché ci sar...,TI,it,brutto dire finché erdogan dirigere turchia co...,0,1,1,0,1,1,0,1,0,0
483419,7.754476e+17,1.599584e+08,2016-09-12 21:34:48,"nunca corte a franja no FaceTime, dica",GE,it,"nunca corte franja no facetime , dica",0,1,1,1,0,1,0,0,0,0
483420,7.754481e+17,9.879025e+08,2016-09-12 21:36:56,Well done @christianmeier @Meierwife ! #coffee...,ZH,it,well done !,0,0,0,0,0,0,0,0,0,0
483421,7.754484e+17,3.165883e+08,2016-09-12 21:37:56,Non vi sembra che Tina ogni tanto abbia un non...,ZH,it,non sembra tina ogni tanto so sarah kennedy ?,0,0,0,0,0,0,0,0,0,0
483422,7.754485e+17,7.105758e+17,2016-09-12 21:38:25,"""Stantastic",ZH,it,`` stantastic,0,0,0,0,0,0,0,0,0,0
483423,7.754509e+17,3.740898e+09,2016-09-12 21:48:04,@aurelia1916 @_leticiamartin_ @Urs_Buhler Vi a...,OW,it,vi auguro tanta felicità ! ❤️❤️,1,0,0,0,0,0,1,0,0,0
483424,7.754510e+17,1.929394e+08,2016-09-12 21:48:27,"Anch'io, anch'io il giro scale! @ Switzerland ...",TI,it,"anch'io , anch'io giro scale ! switzerland lugano",0,0,0,0,0,0,0,0,0,0


In [155]:
tweets_with_emotions.to_csv('emotions_data/it_final_emotion.csv',index=None)

# Spanish Tweets

In [156]:
tweets_with_emptions=emotion_analysis('intermediate_data/es_cleaned.csv', 'Spanish Translation (Google Translate)', 'spanish')

number of weets:  443822


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


start emotion analysis
100000
200000
300000
400000


In [157]:
tweets_with_emptions.head(1000)

Unnamed: 0,id,userId,createdAt,text,canton,language,cleaned_text,Positive,Negative,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust
0,9.773929e+09,14657884.0,2010-02-28 11:58:01,Pecando! [=o)] (@ Brasserie des Halles de l'Il...,GE,es,pecando ! [ =o ) ] ( brasserie des halles l'ile ),0,0,0,0,0,0,0,0,0,0
1,1.026926e+10,14657884.0,2010-03-10 12:41:58,@kbgabt casi casi me ganas en el Doodle Jump [...,GE,es,"casi casi ganas doodle jump [ =op ] , visto pu...",0,0,0,0,0,0,0,0,0,0
2,1.046313e+10,14657884.0,2010-03-14 09:00:31,"Para terminar la semana, bádminton! [=o)] (@ c...",GE,es,"para terminar semana , bádminton ! [ =o ) ] ( ...",0,0,0,0,0,0,0,1,0,0
3,1.063753e+10,6165432.0,2010-03-17 20:35:16,Dolores ;-) http://twitpic.com/196wkm,ZH,es,dolores ; - ),0,0,0,0,0,0,0,0,0,0
4,1.066930e+10,14657884.0,2010-03-18 12:48:05,@belmontito cuándo te lo devuelven?,GE,es,cuándo devuelven ?,0,0,0,0,0,0,0,0,0,0
5,1.071731e+10,14657884.0,2010-03-19 11:08:00,muy grave mi resaca deportiva… dos días y peor...,GE,es,grave resaca deportiva… dos días peor hoy ayer...,0,2,0,0,0,2,0,2,0,0
6,1.082848e+10,15138496.0,2010-03-21 16:53:22,"Watching ""El Secreto de sus Ojos"" :) #fb (@ Ci...",FR,es,watching `` el secreto ojos '' : ) ( cinéma rex ),0,0,0,0,0,0,0,0,1,1
7,1.083564e+10,14657884.0,2010-03-21 20:02:02,"Sin comparación, la próxima vez al Boreal!! (@...",GE,es,"sin comparación , próxima vez boreal ! ! ( sta...",0,0,0,0,0,0,0,0,0,0
8,1.129694e+10,14657884.0,2010-03-30 07:32:46,@belmontito recién me acordé qué era eso… [=oP...,GE,es,recién acordé eso… [ =op ] 1 ) tweet-poster ? ...,0,0,0,0,0,0,0,0,0,0
9,1.141138e+10,17383825.0,2010-04-01 06:35:17,Au top de la mode @mccasal - Photo: http://bki...,GE,es,au top mode - photo :,0,0,0,0,0,0,0,0,0,0


In [158]:
tweets_with_emptions.Joy.sum()

69371

In [159]:
tweets_with_emptions.to_csv('emotions_data/es_final_emotion.csv',index=None)

# portuguese Tweets

In [160]:
tweets_with_emptions=emotion_analysis('intermediate_data/pt_cleaned.csv', 'Portuguese Translation (Google Translate)', 'portuguese')

number of weets:  476702


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


start emotion analysis
100000
200000
300000
400000


In [161]:
tweets_with_emptions.head(1000)

Unnamed: 0,id,userId,createdAt,text,canton,language,cleaned_text,Positive,Negative,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust
0,1.008459e+10,14657884.0,2010-03-06 18:36:05,Mojito!! (@ Café Cuba) http://4sq.com/bfcPqH,GE,pt,mojito ! ! ( café cuba ),1,0,0,0,0,0,0,0,0,0
1,1.008861e+10,14657884.0,2010-03-06 20:34:22,Cervecitas!! (@ Alhambar) http://4sq.com/94RtEk,GE,pt,cervecitas ! ! ( alhambar ),0,0,0,0,0,0,0,0,0,0
2,1.046764e+10,15050292.0,2010-03-14 12:32:31,"Brazilian food, hmm… — at Casa da Picanha http...",ZH,pt,"brazilian food , hmm… — at casa picanha",0,0,0,0,0,0,0,0,0,0
3,1.051281e+10,8836532.0,2010-03-15 11:13:36,@herval lembre-se que para conseguir qualquer ...,VD,pt,lembre-se conseguir qualquer visto vc precisa ...,0,0,0,0,0,0,0,0,0,0
4,1.053735e+10,14657884.0,2010-03-15 22:07:18,Mojitos por siempre!! [=o)] (@ Café Cuba) http...,GE,pt,mojitos siempre ! ! [ =o ) ] ( café cuba ),1,0,0,0,0,0,0,0,0,0
5,1.069009e+10,8836532.0,2010-03-18 21:26:16,@herval e qual desses dois tipos voce é?,VD,pt,desses dois tipos voce é ?,0,0,0,0,0,0,0,0,0,0
6,1.164121e+10,7518012.0,2010-04-05 13:28:23,Radical Powder Rides http://twitvid.com/T8XVP,GR,pt,radical powder rides,0,0,0,0,0,0,0,0,0,0
7,1.272329e+10,13993622.0,2010-04-23 20:39:29,@__roger Android.modaco.com dort im Forum,ZH,pt,android.modaco.com dort im forum,0,0,0,0,0,0,0,0,0,0
8,1.383274e+10,15644040.0,2010-05-12 05:00:29,Barça - here I come!,SG,pt,barça - here i come !,0,0,0,0,0,0,0,0,0,0
9,1.449214e+10,14417443.0,2010-05-22 13:04:02,hip hop demo ;) (@ Fnac) http://4sq.com/atQmVa,FR,pt,hip hop demo ; ) ( fnac ),0,0,0,0,0,0,0,0,0,0


In [164]:
tweets_with_emptions.Joy.sum()

75494

In [165]:
tweets_with_emptions.to_csv('emotions_data/pt_final_emotion.csv',index=None)

# Turkish Tweets

In [166]:
tweets_with_emptions=emotion_analysis('intermediate_data/tr_cleaned.csv', 'Turkish Translation (Google Translate)', 'turkish')

number of weets:  331035


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


start emotion analysis
100000
200000
300000


In [167]:
tweets_with_emptions.head(1000)

Unnamed: 0,id,userId,createdAt,text,canton,language,cleaned_text,Positive,Negative,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust
0,1.033064e+10,14703863.0,2010-03-11 17:01:54,Gratis-Blüemli am HB. :-),ZH,tr,gratis-blüemli am hb . : - ),0,0,0,0,0,0,0,0,0,0
1,1.193776e+10,14465180.0,2010-04-10 13:55:44,@kathy_1 yerk! C'est sale!,VD,tr,yerk ! c'est sale !,0,0,0,0,0,0,0,0,0,0
2,1.241565e+10,1079931.0,2010-04-18 20:17:01,Hihi @bloggingtom bei Giaccobo/Müller,LU,tr,hihi bei giaccobo/müller,0,0,0,0,0,0,0,0,0,0
3,1.247330e+10,6416332.0,2010-04-19 20:03:40,@digichr yep!,GR,tr,yep !,0,0,0,0,0,0,0,0,0,0
4,1.353608e+10,120433.0,2010-05-07 07:49:11,Dr Biävönüüü! http://snipsnap.ch/i536l /via @s...,BE,tr,dr biävönüüü ! /via,0,0,0,0,0,0,0,0,0,0
5,1.374440e+10,1775871.0,2010-05-10 19:45:05,@rubenstutz yummy :-),AG,tr,yummy : - ),0,0,0,0,0,0,0,0,0,0
6,1.503643e+10,1079931.0,2010-05-30 11:50:07,@marcelwidmer klar... Halle,LU,tr,klar ... halle,0,0,0,0,0,0,0,0,0,0
7,1.572426e+10,15402923.0,2010-06-08 18:34:26,#yatl @webtuesday,ZH,tr,,0,0,0,0,0,0,0,0,0,0
8,1.630965e+10,14274896.0,2010-06-16 14:22:20,Tüüüüüür! http://twitpic.com/1x9w1c,BE,tr,tüüüüüür !,0,0,0,0,0,0,0,0,0,0
9,1.687275e+10,17482788.0,2010-06-23 19:48:19,Özil akbar! 1:0!,ZH,tr,özil akbar ! 1:0 !,0,0,0,0,0,0,0,0,0,0


In [169]:
tweets_with_emptions.Joy.sum()

39538

In [170]:
tweets_with_emptions.to_csv('emotions_data/tr_final_emotion.csv',index=None)

# Dutch Tweets

In [171]:
tweets_with_emptions=emotion_analysis('intermediate_data/nl_cleaned.csv', 'Dutch Translation (Google Translate)', 'dutch')

number of weets:  197212


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


start emotion analysis
100000


In [172]:
tweets_with_emptions.head(1000)

Unnamed: 0,id,userId,createdAt,text,canton,language,cleaned_text,Positive,Negative,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust
0,9.579249e+09,17197547.0,2010-02-24 15:05:13,iPhone meet Nexus1 http://yfrog.com/1exp2aj,ZH,nl,iphone meet nexus1,0,0,0,0,0,0,0,0,0,0
1,9.644522e+09,15017105.0,2010-02-25 21:38:17,Beginning to feel @ home. — at Hangkante http...,ZH,nl,beginning to feel home . — at hangkante,0,0,0,0,0,0,0,0,0,0
2,9.795116e+09,8717982.0,2010-02-28 22:29:28,Ueeeeeeeeeeeeeooo,BL,nl,ueeeeeeeeeeeeeooo,0,0,0,0,0,0,0,0,0,0
3,9.816631e+09,17062826.0,2010-03-01 08:20:53,"#arzurich Developers, designers, geotaggers......",ZH,nl,"developers , designers , geotaggers ... all he...",0,0,0,0,0,0,0,0,0,0
4,1.023115e+10,14393717.0,2010-03-09 18:02:28,"Tapas et Hoegaarden, joli apéro! (@ Chez Mathi...",VD,nl,"tapas et hoegaarden , joli apéro ! ( chez math...",0,0,0,0,0,0,0,0,0,0
5,1.023829e+10,1079931.0,2010-03-09 21:19:50,"@adrianbucher @fellmerlloyd ja, geht ja gar ni...",LU,nl,", geht gar nicht das man den tweet nicht kapie...",0,1,1,0,0,0,0,0,0,0
6,1.027138e+10,14182911.0,2010-03-10 13:41:51,been eben,VD,nl,been eben,0,0,0,0,0,0,0,0,0,0
7,1.027856e+10,16198929.0,2010-03-10 16:34:50,Buying dinner (@ Coop St François),VD,nl,buying dinner ( coop st françois ),0,0,0,0,0,0,0,0,0,0
8,1.038342e+10,15138496.0,2010-03-12 18:13:20,Good beer here!! (@ Café de l'Ancienne Gare) h...,FR,nl,good beer here ! ! ( café l'ancienne gare ),1,0,0,0,0,0,0,0,0,0
9,1.042332e+10,15498938.0,2010-03-13 14:18:21,Old school meets new school. #studierstube #fb...,ZH,nl,old school meets new school .,0,0,0,0,0,0,0,0,0,2


In [174]:
tweets_with_emptions.Joy.sum()

6790

In [175]:
tweets_with_emptions.to_csv('emotions_data/nl_final_emotion.csv',index=None)

# Arabic Tweets

In [176]:
tweets_with_emptions=emotion_analysis('intermediate_data/ar_cleaned.csv', 'Arabic Translation (Google Translate)', 'arabic')

number of weets:  229191


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


start emotion analysis
100000
200000


In [177]:
tweets_with_emptions.head(1000)

Unnamed: 0,id,userId,createdAt,text,canton,language,cleaned_text,Positive,Negative,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust
0,1.756668e+10,14331452.0,2010-07-02 12:05:23,@alfarhan خطيبنا تحدث عن علاقة الآباء بالابناء...,BE,ar,خطيبنا تحدث علاقة الآباء بالابناء كيف الاسلام ...,0,0,0,0,0,0,0,0,0,0
1,2.091966e+10,14331452.0,2010-08-11 21:52:55,للتو.. انتهينا من الراويح,BE,ar,للتو.. انتهينا الراويح,0,0,0,0,0,0,0,0,0,0
2,2.408070e+10,14331452.0,2010-09-10 05:47:05,في الطريق الى صلاة الجمعة.. تقبل الله منا و من...,BE,ar,الطريق صلاة الجمعة.. تقبل الله منا منكم صالح ا...,2,0,0,1,0,1,1,0,0,1
3,2.408073e+10,14331452.0,2010-09-10 05:47:46,عفوا قصدت صلاة العيد,BE,ar,عفوا قصدت صلاة العيد,1,0,0,1,0,0,1,0,1,1
4,2.627465e+10,14331452.0,2010-10-03 15:12:59,افكار كثيرة لمشاريع تجول في خاطري لكن مع الاسف...,BE,ar,افكار كثيرة لمشاريع تجول خاطري الاسف الدراسة ا...,0,0,0,0,0,0,0,0,0,0
5,3.039150e+15,8883972.0,2010-11-12 10:59:24,@Ghonim مهرجان البرمجة للجميع....تحت رعاية الس...,ZH,ar,مهرجان البرمجة للجميع ... .تحت رعاية السيدة ال...,1,0,0,1,0,0,1,0,1,0
6,7.540424e+15,8883972.0,2010-11-24 21:05:51,@SamerElSahn إنت عايزه يقول إيه؟ يهدي النفوس؟,ZH,ar,إنت عايزه يقول إيه؟ يهدي النفوس؟,0,0,0,0,0,0,0,0,0,0
7,7.542227e+15,8883972.0,2010-11-24 21:13:01,@SamerElSahn لو سمعت كلمة من أي حد في الحكومة ...,ZH,ar,لو سمعت كلمة أي حد الحكومة دي هرجع,1,0,0,0,0,0,0,0,0,1
8,7.543013e+15,8883972.0,2010-11-24 21:16:09,@SamerElSah عايزه ينكت زي نكتة العبارة؟ :(,ZH,ar,عايزه ينكت زي نكتة العبارة؟ : (,0,1,0,0,0,0,0,0,0,0
9,8.803389e+15,14331452.0,2010-11-28 08:44:26,صباح النور http://twitpic.com/3aywc4 http://t...,BE,ar,صباح النور,0,0,0,0,0,0,0,0,0,0


In [178]:
tweets_with_emptions.Joy.sum()

46132

In [179]:
tweets_with_emptions.to_csv('emotions_data/ar_final_emotion.csv',index=None)

In [None]:
#concatnate all emotion files

In [180]:
import glob, os
os.chdir("emotions_data")

In [181]:

emotion_big_df=pd.DataFrame()
i=0
for file in glob.glob("*.csv"):
    name=file.split(".")[0]
    print(name)
    df=pd.read_csv(file)
    print(len(df))
    emotion_big_df=pd.concat([emotion_big_df,df], axis=0)
emotion_big_df.head()

ar_final_emotion
229191
de_final_emotion
1402170
en_final_emotion
2985994
es_final_emotion
443822
fr_final_emotion
3500272
it_final_emotion
484415
nl_final_emotion
197212
pt_final_emotion
476702
tr_final_emotion
331035


Unnamed: 0,id,userId,createdAt,text,canton,language,cleaned_text,Positive,Negative,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust
0,17566680000.0,14331452.0,2010-07-02 12:05:23,@alfarhan خطيبنا تحدث عن علاقة الآباء بالابناء...,BE,ar,خطيبنا تحدث علاقة الآباء بالابناء كيف الاسلام ...,0,0,0,0,0,0,0,0,0,0
1,20919660000.0,14331452.0,2010-08-11 21:52:55,للتو.. انتهينا من الراويح,BE,ar,للتو.. انتهينا الراويح,0,0,0,0,0,0,0,0,0,0
2,24080700000.0,14331452.0,2010-09-10 05:47:05,في الطريق الى صلاة الجمعة.. تقبل الله منا و من...,BE,ar,الطريق صلاة الجمعة.. تقبل الله منا منكم صالح ا...,2,0,0,1,0,1,1,0,0,1
3,24080730000.0,14331452.0,2010-09-10 05:47:46,عفوا قصدت صلاة العيد,BE,ar,عفوا قصدت صلاة العيد,1,0,0,1,0,0,1,0,1,1
4,26274650000.0,14331452.0,2010-10-03 15:12:59,افكار كثيرة لمشاريع تجول في خاطري لكن مع الاسف...,BE,ar,افكار كثيرة لمشاريع تجول خاطري الاسف الدراسة ا...,0,0,0,0,0,0,0,0,0,0


In [182]:
len(emotion_big_df)

10050813

In [183]:
emotion_big_df.to_csv('all_tweets_with_emotions.csv', index=None)