In [73]:
import pandas as pd

In [60]:
# Load the filtered tweets CSV into a pandas DataFrame
df = pd.read_csv(filepath_or_buffer='filtered_tweets.csv')

In [61]:
# Preview the first five rows to check the columns that were loaded
df.head(n=5)

Unnamed: 0,target,id,date,flag,user,text
0,0,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,"@nationwideclass no, it's not behaving at all...."
1,0,1467811594,Mon Apr 06 22:20:03 PDT 2009,NO_QUERY,coZZ,@LOLTrish hey long time no see! Yes.. Rains a...
2,0,1467813579,Mon Apr 06 22:20:31 PDT 2009,NO_QUERY,starkissed,@LettyA ahh ive always wanted to see rent lov...
3,0,1467818603,Mon Apr 06 22:21:49 PDT 2009,NO_QUERY,kennypham,"Sad, sad, sad. I don't know why but I hate thi..."
4,0,1467819650,Mon Apr 06 22:22:05 PDT 2009,NO_QUERY,antzpantz,@Viennah Yay! I'm happy for you with your job!...


In [37]:
# Find the top tweeters: count the (non-null) tweets of every user, then rank
# users from most to least active. Built as a single non-mutating chain so the
# cell gives the same result however many times it is re-run.
number_tweets_df = (
    df.groupby('user')['text']
    .count()
    # reset_index(name=...) turns the Series into a two-column frame:
    # 'user' (the former index) and 'number of tweets' (the counts)
    .reset_index(name='number of tweets')
    .sort_values(by='number of tweets', ascending=False)
)
number_tweets_df.head()

Unnamed: 0,user,number of tweets
4129,lost_dog,549
5530,webwoke,345
5424,tweetpet,310
1767,SallytheShizzle,281
2204,VioletsCRUK,279


In [90]:
# Creating a test dataframe using the top 3 users
top_users = ['lost_dog', 'webwoke', 'tweetpet']
small_df = df[df['user'].isin(top_users)]

# Dictionary mapping each user to their tweets, in the layout sent to IBM Watson:
#   {user: {'contentItems': [{'content': ..., 'contenttype': ..., 'language': ...}, ...]}}
user_tweet_dictionary = {}

# Only the 'user' and 'text' columns are needed, so walk those two columns in
# lockstep instead of materialising a full row object for every tweet.
for user, text in zip(small_df['user'], small_df['text']):
    # Every tweet is tagged as plain English text
    tweet = {
        'content': text,
        'contenttype': 'text/plain',
        'language': 'en',
    }

    # setdefault creates the user's entry the first time the user is seen and
    # returns the existing entry afterwards, so the append is written only once.
    user_tweet_dictionary.setdefault(user, {'contentItems': []})['contentItems'].append(tweet)

In [88]:
import json
import os

from ibm_watson import PersonalityInsightsV3
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

# API key provided on the service page. It is read from the environment so the
# secret is never saved inside the notebook; set PERSONALITY_INSIGHTS_APIKEY
# before starting the kernel.
KEY = os.environ.get('PERSONALITY_INSIGHTS_APIKEY')
if not KEY:
    raise RuntimeError(
        'Set the PERSONALITY_INSIGHTS_APIKEY environment variable to your IBM Watson API key.'
    )

# Authentication via IBM's IAM (Identity and Access Management)
authenticator = IAMAuthenticator(KEY)

# Creating a service instance
service = PersonalityInsightsV3(
    version='2017-10-13',
    authenticator=authenticator)

# Setting service endpoint
# NOTE(review): confirm this URL matches the endpoint listed on your service page
service.set_service_url('https://gateway.watsonplatform.net/personality-insights/api')

In [108]:
# Big Five trait labels, listed in the position each one is read from
# profile['personality'] below.
# NOTE(review): assumes Watson always returns the traits in this order - confirm.
trait_names = ['Openness', 'Conscientiousness', 'Extraversion', 'Agreeableness', 'Emotional range']

# Column-oriented accumulator for the Watson results: one list per output
# column, with 'User' first followed by the five traits.
results = {'User': []}
for trait in trait_names:
    results[trait] = []

# Request a profile for every user in user_tweet_dictionary and record the raw
# score of each trait next to the user's name.
for user, tweets in user_tweet_dictionary.items():
    profile = service.profile(tweets, 'application/json', raw_scores=True, consumption_preferences=True).get_result()

    results['User'].append(user)
    for position, trait in enumerate(trait_names):
        results[trait].append(profile['personality'][position]['raw_score'])

In [109]:
# Turn the column-oriented results dictionary into a table (one row per user)
big5_df = pd.DataFrame(data=results)
big5_df.head(n=5)

Unnamed: 0,User,Openness,Conscientiousness,Extraversion,Agreeableness,Emotional range
0,tweetpet,0.742514,0.593705,0.537122,0.717102,0.276272
1,lost_dog,0.70023,0.599959,0.523555,0.787387,0.851586
2,webwoke,0.744489,0.602494,0.535919,0.745303,0.654681


<div class='alert alert-info'>
    The cells below are a collection of short, standalone examples (e.g. regular expressions and pandas groupby).
</div>

In [66]:
# Regular expression example: remove @mentions from a piece of text.
# To better understand how regular expressions work, go through the module on them on CodeCademy.
# Here is a cheatsheet: https://www.rexegg.com/regex-quickstart.html
import re

# '@' followed by one or more word characters (letters, digits, underscore)
mention_pattern = re.compile(r'@\w+')

string = 'I am a pony @HELLO!'
new_string = mention_pattern.sub('', string)
print(new_string)

I am a pony !


In [65]:
# Number of tweets for each user.
# groupby collects the rows that share the same 'user' value, and count() then
# gives the number of (non-null) tweets inside each group.
# There are also modules on pandas on CodeCademy!
df.groupby(by='user')['text'].count()

user
12gaBrowningGal     23
15Stepz             21
16_MileyCyrus       99
18percentgrey       52
19c816tf9227        63
19fischi75         104
1ChazD              23
1azylizzie          24
1flyharmony         25
24cotton            24
2NiteBoy            34
2emc                42
30STMWithJared      22
30STMluva           47
3CB                 71
420thoughts         28
4Hours              31
4_s_m_4             23
4evaurgirl          45
4everequine         24
5toSucceed         109
80smusicthebest     23
82kg                33
87sal87             23
90rachal            22
9MMNINAROSS         23
AAmyHaanson         27
ABCeCe              29
ABZQuine            27
ABabyBlueEyes       59
                  ... 
youcollme           25
youdontknowmel      24
youngfreshnew       28
youngnatho          22
youngnik718         35
youreyesdontlie     39
youroryoure         36
yulebesorryx        21
yunita_dee          24
yuvipanda           63
zackoid             54
zanderjaymz         30
zandra

In [68]:
# Number of distinct tweet texts for each user (repeated tweets are counted once)
df.groupby(by='user')['text'].nunique()

user
12gaBrowningGal     23
15Stepz             21
16_MileyCyrus       99
18percentgrey       52
19c816tf9227        63
19fischi75         102
1ChazD              23
1azylizzie          24
1flyharmony         25
24cotton            24
2NiteBoy            34
2emc                42
30STMWithJared      22
30STMluva           47
3CB                 71
420thoughts         28
4Hours              26
4_s_m_4             23
4evaurgirl          45
4everequine         24
5toSucceed         109
80smusicthebest     23
82kg                33
87sal87             23
90rachal            22
9MMNINAROSS         23
AAmyHaanson         27
ABCeCe              29
ABZQuine            27
ABabyBlueEyes       58
                  ... 
youcollme           25
youdontknowmel      24
youngfreshnew       27
youngnatho          22
youngnik718         35
youreyesdontlie     38
youroryoure         36
yulebesorryx        21
yunita_dee          24
yuvipanda           63
zackoid             54
zanderjaymz         30
zandra