# Twitter Analytics - iPhone X vs. Samsung Galaxy S9


## Table of Contents

#### [Data Collection Methodology](#data_collect)

#### [Descriptive Analytics](#sect_1)


#### [Content Analytics](#sect_2)


#### [Network Analytics](#sect_3)

<br>
<br>


In [1]:
import tweepy
import csv
import time
import json
import pandas as pd
import re
import numpy as np

from collections import Counter
from operator import itemgetter
from itertools import combinations

import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords

from pandas.io.json import json_normalize
import ast

# Import TextBlob for sentiment analysis
from textblob import TextBlob
from textblob import Word

from IPython.display import HTML, display, Image

In [None]:
# Notebook-wide pandas display settings.
pd.set_option('display.max_columns', None)           # never hide columns of wide frames
pd.set_option('display.max_colwidth', 250)           # show up to 250 characters per cell
pd.set_option('display.float_format', '{:,.4f}'.format)  # thousands separator, 4 decimals

#### Language Codes List

In [3]:
# Build the language-code -> language-name lookup table.
# Requires the third-party `datapackage` library (pip install datapackage).
from datapackage import Package

# The package descriptor is loaded from a local copy; the upstream version is at
# https://datahub.io/core/language-codes/datapackage.json
package = Package('data/datapackage.json')

# package.resource_names lists all available resources if needed;
# 'language-codes-full' is an alternative resource to the one read here.
res = package.get_resource('language-codes')
langlists = res.table.read()

# Rows are accessed positionally: element 0 becomes the key (code) and
# element 1 the value (language name).
lang_codes = dict((row[0], row[1]) for row in langlists)

# Register some old non-standard codes as well.
lang_codes['und'] = 'Undetermined'
lang_codes['in'] = 'Indonesian'
lang_codes['iw'] = 'Hebrew'

#### Now we can look up a language's name from the code found in the tweet metadata

In [4]:
# Sanity-check the lookup table: the code 'sv' should resolve to Swedish.
lang_codes['sv']

u'Swedish'


### Read Raw Tweets

In [5]:
# Peek at the first raw tweet to see what the original data from Twitter looks like.
# `json` is already imported in the notebook's import cell.
filename = 'data/IPhoneXSamsungS9.json'

# Each line of the file is parsed as one JSON document; only the first line is
# needed here. The `with` block closes the file, so no explicit close() follows.
with open(filename, 'r') as f:
    line = f.readline()

if line.strip():
    tweet = json.loads(line)  # load the line as a Python dictionary
    # Pretty-print so the nested structure is readable.
    print(json.dumps(tweet, indent=4))
else:
    # An empty file (or blank first line) would make json.loads raise.
    print("No tweet found on the first line of %s" % filename)

{
    "quote_count": 0, 
    "contributors": null, 
    "truncated": false, 
    "text": "Todos querr\u00e1n saber el secreto detr\u00e1s de tus fotos. #GalaxyS9 https://t.co/uAHpiVRIF8", 
    "is_quote_status": false, 
    "in_reply_to_status_id": null, 
    "reply_count": 0, 
    "id": 987783049868955654, 
    "favorite_count": 0, 
    "entities": {
        "user_mentions": [], 
        "symbols": [], 
        "hashtags": [
            {
                "indices": [
                    52, 
                    61
                ], 
                "text": "GalaxyS9"
            }
        ], 
        "urls": [], 
        "media": [
            {
                "additional_media_info": {
                    "monetizable": false
                }, 
                "expanded_url": "https://twitter.com/samsungmobilemx/status/987783049868955654/video/1", 
                "display_url": "pic.twitter.com/uAHpiVRIF8", 
                "url": "https://t.co/uAHpiVRIF8", 
                "medi

#### Read the file and split the tweets into three groups: iPhone X only, Samsung S9 only, or both

In [17]:
# Split the raw tweets into three lists depending on which product(s) the raw
# line mentions: both products, Samsung S9 only, or iPhone X only.
# Matching is a case-insensitive substring test on the whole raw JSON line.
SAMSUNG_S9_KEYWORDS = ('galaxys9', 'samsungs9', 'galaxy s9', 'samsung s9')
IPHONE_X_KEYWORDS = ('iphone x', 'iphonex')

tweets_both = []
tweets_samsungs9 = []
tweets_iphone = []
count_unparsable = 0  # matching lines that were not valid JSON

with open(filename, 'r') as f:
    for line in f:
        # Lower-case once per line instead of once per keyword test.
        lowered = line.lower()
        has_samsungs9 = any(kw in lowered for kw in SAMSUNG_S9_KEYWORDS)
        has_iphonex = any(kw in lowered for kw in IPHONE_X_KEYWORDS)

        if has_samsungs9 and has_iphonex:
            bucket = tweets_both
        elif has_samsungs9:
            bucket = tweets_samsungs9
        elif has_iphonex:
            bucket = tweets_iphone
        else:
            continue  # line mentions neither product

        try:
            bucket.append(json.loads(line))
        except ValueError:
            # Malformed JSON: skip the line, but keep track of it rather than
            # silently swallowing every possible exception.
            count_unparsable += 1

# Derive the counts from the lists so the "loaded" numbers always match the
# tweets that were actually parsed and stored.
count_both = len(tweets_both)
count_samsungs9 = len(tweets_samsungs9)
count_iphonex = len(tweets_iphone)

print("%s tweets loaded related to both iPhoneX and Samsung S9" % count_both)
print("%s tweets loaded related to Samsung S9" % count_samsungs9)
print("%s tweets loaded related to iPhoneX" % count_iphonex)
if count_unparsable:
    print("%s matching lines skipped because they were not valid JSON" % count_unparsable)



701 tweets loaded related to both iPhoneX and Samsung S9
8168 tweets loaded related to Samsung S9
26458 tweets loaded related to iPhoneX


#### Get languages for tweets

In [7]:
# Collect the 'lang' field of every tweet, one list per product group.
lang_both = [tweet['lang'] for tweet in tweets_both]
lang_iphone = [tweet['lang'] for tweet in tweets_iphone]
lang_samsungs9 = [tweet['lang'] for tweet in tweets_samsungs9]


#### Language frequency counts

In [8]:

# Frequency of each language code per group, in this order:
# tweets with both products, tweets with iPhone X, tweets with Samsung S9.
count_lang_both, count_lang_iphone, count_lang_samsungs9 = (
    Counter(codes) for codes in (lang_both, lang_iphone, lang_samsungs9)
)
#print count_lang_samsungs9



In [14]:
# Language counts for tweets mentioning both iPhone X and Samsung S9.
# The (code, count) pairs are materialised as a list and the columns are named
# up front, so the frame keeps its expected columns even if the counter is empty.
df_count_lang_both = pd.DataFrame(list(count_lang_both.items()),
                                  columns=['Language Code', 'Number Tweets'])
# Fall back to the raw code when it is missing from the lookup table instead of
# aborting the cell with a KeyError.
df_count_lang_both['Language Name'] = df_count_lang_both['Language Code'].map(
    lambda code: lang_codes.get(code, code))
print("Number of languages in tweets containing both iPhone X and Samsung S9: %s" % len(df_count_lang_both))
# Last expression of the cell: display the table, most frequent language first.
df_count_lang_both.sort_values(by='Number Tweets', ascending=False)

Number of languages in tweets containing both iPhone X and Samsung S9: 18


Unnamed: 0,Language Code,Number Tweets,Language Name
2,en,462,English
13,in,69,Indonesian
17,es,59,Spanish; Castilian
16,ja,31,Japanese
5,de,16,German
12,th,13,Thai
3,pt,11,Portuguese
1,fr,8,French
11,und,7,Undetermined
0,ru,7,Russian


In [15]:
# Language counts for tweets mentioning iPhone X.
# The (code, count) pairs are materialised as a list and the columns are named
# up front, so the frame keeps its expected columns even if the counter is empty.
df_count_lang_iphone = pd.DataFrame(list(count_lang_iphone.items()),
                                    columns=['Language Code', 'Number Tweets'])
# Fall back to the raw code when it is missing from the lookup table instead of
# aborting the cell with a KeyError.
df_count_lang_iphone['Language Name'] = df_count_lang_iphone['Language Code'].map(
    lambda code: lang_codes.get(code, code))
print("Number of languages in tweets containing iPhone X: %s" % len(df_count_lang_iphone))
# Last expression of the cell: display the table, most frequent language first.
df_count_lang_iphone.sort_values(by='Number Tweets', ascending=False)


Number of languages in tweets containing iPhone X: 49


Unnamed: 0,Language Code,Number Tweets,Language Name
1,en,12712,English
38,ja,5126,Japanese
22,tl,1796,Tagalog
17,pt,1619,Portuguese
14,es,1290,Spanish; Castilian
11,in,772,Indonesian
24,th,627,Thai
27,fr,512,French
30,de,380,German
44,und,320,Undetermined


In [18]:
# Language counts for tweets mentioning Samsung S9.
# The (code, count) pairs are materialised as a list and the columns are named
# up front, so the frame keeps its expected columns even if the counter is empty.
df_count_lang_samsungs9 = pd.DataFrame(list(count_lang_samsungs9.items()),
                                       columns=['Language Code', 'Number Tweets'])
# Fall back to the raw code when it is missing from the lookup table instead of
# aborting the cell with a KeyError.
df_count_lang_samsungs9['Language Name'] = df_count_lang_samsungs9['Language Code'].map(
    lambda code: lang_codes.get(code, code))
print("Number of languages in tweets containing Samsung S9: %s" % len(df_count_lang_samsungs9))
# Last expression of the cell: display the table, most frequent language first.
df_count_lang_samsungs9.sort_values(by='Number Tweets', ascending=False)


Number of languages in tweets containing Samsung S9: 40


Unnamed: 0,Language Code,Number Tweets,Language Name
1,en,3663,English
15,pt,1470,Portuguese
25,fr,931,French
12,es,864,Spanish; Castilian
37,und,229,Undetermined
9,in,159,Indonesian
27,de,138,German
4,ca,104,Catalan; Valencian
34,ja,76,Japanese
8,ar,74,Arabic
