## Miquela - "Automatic" Music Video Comment Analysis
![Miquela](ss2.jpg)

#### Load the Miquela - "Automatic" music video comments

###### Import data analysis tool Pandas 

In [4]:
import pandas as pd

##### Read the data into a DataFrame using pandas

In [5]:
# Location of the comments file; lines=True makes pandas parse it as
# line-delimited JSON (one record per line).
comments_path = 'data/Miquela_Automatic_Comments'
data = pd.read_json(comments_path, lines=True)

In [6]:
## Display the first 15 rows of the raw data (comment text still contains emojis and other non-ASCII characters)
data.head(15)

Unnamed: 0,author,cid,text,time
0,Uma Kompton Plays,UgwLmC_xRIQxenZ5JEN4AaABAg,the lyrics really go with the beat and this is...,56 minutes ago
1,Serendip.,UgzyubzvkrXs6_qNfrN4AaABAg,"If japan can embrace holograms then please, le...",2 hours ago
2,princesinha do arrocha,UgzbzZwEofuuuen7OjN4AaABAg,"Gente ela é uma pessoa de verdade, mas o rosto...",2 hours ago
3,melanie's world,Ugyck7287YL4myYCmhF4AaABAg,Isso e um robo??,3 hours ago
4,Dibujando con Daniel,Ugyv3EzeawrOHCebcUB4AaABAg,Is she a animation?,4 hours ago
5,John Miller,Ugyh4Rmv9Do19n3rOJB4AaABAg,Shane. Where you at?,4 hours ago
6,Vitória Régia,UgwPFcFnqiRiBVghTzR4AaABAg,Só eu que acho ela parecida com um desenho 3D?,6 hours ago (edited)
7,ッsυrτα,UgzZfxuD02UiBvosv114AaABAg,me deu medo,6 hours ago
8,IsIsIsIsh,Ugw3HkwL852hkiEblrt4AaABAg,"Ppl are like actually hating kn her, but shes ...",6 hours ago
9,Mad Eleven,UgyMAO54R_W2m-vgtEl4AaABAg,Como assim ela é um robô cara,8 hours ago


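##### Remove emojis from the comments by encoding the text to ASCII and dropping every character that cannot be encoded (note: this also strips accented letters such as "é" and "ô")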

In [7]:
def strip_non_ascii(comment):
    """Return `comment` with every non-ASCII character removed.

    A space is inserted in front of each backslash first (same as the
    original cell), then the text is encoded to ASCII with errors ignored,
    which drops emojis, accented letters and any other non-ASCII character.
    The result is decoded again so the column keeps holding text rather
    than bytes objects on Python 3.
    """
    spaced = comment.replace(u"\\", u" \\")
    return spaced.encode('ascii', 'ignore').decode('ascii')


# Assign the whole column in one step. The previous per-row
# `data.loc[ind]['text'] = ...` was chained indexing: `data.loc[ind]` may
# return a temporary copy, in which case the write never reaches `data`.
data['text'] = data['text'].map(strip_non_ascii)


##### Display preprocessed data 



In [8]:
# First 15 rows after the ASCII clean-up of the 'text' column
data.head(15)

Unnamed: 0,author,cid,text,time
0,Uma Kompton Plays,UgwLmC_xRIQxenZ5JEN4AaABAg,the lyrics really go with the beat and this is...,56 minutes ago
1,Serendip.,UgzyubzvkrXs6_qNfrN4AaABAg,"If japan can embrace holograms then please, le...",2 hours ago
2,princesinha do arrocha,UgzbzZwEofuuuen7OjN4AaABAg,"Gente ela uma pessoa de verdade, mas o rosto ...",2 hours ago
3,melanie's world,Ugyck7287YL4myYCmhF4AaABAg,Isso e um robo??,3 hours ago
4,Dibujando con Daniel,Ugyv3EzeawrOHCebcUB4AaABAg,Is she a animation?,4 hours ago
5,John Miller,Ugyh4Rmv9Do19n3rOJB4AaABAg,Shane. Where you at?,4 hours ago
6,Vitória Régia,UgwPFcFnqiRiBVghTzR4AaABAg,S eu que acho ela parecida com um desenho 3D?,6 hours ago (edited)
7,ッsυrτα,UgzZfxuD02UiBvosv114AaABAg,me deu medo,6 hours ago
8,IsIsIsIsh,Ugw3HkwL852hkiEblrt4AaABAg,"Ppl are like actually hating kn her, but shes ...",6 hours ago
9,Mad Eleven,UgyMAO54R_W2m-vgtEl4AaABAg,Como assim ela um rob cara,8 hours ago


##### Create one array holding only the comment text and another holding only the comment authors

In [9]:
# Extract the comment text and the comment authors as plain arrays
train_data, train_author = (data[column].values for column in ('text', 'author'))

##### Tokenize each comment and append the resulting list of tokens to a list of tokenized comments



In [12]:
from tokenize import tokenize
import string
import gensim

In [13]:
# Characters removed from every comment before tokenizing
punctuation_chars = set(string.punctuation)

token_train_data = []
for i in train_data:
    # Comments may be bytes (the result of an earlier .encode call); decode
    # them explicitly, because str() on bytes yields the "b'...'" repr on
    # Python 3 instead of the text itself.
    text = i.decode('ascii', 'ignore') if isinstance(i, bytes) else str(i)
    # The two-argument str.translate(None, chars) form exists only on
    # Python 2 and raises TypeError on Python 3; filtering character by
    # character removes the same punctuation on both versions.
    comment = ''.join(ch for ch in text if ch not in punctuation_chars)
    token_train_data.append(list(gensim.utils.tokenize(comment)))


##### Display tokenized comment in comment array

In [14]:
# Tokens of the second comment, as a spot check of the tokenization
token_train_data[1]

[u'If',
 u'japan',
 u'can',
 u'embrace',
 u'holograms',
 u'then',
 u'please',
 u'leave',
 u'Miquela',
 u'alone']

##### Build a gensim dictionary (id-to-word mapping) from the tokenized comments and convert each comment to its bag-of-words (BoW) representation

In [15]:
from gensim import corpora

# Vocabulary built from every tokenized comment
dictionary = corpora.Dictionary(token_train_data)
# One bag-of-words document per comment, in the same order as token_train_data
corpus = list(map(dictionary.doc2bow, token_train_data))

## TRAIN LDA TOPIC MODEL (edit `NUM_TOPICS` to change the number of topics / clusters)

In [252]:
# Number of topics (clusters) the LDA model is asked to find
NUM_TOPICS = 10
# Fixed seed so that re-running the notebook yields the same topics;
# without it LDA training is randomly initialised and differs per run.
RANDOM_SEED = 42
ldamodel = gensim.models.ldamodel.LdaModel(
    corpus,
    num_topics=NUM_TOPICS,
    id2word=dictionary,
    passes=15,
    random_state=RANDOM_SEED,
)
# Persist the trained model to disk
ldamodel.save('model5.gensim')

#### LDA TOPIC VISUALIZATION 

In [294]:

# Load the model from the path the training cell saves it to
# ('model5.gensim'). The previous 'lda_model/model5.gensim' did not match
# the save location, so a fresh Restart & Run All could not find the file
# (or would pick up a stale model left in that directory).
lda = gensim.models.ldamodel.LdaModel.load('model5.gensim')
import pyLDAvis.gensim
# Prepare the interactive topic visualization; sort_topics=False keeps the
# model's own topic order.
lda_display = pyLDAvis.gensim.prepare(lda, corpus, dictionary, sort_topics=False)
pyLDAvis.display(lda_display)

### ANALYZE COMMENTS FOR SENTIMENT ANALYSIS 

In [16]:
from textblob import TextBlob


#### Part-of-speech tagging for each token in a comment using TextBlob

In [17]:
# Wrap a single comment (index 11) in a TextBlob and show its
# (token, part-of-speech tag) pairs as an example.
b = TextBlob(train_data[11])
b.tags

[(u'this', u'DT'),
 (u'makes', u'VBZ'),
 (u'me', u'PRP'),
 (u'feel', u'VB'),
 (u'like', u'IN'),
 (u'im', u'JJ'),
 (u'listening', u'VBG'),
 (u'to', u'TO'),
 (u'the', u'DT'),
 (u'song', u'NN'),
 (u'with', u'IN'),
 (u'someone..', u'NN'),
 (u'damn', u'NN'),
 (u'im', u'NN'),
 (u'lonely', u'RB'),
 (u'af', u'JJ')]

#### Compute polarity and subjectivity metrics by iterating through the comments array
###### Polarity ranges from -1 (negative) to +1 (positive); subjectivity ranges from 0 (factual) to 1 (subjective).
###### E.g. a polarity of 0.8 with a subjectivity of 0.75 means the statement is positive and reads as personal opinion rather than factual information.



In [18]:
# Maps each comment's position in train_data to its TextBlob sentiment scores
sentiment_dict = {}
for count, comment in enumerate(train_data):
    b = TextBlob(comment)
    scores = b.sentiment
    sentiment_dict[count] = {'polarity': scores.polarity, 'subj': scores.subjectivity}
# Leave `count` equal to the number of comments processed
count = len(sentiment_dict)

##### add polarity and subjectivity metrics to initial dataframe 

In [19]:
# orient='index' turns each key of sentiment_dict (the comment position)
# into a row, with 'polarity' and 'subj' as columns.
sentiment_df = pd.DataFrame.from_dict(sentiment_dict,orient='index')

In [20]:
# Check the column names produced from the per-comment score dicts
sentiment_df.columns

Index([u'polarity', u'subj'], dtype='object')

In [21]:
# Column assignment aligns on index labels: sentiment_df is keyed 0..n-1 in
# comment order, so this assumes `data` still has its default 0..n-1 index
# -- TODO confirm.
data['polarity']=sentiment_df[u'polarity']

In [22]:
# Column assignment aligns on index labels: sentiment_df is keyed 0..n-1 in
# comment order, so this assumes `data` still has its default 0..n-1 index
# -- TODO confirm.
data['subjectivity']=sentiment_df[u'subj']

In [23]:
# First 10 rows, now including the polarity and subjectivity columns
data.head(10)

Unnamed: 0,author,cid,text,time,polarity,subjectivity
0,Uma Kompton Plays,UgwLmC_xRIQxenZ5JEN4AaABAg,the lyrics really go with the beat and this is...,56 minutes ago,0.4,0.7125
1,Serendip.,UgzyubzvkrXs6_qNfrN4AaABAg,"If japan can embrace holograms then please, le...",2 hours ago,0.0,0.0
2,princesinha do arrocha,UgzbzZwEofuuuen7OjN4AaABAg,"Gente ela uma pessoa de verdade, mas o rosto ...",2 hours ago,0.0,0.0
3,melanie's world,Ugyck7287YL4myYCmhF4AaABAg,Isso e um robo??,3 hours ago,0.0,0.0
4,Dibujando con Daniel,Ugyv3EzeawrOHCebcUB4AaABAg,Is she a animation?,4 hours ago,0.0,0.0
5,John Miller,Ugyh4Rmv9Do19n3rOJB4AaABAg,Shane. Where you at?,4 hours ago,0.0,0.0
6,Vitória Régia,UgwPFcFnqiRiBVghTzR4AaABAg,S eu que acho ela parecida com um desenho 3D?,6 hours ago (edited),0.0,0.0
7,ッsυrτα,UgzZfxuD02UiBvosv114AaABAg,me deu medo,6 hours ago,0.0,0.0
8,IsIsIsIsh,Ugw3HkwL852hkiEblrt4AaABAg,"Ppl are like actually hating kn her, but shes ...",6 hours ago,-0.05,0.2
9,Mad Eleven,UgyMAO54R_W2m-vgtEl4AaABAg,Como assim ela um rob cara,8 hours ago,0.0,0.0


#### Summary Statistics for Polarity and Subjectivity of Comments for  "Miquela - Automatic" music video 

In [291]:
# Count, mean, standard deviation and quartiles of the polarity scores
data['polarity'].describe()

count    1254.000000
mean        0.128112
std         0.308059
min        -0.800000
25%         0.000000
50%         0.000000
75%         0.250000
max         1.000000
Name: polarity, dtype: float64

In [292]:
# Count, mean, standard deviation and quartiles of the subjectivity scores
data['subjectivity'].describe()

count    1254.000000
mean        0.294679
std         0.349346
min         0.000000
25%         0.000000
50%         0.000000
75%         0.600000
max         1.000000
Name: subjectivity, dtype: float64