In [33]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import ParameterGrid
from sklearn.pipeline import Pipeline
from sklearn.decomposition import NMF, LatentDirichletAllocation
from sklearn.cluster import KMeans
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import silhouette_score
from sklearn.model_selection import ParameterGrid
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import make_scorer
import matplotlib.pyplot as plt
from scipy.sparse import coo_matrix, hstack
import itertools
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.multiclass import OneVsRestClassifier
from sklearn import preprocessing
from sklearn.metrics import coverage_error
from sklearn.svm import SVC
from sklearn.metrics import label_ranking_average_precision_score
from sklearn.neural_network import MLPClassifier
import scipy.sparse
from sklearn.cluster import KMeans

In [2]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
from bs4 import BeautifulSoup

#from sklearn.preprocessing import StandardScaler

from scipy.sparse import coo_matrix, hstack


In [46]:
# Load the cleaned posts; the first CSV column becomes the row index.
# All rows are read (pandas' default when no row limit is given).
dataset = pd.read_csv('Cleaned_Posts.csv', index_col=0)
dataset.shape

(49399, 10)

In [47]:
# Count missing values per column before cleaning.
dataset.isna().sum()

TText             0
TText_NEG         0
PText             0
TCode         10054
PCode          9627
TTitle            0
TTitle_NEG        0
Title             0
PTags             0
Tags              0
dtype: int64

In [48]:
# Replace missing code snippets with the literal string 'None' so that
# both code columns contain only strings.
for code_col in ('TCode', 'PCode'):
    dataset[code_col] = dataset[code_col].fillna('None')

In [49]:
# Re-count missing values per column to confirm the cleaning worked.
dataset.isna().sum()

TText         0
TText_NEG     0
PText         0
TCode         0
PCode         0
TTitle        0
TTitle_NEG    0
Title         0
PTags         0
Tags          0
dtype: int64

In [50]:
#Downsampling
# A fixed seed makes the subsample (and every model fitted on it below)
# reproducible across kernel restarts. The size is capped at the frame length
# because DataFrame.sample raises when asked for more rows than exist
# (without replacement).
dataset = dataset.sample(min(5000, len(dataset)), random_state=0)
dataset.shape

(5000, 10)

In [59]:
#Tags cleaning
# Assumes 'Tags' holds strings of the form '<tag1><tag2>...' (the previous
# HTML-parser based extraction relied on the same layout).
# A regex is used instead of an HTML parser because html.parser only
# recognises tags whose name starts with a letter, so names such as '.net'
# or '3d' were silently dropped from the tag lists.
dataset['PTags'] = dataset['Tags'].str.findall(r'<([^<>]+)>')

In [108]:

class CustomLDA(BaseEstimator, TransformerMixin):
    """Fit an independent bag-of-words + LDA model on each of three text columns.

    The input passed to ``fit``/``transform`` must support column access by the
    names ``'TText'``, ``'TTitle'`` and ``'TCode'`` (e.g. a pandas DataFrame).
    Each column gets its own ``CountVectorizer`` and its own
    ``LatentDirichletAllocation``; the fitted objects are exposed as
    ``textcvect``/``textlda``, ``titlecvect``/``titlelda`` and
    ``codecvect``/``codelda``.

    Parameters
    ----------
    text_n_components : int, default=10
        Number of topics used by each of the three LDA models. The same value
        is applied to all three because ``components_()`` stacks the three
        topic-term matrices side by side, which requires equal row counts.
    random_state : int, RandomState instance or None, default=None
        Forwarded to every ``LatentDirichletAllocation`` so that fits can be
        made reproducible. ``None`` keeps the unseeded behaviour.
    """

    def __init__(self, text_n_components=10, random_state=None):
        self.text_n_components = text_n_components
        self.random_state = random_state

    def _fit_field(self, documents):
        """Fit a vectorizer and an LDA model on one column and return both."""
        vectorizer = CountVectorizer(tokenizer=None, vocabulary=None)
        counts = vectorizer.fit_transform(documents)
        lda = LatentDirichletAllocation(
            n_components=self.text_n_components,
            learning_method='batch',
            random_state=self.random_state,
        )
        lda.fit(counts)
        return vectorizer, lda

    def _fitted_fields(self):
        """Return (column, vectorizer, lda) triples in output-column order."""
        return (
            ('TText', self.textcvect, self.textlda),
            ('TTitle', self.titlecvect, self.titlelda),
            ('TCode', self.codecvect, self.codelda),
        )

    @staticmethod
    def _vocabulary(vectorizer):
        """Return the vectorizer's feature names as a list.

        ``get_feature_names`` was removed from scikit-learn's vectorizers in
        favour of ``get_feature_names_out``; prefer the new method and fall
        back to the old one on older installations.
        """
        if hasattr(vectorizer, 'get_feature_names_out'):
            return list(vectorizer.get_feature_names_out())
        return list(vectorizer.get_feature_names())

    def fit(self, X, y=None):
        """Fit the three vectorizer/LDA pairs on X and return ``self``.

        ``y`` is accepted for pipeline compatibility and ignored.
        """
        #Text preparation
        self.textcvect, self.textlda = self._fit_field(X['TText'])
        #Title preparation
        self.titlecvect, self.titlelda = self._fit_field(X['TTitle'])
        #Code preparation
        self.codecvect, self.codelda = self._fit_field(X['TCode'])
        return self

    def transform(self, X, y=None):
        """Return the text, title and code LDA outputs stacked column-wise.

        The result has one row per input row and ``3 * text_n_components``
        columns, ordered text, title, code. ``y`` is ignored.
        """
        blocks = [
            lda.transform(vectorizer.transform(X[column]))
            for column, vectorizer, lda in self._fitted_fields()
        ]
        return np.hstack(blocks)

    def get_feature_names(self):
        """Return the text, title and code vocabularies concatenated in that order.

        The ordering matches the column layout of ``components_()``.
        """
        toreturn = []
        for _, vectorizer, _ in self._fitted_fields():
            toreturn.extend(self._vocabulary(vectorizer))
        return toreturn

    def components_(self):
        """Return the three topic-term matrices stacked column-wise.

        Note this is a method, not an attribute. Row ``i`` is the concatenation
        of topic ``i`` of the text, title and code models; these three topics
        are fitted independently, so sharing a row index does not imply that
        they are related.
        """
        return np.hstack([lda.components_ for _, _, lda in self._fitted_fields()])


#customLDA = CustomLDA()
#customLDA.fit(dataset[['TText', 'TTitle', 'TCode']])
#print('Step1 done')
#customLDA.transform(textset[['TText', 'TTitle', 'TCode']])

In [12]:
article = [48098044,]
# `dataset` is a random subsample, so the hard-coded post id is not guaranteed
# to be present, and .loc raises KeyError for a missing label. Fall back to the
# first sampled post (and say so) instead of crashing the notebook.
if not all(post_id in dataset.index for post_id in article):
    print('Post %s is not in the sampled dataset; using the first sampled post instead' % article)
    article = [dataset.index[0]]
samplepost = dataset.loc[article, ['TText', 'TTitle', 'TCode']]


customLDA = CustomLDA()
customLDA.fit(dataset[['TText', 'TTitle', 'TCode']])

CustomLDA(text_n_components=10)

In [17]:
# Concatenated vocabulary and stacked topic-term weights of the fitted model,
# followed by the topic representation of the selected post.
feature_names = customLDA.get_feature_names()
H = customLDA.components_()
W = customLDA.transform(samplepost)

In [34]:
# For every row of H, print the five terms with the largest weights,
# highest weight first.
for topic_idx, weights in enumerate(H):
    top_terms = [feature_names[i] for i in weights.argsort()[::-1][:5]]
    print("Topic %d:" % topic_idx)
    print(" ".join(top_terms))

Topic 0:
public get set packages use
Topic 1:
name address new value require
Topic 2:
file use self code the
Topic 3:
android id key codes string
Topic 4:
self none java org apache
Topic 5:
value type use button input
Topic 6:
if as constraintset value new
Topic 7:
error this use get return
Topic 8:
java at org com file
Topic 9:
div class form id view


In [24]:
# 'TText' value of the first row of the selected sample post.
TText = samplepost['TText'].iat[0]
TText

"set vm argument eclips like valu inject prop file load prop file use spring code config_data_path set success system properti inject prop file prop.getproperti `` test '' print path want simpli print config_data_path"

In [109]:
# Start from a fresh model, fit it on the three input columns and keep the
# stacked topic features of every row in X.
customLDA = CustomLDA()
X = customLDA.fit_transform(dataset.loc[:, ['TText', 'TTitle', 'TCode']])

In [110]:
# Cluster the rows of X into 500 groups with a fixed seed.
kmeans = KMeans(n_clusters=500, random_state=0)
kmeans.fit(X);  # trailing ';' keeps the cell output empty

In [115]:
# Boolean selector for the posts assigned to cluster 45.
mask = kmeans.labels_ == 45

# Count how often each tag occurs among the posts of that cluster.
popularitydict = {}
for tags in dataset.loc[mask, 'PTags']:
    for tag in tags:
        popularitydict[tag] = popularitydict.get(tag, 0) + 1

# Column names are given at construction time so that a cluster without any
# tags yields an empty, correctly labelled frame instead of a ValueError from
# assigning two column names to a zero-column frame.
df = pd.DataFrame(list(popularitydict.items()), columns=['Tag', 'Count']).set_index('Tag')
df.sort_values('Count', ascending=False)

Unnamed: 0_level_0,Count
Tag,Unnamed: 1_level_1
python,3
android,2
javascript,2
email,2
if-statement,1
r,1
physics,1
gmail,1
automation,1
mongodb,1


In [112]:
# Print the title and the body of every post selected by `mask`,
# separated by blank lines.
for post in dataset.loc[mask, ['PText', 'Title']].itertuples(index=False):
    print(post.Title)
    print()
    print(post.PText)
    print('\n\n')

Twilio - Using taskrouter.js and reservation.conference() how to not beep and end conference

 I am using task router to assign an incoming call task to a worker. When the worker gets the reservation I am starting a conference like this: There is not much documentation for how to handle a conference with taskrouter.js, but this seems to work to start the conference. There are 2 problems I am having: I can't stop the 'entering conference' beep to not play When both the worker and participant exit the conference the conference is not actually ended and therefore not putting the worker into the after work activity state. Any help would be appreciated.



How to set multiple scroll views size to change dynamically relative to each other?

 I have two scroll views in a vertical linear layout.
I want them to be relative to each other so that they fill the entire linear layout and compensate if one cant cover half the screen. Lets call that scroll views TOP and BOT.
If the screen can display 