In [1]:
# -*- coding: utf-8 -*-
from datetime import datetime
import regex as re
from abc import ABCMeta, abstractmethod
from collections import OrderedDict

import jieba
import jieba.posseg as pseg
import requests
from bs4 import BeautifulSoup

In [2]:
# encoding=UTF-8
# !flask/bin/python

from cassandra.cluster import Cluster
from cassandra.policies import DCAwareRoundRobinPolicy
from cassandra.auth import PlainTextAuthProvider
from cassandra.query import BatchStatement
import pandas as pd


class CassandraType(object):
    PRODUCTION = 0
    TEST = 1
    TEST_DOCKER = 2


class CassandraDAO(object):

    # you have to install following items :
    # a. python-Cassandra driver
    # b. pyspark cassandra connector

    def __init__(self, type):
        #         print('runing father.__init__')
        if type == CassandraType.PRODUCTION:
            self.contact_points = ['192.168.95.127', '192.168.95.122']
            self.contact_points_str = "192.168.95.127,192.168.95.122"
        elif type == CassandraType.TEST:
            self.contact_points = ['192.168.0.41', '192.168.0.42']
            self.contact_points_str = "192.168.0.41,192.168.0.42"
        else:
            self.contact_points = ['192.168.0.121', '192.168.0.122', '192.168.0.52']
            self.contact_points_str = "192.168.0.121,192.168.0.122,192.168.0.52"

        self.formatString = "org.apache.spark.sql.cassandra"
        self.username = "username"
        self.password = "password"
        self.cluster = None
        self.session = None
        self.createSession()

    def __del__(self):
        self.cluster.shutdown()

    def pandas_factory(self, colnames, rows):
        return pd.DataFrame(rows, columns=colnames)

    def createSession(self):
        print "contact_points = " + self.contact_points_str
        self.cluster = Cluster(
            contact_points=self.contact_points,  # random select a node
            #             load_balancing_policy = DCAwareRoundRobinPolicy(local_dc='datacenter1'),
            #         auth_provider = PlainTextAuthProvider(username='cassandra', password='cassandra')
        )
        self.session = self.cluster.connect()
        self.session.row_factory = self.pandas_factory
        self.session.default_fetch_size = 10000000  # needed for large queries, otherwise driver will do pagination. Default is 50000.

    def getSession(self):
        return self.session

    def execCQL(self, keyspace, cql):
        """
        execute CQL
        """
        self.session.set_keyspace(keyspace)
        self.session.execute_async(cql)

    def execCQLSelect(self, keyspace, cql):
        """
        execute CQL, select only
        """

        self.session.set_keyspace(keyspace)

        #       cassandra ResultSet
        async_results = self.session.execute_async(cql)
        return async_results

    def execCQLCallBackAnysc(self, keyspace, cql, handle_success, handle_error):
        """
        execute CQL, if success => handle_success function, else handle_error
        """
        self.session.set_keyspace(keyspace)
        async_results = self.session.execute_async(cql)
        async_results.add_callbacks(handle_success, handle_error)

    def execCQLSelectToPandasDF(self, keyspace, cql):
        """
        execute CQL, select only, return Pandas DataFrame
        """

        self.session.set_keyspace(keyspace)

        #       cassandra ResultSet
        async_results = self.session.execute_async(cql)
        #         async_results = self.session.execute_async(cql)
        #       to Pandas DataFrame
        return async_results.result()._current_rows


    def execCQLSelectToRDD(self, sqlContext, keyspace, cql):
        """
        execute CQL, select only, return Spark RDD
        """

        return self.execCQLSelectToDF(sqlContext, keyspace, cql).rdd.map(tuple)  # dataFrame to RDD

    @property
    def contactPoints(self):
        return self.contact_points

    @contactPoints.setter
    def contactPoints(self, contact_points):
        self.contact_points = contact_points

    @contactPoints.deleter
    def contactPoints(self):
        del self.contact_points


In [3]:
CASSANDRA_ENV = CassandraType.PRODUCTION
dao = CassandraDAO(CASSANDRA_ENV)

contact_points = 192.168.95.127,192.168.95.122


In [4]:
import pandas as pd
import jieba
import jieba.posseg as pseg

class JiebaSegmentor:

    def __init__(self, dict_path, userdict=[], stopwords = False, stopwords_path=None):
        self.dict_path = dict_path
        self.userdict = userdict
        self.dictionary_init()
        self.stopwords_path = stopwords_path
        self.stopwords = stopwords
        self.stopwords_set = set()
        self.stopwords_init()

    def dictionary_init(self):
        jieba.set_dictionary(self.dict_path)
        for path in self.userdict:
            print path
            jieba.load_userdict(path)
    
    def stopwords_init(self):
        if self.stopwords_path:
            with open(self.stopwords_path ,'r') as stopwords:
                for stopword in stopwords:
                    self.stopwords_set.add(stopword.strip('\n').decode('utf-8'))

    def taiwan_country(self):
        return [u'臺北', u'台北', u'基隆', u'臺中', u'台中', u'臺南', u'台南', u'高雄',
                u'宜蘭', u'桃園', u'新竹', u'苗栗', u'彰化', u'南投', u'嘉義', u'雲林',
                u'屏東', u'臺東', u'台東', u'花蓮', u'澎湖']

    def wordToNumber(self, input_text):

        target = u''
        for s in input_text:

            if (s == u'零') or (s == '0'):
                to_word = u'0'
            elif (s == u'一') or (s == u'壹') or (s == '1'):
                to_word = u'1'
            elif (s == u'二') or (s == u'兩') or (s == u'貳') or (s == '2'):
                to_word = u'2'
            elif (s == u'三') or (s == u'參') or (s == '3'):
                to_word = u'3'
            elif (s == u'四') or (s == u'肆') or (s == '4'):
                to_word = u'4'
            elif (s == u'五') or (s == u'伍') or (s == '5'):
                to_word = u'5'
            elif (s == u'六') or (s == u'陸') or (s == '6'):
                to_word = u'6'
            elif (s == u'七') or (s == u'柒') or (s == '7'):
                to_word = u'7'
            elif (s == u'八') or (s == u'捌') or (s == '8'):
                to_word = u'8'
            elif (s == u'九') or (s == u'玖') or (s == '9'):
                to_word = u'9'
            else:
                to_word = s

        target = target + to_word
        return target

    def input_text_preprocessing(self, input_text):

        if type(input_text) is not unicode:
            input_text = input_text.decode('utf-8')

#         input_text = self.wordToNumber(input_text)
        return input_text

    def get_names(self, input_text):

        input_text = self.input_text_preprocessing(input_text)
        names = []
        words = pseg.cut(input_text)
        print words
        for w, f in words:
            if f.lower() == 'nr':
                names.append(w)
        for name in names:
            print name.encode('utf-8')
        return names

    def lcut(self, input_text):

        input_text = self.input_text_preprocessing(input_text)
        cut_raw = jieba.lcut(input_text)
        key = []

        for k in cut_raw:
            if self.stopwords:
                if k in self.stopwords_set:
                    continue
            
            key.append(k)
        df = pd.DataFrame({"word": key})
        return df

    def pseg_lcut(self, input_text):

        input_text = self.input_text_preprocessing(input_text)
        cut_raw = pseg.lcut(input_text)
        key = []
        value = []

        for k, v in cut_raw:
            tag = v
            if self.stopwords:
                if k in self.stopwords_set:
                    continue
            
            if k in self.taiwan_country():
                tag = u'ns'
            if len(k) > 1 and tag == u'x':
                tag = u'n'
            key.append(k)
            value.append(tag)
        df = pd.DataFrame({"word": key, "tag": value})
        return df

In [45]:
import jieba
import jieba.posseg as pseg
jieba_path = "/nfs/aq_test/jieba/"
jieba_dict_path1 = jieba_path + "dict_taiwan.txt"
jieba_dict_path2 = jieba_path +"userdict.txt"
jieba_dict_path3 = jieba_path +"dict.txt.big"
jieba_dict_path4 = jieba_path +"dict.txt.small"
jieba_stopwords_path = jieba_path +"stopwords.txt"
segmentor = JiebaSegmentor(jieba_dict_path1, 
                           [jieba_dict_path2,jieba_dict_path3,jieba_dict_path4], 
                           stopwords = False, 
                           stopwords_path=jieba_stopwords_path)

Building prefix dict from /nfs/aq_test/jieba/dict_taiwan.txt ...
DEBUG:jieba:Building prefix dict from /nfs/aq_test/jieba/dict_taiwan.txt ...
Loading model from cache /tmp/jieba.u34e30d1c15f49735c28e2158fa64cd7a.cache
DEBUG:jieba:Loading model from cache /tmp/jieba.u34e30d1c15f49735c28e2158fa64cd7a.cache
Loading model cost 0.578 seconds.
DEBUG:jieba:Loading model cost 0.578 seconds.
Prefix dict has been built succesfully.
DEBUG:jieba:Prefix dict has been built succesfully.


/nfs/aq_test/jieba/userdict.txt
/nfs/aq_test/jieba/dict.txt.big
/nfs/aq_test/jieba/dict.txt.small


In [7]:
# jieba & 詞性測試
test_cut =pseg.lcut('西瓜好大一顆')
text_subject_dict={}
for w,n in test_cut:
    print w + ' (' + n + ')'
    text_subject_dict.update({w:n})
print '---subject_dict---'
for k, v in text_subject_dict.items():
    print k + ' (' + v + ')'

西瓜 (ns)
好 (a)
大 (a)
一顆 (m)
---subject_dict---
一顆 (m)
西瓜 (ns)
好 (a)
大 (a)


In [8]:
test_cut_raw_0 = pseg.lcut('陳水扁')
for w,n in test_cut_raw_0:
    print w + ' (' + n + ')'

陳水扁 (nr)


In [216]:
unit_sentence = [u'什麼',u'哪個',u'哪些',u'那個',u'那些',u'哪兒',u'那兒']#,u'哪',u'那']
unit_sentence_rule = ''
for k,v in enumerate(unit_sentence):
    if k == len(unit_sentence) - 1:
        unit_sentence_rule = unit_sentence_rule + v
    else:
        unit_sentence_rule = unit_sentence_rule + v + '|'
print '(' + unit_sentence_rule + ')'

(什麼|哪個|哪些|那個|那些|哪兒|那兒)


In [9]:
class CheckQuestion:
    __metaclass__ = ABCMeta

    def __init__(self):
        pass

    class Label(object):
        WHY = 'why'
        WHEN = 'when'
        WHERE = 'where'
        WHO = 'who'
        WHAT = 'what'
        HOW = 'how'
        HOW_MANY = 'how_many'
        STATUS = 'status'
        FEEL = 'feel'
        OTHER = 'other'

    def get_v_subject(self):
        return ['v', 'vd', 'vg', 'vi', 'vn', 'vq', 'vt']

    def get_adj_subject(self):
        return ['a', 'ad', 'ag', 'an']

    def get_people_subject(self):
        return ['nr','nrfg','nrt']

    def get_location_subject(self):
        return ['ns']
    
    def get_n_subject(self):
        return ['n','ng','nt','nz']
 
    def get_eng_subject(self):
        return ['eng']
    
    def __get_unit_word(self):
        return [u'有沒有',u'什麼', u'哪一個', u'那一個', u'哪個', u'哪些', u'那個', u'那些', u'怎麼', u'哪兒', u'那兒', u'怎', u'哪', u'那']

    def get_adj_word_rule(self):
        return u'(鬼|怪|神祕|神奇)?(的)?'

    def get_unit_word_post_rule(self):

        unit_word = self.__get_unit_word()
        unit_word_rule = ''

        for k, v in enumerate(unit_word):
            unit_word_rule = unit_word_rule + v
            if k < len(unit_word) - 1:
                unit_word_rule = unit_word_rule + '|'

        unit_word_rule = '(' + unit_word_rule + ')' + u'(是)?'

        return unit_word_rule

    def get_unit_word_pre_rule(self):

        unit_word = self.__get_unit_word()
        unit_word_rule = ''

        for k, v in enumerate(unit_word):
            unit_word_rule = unit_word_rule + v
            if k < len(unit_word) - 1:
                unit_word_rule = unit_word_rule + '|'

        unit_word_rule = u'(是)?' + '(' + unit_word_rule + ')'

        return unit_word_rule

    def __repr__(self):
        return "{class_name}".format(class_name=self.__class__.__name__)

    @abstractmethod
    def check(self):
        pass


# 詢問原因

In [10]:
class CheckWhy(CheckQuestion):

    def __init__(self, segmentor):
        super(CheckWhy, self).__init__()
        self.label = ''
        self.target = ''
        self.segmentor = segmentor

    def check(self, input_text):
        """
        詢問原因:
        label: why
        句型：
        1.(為什麼|爲何|爲啥)
        2.通用疑問句+通用形容句+(原因|理由)
        3.(原因|理由) + 通用疑問句+通用形容句
        """
        self.label = ''
        self.target = ''
        
        #1
        rule = u'(為什麼|爲何|爲啥)'
        pattern = re.compile(rule)
        #         print rule
        match = pattern.search(input_text)
        if match is not None:
            self.label = self.Label.WHY
            return True

        #2
        rule = self.get_unit_word_post_rule() + self.get_adj_word_rule() + u'(原因|理由)'
        #         print rule
        pattern = re.compile(rule)
        match = pattern.search(input_text)
        if match is not None:
            self.label = self.Label.WHY
            return True

        #3
        rule = u'(原因|理由)' + self.get_unit_word_pre_rule() + self.get_adj_word_rule()
        #         print rule
        pattern = re.compile(rule)
        match = pattern.search(input_text)
        if match is not None:
            self.label = self.Label.WHY
            return True

    # cc = CheckWhy(u'小狗會叫的原因是什麼')
    # if cc.check():
    #     print cc.label

# 詢問時間

In [11]:
class CheckWhen(CheckQuestion):

    def __init__(self, segmentor):
        super(CheckWhen, self).__init__()
        self.label = ''
        self.target = ''
        self.segmentor = segmentor

    def check(self, input_text):
        """
        詢問時間:
        label: when
        """
        self.label = ''
        self.target = ''

        rule = u'(什麼時間|什麼時候|什麼年份|什麼月份|什麼年|什麼月|什麼日|什麼天|'+\
                u'哪時|那時|何時|哪一年|哪一月|哪一天|哪年|哪月|哪日|哪天|那一年|那一月|那一天|那年|那月|那日|那天)'
        #         print rule
        pattern = re.compile(rule)
        match = pattern.search(input_text)
        if match is not None:
            self.label = self.Label.WHEN
            return True

#         rule = self.get_unit_word_post_rule() + \
#                self.get_adj_word_rule() + \
#                u'(時間|時候|年份|月份|一年|一月|一天|年|月|日|天)'
#         #         print rule
#         pattern = re.compile(rule)
#         match = pattern.search(input_text)
#         # print match
#         if match is not None:
#             self.label = self.Label.WHEN
#             return True

#         rule = u'(時間|時候|年份|月份|一年|一月|一天|年|月|日|天)' + \
#                self.get_unit_word_pre_rule() + \
#                self.get_adj_word_rule()
#         #         print rule
#         pattern = re.compile(rule)
#         match = pattern.search(input_text)
#         if match is not None:
#             self.label = self.Label.WHEN
#             return True

    # # cc = CheckWhen(u'什麼年份發生法國大革命')
    # cc = CheckWhen(u'法國大革命發生年份是什麼')
    # if cc.check():
    #     print cc.label

# 詢問地點

In [12]:
class CheckWhere(CheckQuestion):

    def __init__(self, segmentor):
        super(CheckWhere, self).__init__()
        self.label = ''
        self.target = ''
        self.segmentor = segmentor

    def check(self, input_text):
        """
        詢問地點:
        label: where
        句型：
        1.問單一地點
        2.(在何處|在何地|何地|何處|在哪裡|哪裡|在哪兒|哪兒|在哪|在那裡|那裡|在那兒|那兒|在那)
        3.通用疑問句+通用形容句+(地點|地方|國家|省分|城市|城鎮|{詞性為ns}))
        """
        self.label = ''
        self.target = ''
        
        #1
        cut_df = self.segmentor.pseg_lcut(input_text)
        if len(cut_df) == 1 and cut_df['tag'][0] in self.get_location_subject():
            self.label = self.Label.WHERE
            self.target = cut_df['word'][0]
            return True
        
        #2
        rule = u'(在何處|在何地|何地|何處|在哪裡|哪裡|在哪兒|哪兒|在哪|在那裡|那裡|在那兒|那兒|在那)'
        #         print rule
        pattern = re.compile(rule)
        match = pattern.search(input_text)
        if match is not None:
            for index, row in cut_df.iterrows():
                w = row['word']
                n = row['tag']
                if n.lower() in self.get_location_subject(): #or n.lower() in self.get_eng_subject():
                    self.label = self.Label.WHERE
                    self.target = w
                    return True

            self.label = self.Label.WHERE
            return True
        
        #3
        rule = self.get_unit_word_post_rule() + \
               self.get_adj_word_rule() + \
               u'(地點|地方|國家|省分|城市|城鎮)'
        #         print rule
        pattern = re.compile(rule)
        match = pattern.search(input_text)
        # print match
        if match is not None:
            self.label = self.Label.WHERE
            return True

#         rule = self.get_unit_word_post_rule() + u'(.*)'
#         pattern = re.compile(rule)
#         match = pattern.search(input_text)

#         if match is not None:
# #             print match.group(1)
# #             print match.group(2)
# #             print match.group(3)

#             content = None
#             if match.group(2):
#                 content = match.group(2)

#             if match.group(3):
#                 content = match.group(3)

#             if content is not None:
#                 cut_df = self.segmentor.pseg_lcut(content)
#                 for index, row in cut_df.iterrows():
#                     w = row['word']
#                     n = row['tag']
#                     if n.lower() in self.get_location_subject():
#                         self.label = self.Label.WHERE
#                         self.target = w
#                         return True
#                     break

#         rule = u'(.*)' + self.get_unit_word_pre_rule()
#         #         print rule
#         pattern = re.compile(rule)
#         match = pattern.search(input_text)

#         if match is not None:
#             #             print match.group(1)

#             content = None
#             if match.group(1):
#                 content = match.group(1)

#             if content is not None:
#                 cut = self.segmentor.pseg_lcut(content)
#                 for w, n in cut:
#                     #                     print w + ' (' + n + ')'

#                     if n.lower() in self.get_location_subject():
#                         self.label = self.Label.WHERE
#                         return True
#                     break
    
# cc = CheckWhere(segmentor)
# if cc.check(u'[問卦] 有沒有subway塑膠手套ㄉ8卦?'):
#     print cc.label+ ' ; ' + cc.target

# 詢問人

In [35]:
class CheckWho(CheckQuestion):

    def __init__(self, segmentor):
        super(CheckWho, self).__init__()
        self.label = ''
        self.target = ''
        self.segmentor = segmentor

    def check(self, input_text):
        """
        詢問人:
        label: who
        句型：
        1.問單一人名
        2.(誰|哪位|那位)
        3.通用疑問句+通用形容句+(人)
        4.通用疑問句+通用形容句+{詞性為nr}
        5.{詞性為nr}+通用疑問句+通用形容句
        """
        self.label = ''
        self.target = ''
        
        #1
        cut_df = self.segmentor.pseg_lcut(input_text)
        print(cut_df)
        if len(cut_df) == 1 and cut_df['tag'][0] in self.get_people_subject():
            self.label = self.Label.WHO
            self.target = cut_df['word'][0]
            return True
        
        #2
        rule = u'(是誰|哪位|那位|什麼人|誰是)'
        #         print rule
        pattern = re.compile(rule)
        match = pattern.search(input_text)
        # print match
        if match is not None:
            for index, row in cut_df.iterrows():
                w = row['word']
                n = row['tag']
#                 print w
#                 print n
                if n.lower() in self.get_people_subject(): #or n.lower() in self.get_eng_subject():
                    self.label = self.Label.WHO
                    self.target = w
                    return True

            self.label = self.Label.WHO
            return True
        
#         #3
#         rule = self.get_unit_word_post_rule() + \
#                self.get_adj_word_rule() + \
#                u'(人)'
#         #         print rule
#         pattern = re.compile(rule)
#         match = pattern.search(input_text)
#         # print match
#         if match is not None:
#             self.label = self.Label.WHO
#             return True
        
#         #4
#         rule = self.get_unit_word_post_rule() + u'(.*)'
#         print rule
#         pattern = re.compile(rule)
#         match = pattern.search(input_text)

#         if match is not None:
#             #             print match.group(1)
#             #             print match.group(2)
#             #             print match.group(3)
#             content = None
#             if match.group(2):
#                 content = match.group(2)

#             if match.group(3):
#                 content = match.group(3)

#             if content is not None:
#                 cut_df = self.segmentor.pseg_lcut(content)
#                 for index, row in cut_df.iterrows():
#                     w = row['word']
#                     n = row['tag']
#                     print w
#                     print n
#                     if n.lower() in self.get_people_subject():
#                         self.label = self.Label.WHO
#                         self.target = w
#                         return True

#         #5
#         rule = u'(.*)' + self.get_unit_word_pre_rule()
#         #         print rule
#         pattern = re.compile(rule)
#         match = pattern.search(input_text)

#         if match is not None:
#             #             print match.group(1)

#             content = None
#             if match.group(1):
#                 content = match.group(1)

#             if content is not None:
#                 cut_df = self.segmentor.pseg_lcut(content)
#                 for index, row in cut_df.iterrows():
#                     w = row['word']
#                     n = row['tag']
#                     if n.lower() in self.get_people_subject():
#                         self.label = self.Label.WHO
#                         self.target = w
#                         return True
    
# cc = CheckWho(segmentor)
# if cc.check(u'[問卦] 科技業真的有那麼爽嗎?'):
#     print cc.label+ ' ; ' + cc.target

# 詢問一般事務

In [14]:
class CheckWhat(CheckQuestion):

    def __init__(self, segmentor):
        super(CheckWhat, self).__init__()
        self.label = ''
        self.target = ''
        self.segmentor = segmentor

    def check(self, input_text):
        """
        詢問一般事務:
        label: what
        句型：
        1.單一個字{詞性為名詞}
        2.(什麼是|哪個是|那個是)+{詞性為名詞}
        3.{詞性為名詞}+(是什麼|是哪個|是那個)
        
        """
        self.label = ''
        self.target = ''
        
        #1
        cut_df = self.segmentor.pseg_lcut(input_text)
#         print cut_df
        if len(cut_df) == 1:
            n = cut_df['tag'][0]
#             print n
            if n.lower() in self.get_n_subject()or \
                n.lower() in self.get_eng_subject():
#                 print 'in'
                self.label = self.Label.WHAT
                self.target = cut_df['word'][0]
                return True
        
        #2
        rule = u'(什麼是|哪個是|那個是)(.*)'
        pattern = re.compile(rule)
        match = pattern.search(input_text)

        if match is not None:
#             print match.group(1)
#             print match.group(2)

            content = None
            if match.group(1):
                content = match.group(1)
            
            if match.group(2):
                content = match.group(2)

            if content is not None:
                cut_df = self.segmentor.pseg_lcut(content)
#                 print 'cut_df'
#                 print cut_df
                for index, row in cut_df.iterrows():
                    w = row['word']
                    n = row['tag']
#                     if n.lower() not in self.get_people_subject() and \
#                             n.lower() not in self.get_location_subject():
                    if n.lower() in self.get_n_subject() or \
                        n.lower() in self.get_eng_subject():
                        self.label = self.Label.WHAT
                        self.target = w
                        return True

        #3
        rule = u'(.*)(是什麼|是哪個|是那個)'
        #         print rule
        pattern = re.compile(rule)
        match = pattern.search(input_text)

        if match is not None:
            content = None
            if match.group(1):
                content = match.group(1)

            if content is not None:
                cut_df = self.segmentor.pseg_lcut(content)
                for index, row in cut_df.iterrows():
                    w = row['word']
                    n = row['tag']
#                     print w
#                     print n
#                     if n.lower() not in self.get_people_subject() and \
#                             n.lower() not in self.get_location_subject():
                    if n.lower() in self.get_n_subject() or \
                        n.lower() in self.get_eng_subject():
                        self.label = self.Label.WHAT
                        self.target = w
                        return True

# cc = CheckWhat(segmentor)
# if cc.check(u'發牢騷'):
#     print cc.label+ ' ; ' + cc.target

# 詢問動作

In [15]:
class CheckHow(CheckQuestion):

    def __init__(self, segmentor):
        super(CheckHow, self).__init__()
        self.label = ''
        self.target = ''
        self.segmentor = segmentor

    def check(self, input_text):
        """
        詢問動作:
        label: how
        句型：
        1.(怎麼|怎樣|如何|教我|教導我|學|學會)+動詞
        """
        self.label = ''
        self.target = ''
        
        #1
        rule = u'(怎麼|怎樣|如何|教我|教導我|想學|學會)(.*)'
        pattern = re.compile(rule)
        match = pattern.search(input_text)

        if match is not None:
#             print match.group(1)
#             print match.group(2)

            content = match.group(2)
            if content is not None:
                cut_df = self.segmentor.pseg_lcut(content)
                for index, row in cut_df.iterrows():
                    w = row['word']
                    n = row['tag']
#                     print w
#                     print n
                    if n.lower() in self.get_v_subject():
                        self.label = self.Label.HOW
                        self.target = input_text
                        return True
                    break

# cc = CheckHow(segmentor)
# if cc.check(u'給大學生的建議'):
#     print cc.label+ ' ; ' + cc.target

# 詢問數字

In [16]:
class CheckHowMany(CheckQuestion):

    def __init__(self, segmentor):
        super(CheckHowMany, self).__init__()
        self.label = ''
        self.target = ''
        self.segmentor = segmentor

    def check(self, input_text):
        """
        詢問數字:
        label: howmany
        句型：
        1.(多少|幾)
        """
        self.label = ''
        self.target = ''
        
        #1
        rule = u'(多少|幾)'
        pattern = re.compile(rule)
        match = pattern.search(input_text)
        #         print rule
        if match is not None:
            self.label = self.Label.HOW_MANY
            return True

# cc = CheckHowMany(u'老闆多少錢阿一個')
# if cc.check():
#     print cc.label

# 詢問狀態

In [17]:
class CheckStatus(CheckQuestion):

    def __init__(self, segmentor):
        super(CheckStatus, self).__init__()
        self.label = ''
        self.target = ''
        self.segmentor = segmentor

    def check(self, input_text):
        """
        詢問狀態:
        label: what status
        句型：
        1.多 + 形容詞
        """
        self.label = ''
        self.target = ''
        
        #1
        rule = u'(多)(.*)'
        pattern = re.compile(rule)
        match = pattern.search(input_text)

        if match is not None:
            #             print match.group(1)
            #             print match.group(2)
            content = match.group(2)
            if content is not None:
                cut_df = self.segmentor.pseg_lcut(content)
                for index, row in cut_df.iterrows():
                    w = row['word']
                    n = row['tag']
                    if n.lower() in self.get_adj_subject():
                        self.label = self.Label.STATUS
                        return True
                    break


# 詢問看法

In [18]:
class CheckFeel(CheckQuestion):
    def __init__(self, input_text):
        super(CheckFeel, self).__init__()
        self.label = ''
        self.target = ''
        self.input_text = input_text

    def check(self):  
        """
        詢問看法:
        label: feel
        句型：
        1.(感覺|看法|想法|意見|覺得)(呢|勒|類|嗎|如何|怎樣|怎麼樣)
        """
        
        self.label = ''
        self.target = '' 
        
        #1
        rule = u'(感覺|看法|想法|意見|覺得)(呢|勒|類|嗎|如何|怎樣|怎麼樣)'
        pattern = re.compile(rule)
        match = pattern.search(self.input_text)
#         print rule
        if match is not None:
            self.label = self.Label.FEEL
            return True
    
# cc = CheckFeel(u'各位酸民覺得呢')
# if cc.check():
#     print cc.label

# 其他

In [19]:
class CheckOther(CheckQuestion):

    def __init__(self):
        super(CheckOther, self).__init__()

    def check(self, input_text):
        self.label = 'other'
        self.target = ''
        return True

In [228]:
jieba_dict_path1 = "/home/charles/dataset/jieba/dict_taiwan.txt"
jieba_dict_path2 = "/home/charles/dataset/jieba/userdict.txt"
jieba_dict_path3 = "/home/charles/dataset/jieba/dict.txt.big"
jieba_dict_path4 = "/home/charles/dataset/jieba/dict.txt.small"
jieba_stopwords_path = "/home/charles/dataset/jieba/stopwords.txt"
segmentor = JiebaSegmentor(jieba_dict_path1, 
                           [jieba_dict_path2,jieba_dict_path3,jieba_dict_path4], 
                           stopwords = False, 
                           stopwords_path=jieba_stopwords_path)

Building prefix dict from /home/charles/dataset/jieba/dict_taiwan.txt ...
DEBUG:jieba:Building prefix dict from /home/charles/dataset/jieba/dict_taiwan.txt ...
Loading model from cache /tmp/jieba.u48306fa201322dcccc3d0c62898fbadc.cache
DEBUG:jieba:Loading model from cache /tmp/jieba.u48306fa201322dcccc3d0c62898fbadc.cache
Loading model cost 0.867 seconds.
DEBUG:jieba:Loading model cost 0.867 seconds.
Prefix dict has been built succesfully.
DEBUG:jieba:Prefix dict has been built succesfully.


/home/charles/dataset/jieba/userdict.txt
/home/charles/dataset/jieba/dict.txt.big
/home/charles/dataset/jieba/dict.txt.small


In [21]:
from functools import wraps
import regex as re

In [36]:
def cache(func):
    memo = {}

    @wraps(func)
    def _wrapper(*args):
        res = memo.get(args, None)
        if res is not None:
            print 'cache exist'
            return res
        else:
            res = func(*args)
            memo[args] = res
        return res
    return _wrapper

class WikiPedia:
    
    def __init__(self, lang='zh'):
        self.__lang = lang
   
    def set_lang(self, lang):
        self.__lang = lang

    def wiki_query_list(self, query, lang):
        """
        查詢wiki單字解釋與相關詞API
        """

        url = ("https://"+lang+".wikipedia.org/w/api.php?action=opensearch&search="+query+"&utf8")    
        response = requests.post(url)
        if response.status_code == requests.codes.ok:
            result = response.json()
        #     print result[0]
            if len(result[2]) == 0:
                return None

            content = result[2][0]
            if content == u"" or u"重定向" in content:  
                print 'redirect'
                return None

            return content
        
    def wiki_query_snippet(self, query, lang):
    
        def query_api(query_str, lang):
            
            search_list = []
            url = ("https://"+lang+".wikipedia.org/w/api.php?action=query&list=search&srsearch="+
                    query_str+"&format=json&formatversion=2")    
            response = requests.post(url)
            if response.status_code == requests.codes.ok:

                result = response.json()
                if "query" in result:
                    if "search" in result["query"]:
                        search_list = result["query"]["search"]

            return search_list

        search_list = query_api(query, lang)
        if search_list:

            if search_list[0].has_key("title"):
                title = search_list[0]["title"]
                search_list = query_api(title, lang)  
                if search_list:
                    if search_list[0].has_key("snippet") and\
                        search_list[0]["snippet"]:

                        snippet = search_list[0]["snippet"]
                        snippet = snippet.split(u'。')[0]
                        snippet = re.sub(r'</?\w+[^>]*>','',snippet)+u'。'
                        return snippet
        return None
    
    @cache
    def summery(self, query):
        """
        查詢wiki API
        1.查詢wiki單字解釋與相關詞API
        2.查詢wiki單字解釋片段API
        都以中文wiki百科查詢, 當出現重定向(縮寫或只有簡體文章時發生), 改由查詢wiki單字解釋片段API
        wiki單字解釋片段API的答案不完整, 所以不考慮一開始就呼叫
        爲了提高英文單字準確性, 會使用wiki單字解釋片段API查出的title再當單字查詢一次
        """

        def check_contain_chinese(query):

            for ch in query:
                if u'\u4e00' <= ch <= u'\u9fff':
                    return True
            return False

        if not query:
            return None

        if type(query) is not unicode:
            query = query.decode('utf-8')

    #     if check_contain_chinese(query):
    #         lang = 'zh'
    #     else:
    #         lang = 'en'

        lang = 'zh'

        print 'wiki query : ' + query
        for result in [self.wiki_query_list(query, lang), 
                       self.wiki_query_snippet(query, lang)]:
            if result:
                return result
        return None

In [37]:
from collections import namedtuple
class QuestionTypeCheck:

    def __init__(self, segmentor, source='user'):
        self.segmentor = segmentor
        self.source = source
        self.check_pipeline = [CheckWhy(self.segmentor),
                              CheckWhen(self.segmentor),
                              CheckWhere(self.segmentor),
                              CheckWho(self.segmentor),
                              CheckHow(self.segmentor),
                              CheckHowMany(self.segmentor),
                              CheckStatus(self.segmentor),
                              CheckWhat(self.segmentor)]
        #                       CheckFeel(self.segmentor)]

    QuestionTypeTuple = namedtuple('QuestionTypeTuple', {
                                    'label',
                                    'target',
                                })    
    
    @property
    def source(self):
        return self.source

    @source.setter
    def source(self, source):
        self.source = source
    
    def check_question_type(self, input_text):
        """
        [ Command ] design pattern
        """ 
        if type(input_text) is not unicode:
            input_text = input_text.decode('utf-8')
        
#         print input_text
        if self.source == 'ptt':
            # ptt text clean
            input_text = input_text.replace(u'Re: ', u'')
            input_text = re.sub(u"\\[.*?]", u"", input_text)
        elif self.source == 'dcard':
            input_text = re.sub(u"\\(.*?\\)|\\{.*?}|\\[.*?]|\\（.*?）|\\【.*?】|\\#.*? |\\#.*?#", "", input_text)
              
        label = ''
        target = ''

        # 有被標注到就不往下做
        for cmd in self.check_pipeline:
            if cmd.check(input_text):
                label = cmd.label
                target = cmd.target
                break

        if label == '':
            cmd = CheckOther()
            cmd.check(input_text)
            label = cmd.label
        
        qt = self.QuestionTypeTuple(label=label,
                                   target=target)
        
#         print 'label = ' + qt.label
#         print 'target = ' + qt.target
#         if qt.label in ['what', 'where', 'who']:
#             wp = WikiPedia()
#             print wp.summery(qt.target)
#         elif qt.label in ['how']:
#             # google整句丟入
#             google_search(qt.target)
        return qt

In [38]:
def google_search(query):
    
    if not query:
        return None
    
    if type(query) is not unicode:
        query = query.decode('utf-8')
        
    print 'google query : ' + query
        
    # Google 搜尋 URL
    google_url = 'https://www.google.com.tw/search'

    # 查詢參數
    my_params = {'q': query, 'lr':'lang_zh-TW'}

    # 下載 Google 搜尋結果
    r = requests.get(google_url, params = my_params)

    # 確認是否下載成功
    if r.status_code == requests.codes.ok:
      # 以 BeautifulSoup 解析 HTML 原始碼
      soup = BeautifulSoup(r.text, 'html.parser')

      # 觀察 HTML 原始碼
      # print(soup.prettify())

      # 以 CSS 的選擇器來抓取 Google 的搜尋結果
      items = soup.select('div.g > h3.r > a[href^="/url"]')
      for i in items[:5]:
        # 標題
        print(u"標題：" + i.text)
        # 網址
        print(u"網址：" + i.get('href').lstrip('/url?q='))

In [46]:
check = QuestionTypeCheck(segmentor)
check.source = 'dcard'

In [47]:
check.check_question_type(u'Re: [新聞]黃之瀚提美台')

   tag word
0  eng   Re
1    x    :
2    x     
3   nr  黃之瀚
4    v    提
5    n   美台


QuestionTypeTuple(target='', label='other')

In [48]:
check.check_question_type(u'老闆這袋子西瓜有幾顆')

  tag word
0   n   老闆
1  zg    這
2   n   袋子
3  ns   西瓜
4   v    有
5   m   幾顆


QuestionTypeTuple(target='', label='how_many')

In [49]:
check.check_question_type(u'北京在哪')

QuestionTypeTuple(target=u'\u5317\u4eac', label='where')

In [50]:
check.check_question_type(u'蘇貞昌')

  tag word
0  nr  蘇貞昌


QuestionTypeTuple(target=u'\u8607\u8c9e\u660c', label='who')

In [51]:
check.check_question_type(u'哪個西瓜這麼大一顆')
# error......

  tag word
0   r   哪個
1  ns   西瓜
2   r   這麼
3   a    大
4   m   一顆


QuestionTypeTuple(target='', label='other')

In [52]:
check.check_question_type(u'馬英九')

  tag word
0  nr  馬英九


QuestionTypeTuple(target=u'\u99ac\u82f1\u4e5d', label='who')

In [53]:
check.check_question_type(u'關於封鎖')

  tag word
0   p   關於
1  nr   封鎖


QuestionTypeTuple(target='', label='other')

In [54]:
check.check_question_type(u'老闆這個水果多少錢呢')

  tag word
0   n   老闆
1   r   這個
2   n   水果
3  nr  多少錢
4   y    呢


QuestionTypeTuple(target='', label='how_many')

In [55]:
check.check_question_type(u'老闆這個水果多高')

  tag word
0   n   老闆
1   r   這個
2   n   水果
3   m    多
4   a    高


QuestionTypeTuple(target='', label='status')

In [56]:
check.check_question_type(u'姚明身高有多高呢')

  tag word
0  nr   姚明
1   v   身高
2   v    有
3   m    多
4   a    高
5   y    呢


QuestionTypeTuple(target='', label='status')

In [57]:
check.check_question_type(u'林百里是哪位')

  tag word
0  nr  林百里
1   v    是
2   r   哪位


QuestionTypeTuple(target=u'\u6797\u767e\u91cc', label='who')

In [245]:
check.check_question_type(u'請問apple是什麼')

QuestionTypeTuple(target=u'apple', label='what')

In [246]:
check.check_question_type(u'[問卦] 有沒有subway塑膠手套ㄉ8卦?')

QuestionTypeTuple(target='', label='other')

In [247]:
check.check_question_type(u'我想學游泳')

QuestionTypeTuple(target=u'\u6211\u60f3\u5b78\u6e38\u6cf3', label='how')

In [248]:
check.check_question_type(u'連勝文')

QuestionTypeTuple(target=u'\u9023\u52dd\u6587', label='who')

In [249]:
check.check_question_type('給大學生的建議')

QuestionTypeTuple(target='', label='other')

In [250]:
# ppp

# question type setting

In [251]:
dao = CassandraDAO('BACKUP')
HELPER_KEYSPACE = 'nlp_keyspace'
DCARD_ARTICLE_TABLE = 'dcard_article'
DCARD_ARTICLE_TEST_TABLE = 'dcard_article_test'

contact_points = 192.168.0.121,192.168.0.122,192.168.0.52


In [None]:
cql = 'select * from ' + DCARD_ARTICLE_TEST_TABLE + ';'
## select range
# cql = ("select * from nlp_keyspace.dcard_article_test " +
#         "where article_id > 225705809 and article_id < 225800010 allow filtering;")
pd_df = dao.execCQLSelectToPandasDF(HELPER_KEYSPACE, cql)
pd_df

In [275]:
def set_question_type(row):
    title = row['question']
    qt = check.check_question_type(title)
    return qt.label

pd_df['question_type'] = pd_df.apply(set_question_type, axis=1)
pd_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,article_id,board,category,content,crawl_date,create_date,question,question_target,question_type,title
0,224297810,job,,想請問在健身房的工作內容有什麼呢需要做什麼 還是要會什麼技能櫃檯/教練,2019-02-15 07:49:24.035,2016-07-03T16:51:05.200Z,健身房工作經驗,,other,健身房工作經驗（問）
1,225961291,talk,,首次發文，手機排版，請見諒（跟朋友借的帳號）首先，刺青是一門藝術並不是每個刺青的人都是屁孩 ...,2019-02-19 08:25:10.219,2017-03-11T09:36:15.788Z,刺青的意義,,other,#圖 刺青的意義
2,225196435,food,,,2019-02-13 01:39:36.743,2016-11-15T03:16:45.628Z,蜂蜜真假,,other,蜂蜜真假
3,224746504,talk,,各種淡紫控啊~覺得淡紫超美的～～😘還在繼續搜集中～(燈光比較不好，淡紫色沒有很明顯…😭)－愛...,2019-02-19 06:09:49.412,2016-09-12T14:33:05.434Z,愛各種淡紫色,,other,愛各種淡紫色
4,230071785,trending,,剛剛看到謝和炫出的這首歌，聽完簡直感動的說不出話來....台北年輕人這次一定要出來投票衝高投...,2019-02-15 02:46:30.961,2018-11-15T19:22:07.846Z,台灣的未來交給柯文哲！,,other,台灣的未來交給柯文哲！
5,226029225,food,,不知道有沒有人記得前幾天的單身狗廚房肉篇XD這次小廚跟朋友一行人去南投的某茶園露營(不透漏確...,2019-02-13 03:28:19.805,2017-03-22T04:04:36.302Z,單身狗廚房x野外露營篇,,other,#圖 單身狗廚房x野外露營篇
6,226491861,food,,想跟大家分享這間超好吃的日本料理店！他位於現在很多人要去的小琉球的乘船處----東港！我個人...,2019-02-13 03:54:05.395,2017-05-28T16:40:26.874Z,龍允日式手作壽司,,other,#食記 #東港 龍允日式手作壽司
7,229830476,language,,請問第三句 那個「となり 」是翻成登山「時」的意思嗎？,2019-02-15 07:05:51.092,2018-10-11T12:06:50.282Z,日文翻譯問題,,other,日文翻譯問題
8,465696,talk,,ㄜ 因為突然想到很想知道，雖然感覺這樣問有點奇怪= =前幾個星期有看到新聞在介紹，有個女生之...,2019-02-18 06:33:17.498,2015-08-17T13:33:21.449Z,尋~一個卡片遊戲 給背包客專用 (電視上看到,,other,尋~一個卡片遊戲(?) 給背包客專用 (電視上看到
9,230081344,trending,,本人台中人從小有嚴重過敏體質，鼻子容易塞住，也很容易流鼻水，更容易狂打噴嚏，成年之後狀況已經...,2019-02-15 02:47:31.216,2018-11-17T11:02:23.407Z,我不想吸髒空氣,,other,我不想吸髒空氣


In [276]:
def set_question_target(row):
    qt = check.check_question_type(row['question'])
    return qt.target

pd_df['question_target'] = pd_df.apply(set_question_target, axis=1)
pd_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,article_id,board,category,content,crawl_date,create_date,question,question_target,question_type,title
0,224297810,job,,想請問在健身房的工作內容有什麼呢需要做什麼 還是要會什麼技能櫃檯/教練,2019-02-15 07:49:24.035,2016-07-03T16:51:05.200Z,健身房工作經驗,,other,健身房工作經驗（問）
1,225961291,talk,,首次發文，手機排版，請見諒（跟朋友借的帳號）首先，刺青是一門藝術並不是每個刺青的人都是屁孩 ...,2019-02-19 08:25:10.219,2017-03-11T09:36:15.788Z,刺青的意義,,other,#圖 刺青的意義
2,225196435,food,,,2019-02-13 01:39:36.743,2016-11-15T03:16:45.628Z,蜂蜜真假,,other,蜂蜜真假
3,224746504,talk,,各種淡紫控啊~覺得淡紫超美的～～😘還在繼續搜集中～(燈光比較不好，淡紫色沒有很明顯…😭)－愛...,2019-02-19 06:09:49.412,2016-09-12T14:33:05.434Z,愛各種淡紫色,,other,愛各種淡紫色
4,230071785,trending,,剛剛看到謝和炫出的這首歌，聽完簡直感動的說不出話來....台北年輕人這次一定要出來投票衝高投...,2019-02-15 02:46:30.961,2018-11-15T19:22:07.846Z,台灣的未來交給柯文哲！,,other,台灣的未來交給柯文哲！
5,226029225,food,,不知道有沒有人記得前幾天的單身狗廚房肉篇XD這次小廚跟朋友一行人去南投的某茶園露營(不透漏確...,2019-02-13 03:28:19.805,2017-03-22T04:04:36.302Z,單身狗廚房x野外露營篇,,other,#圖 單身狗廚房x野外露營篇
6,226491861,food,,想跟大家分享這間超好吃的日本料理店！他位於現在很多人要去的小琉球的乘船處----東港！我個人...,2019-02-13 03:54:05.395,2017-05-28T16:40:26.874Z,龍允日式手作壽司,,other,#食記 #東港 龍允日式手作壽司
7,229830476,language,,請問第三句 那個「となり 」是翻成登山「時」的意思嗎？,2019-02-15 07:05:51.092,2018-10-11T12:06:50.282Z,日文翻譯問題,,other,日文翻譯問題
8,465696,talk,,ㄜ 因為突然想到很想知道，雖然感覺這樣問有點奇怪= =前幾個星期有看到新聞在介紹，有個女生之...,2019-02-18 06:33:17.498,2015-08-17T13:33:21.449Z,尋~一個卡片遊戲 給背包客專用 (電視上看到,,other,尋~一個卡片遊戲(?) 給背包客專用 (電視上看到
9,230081344,trending,,本人台中人從小有嚴重過敏體質，鼻子容易塞住，也很容易流鼻水，更容易狂打噴嚏，成年之後狀況已經...,2019-02-15 02:47:31.216,2018-11-17T11:02:23.407Z,我不想吸髒空氣,,other,我不想吸髒空氣


In [280]:
filter_pd = pd_df[pd_df['question_type'] != 'other']
print len(filter_pd)
# filter_pd[['article_id','board','title','question','question_type','question_target']]
for i,r in filter_pd.iterrows():
    print r['title'] + ' | ' + r['question'] + ' | ' + r['question_type'] + ' | ' + r['question_target']

309
＃打工  寒假如何找超過基本工資的打工？ | ＃打工  寒假如何找超過基本工資的打工？ | how | ＃打工  寒假如何找超過基本工資的打工？
未來的出路在哪裏 | 未來的出路在哪裏 | where | 
#評論 國民黨產帳怎麼算？退將：民進黨也曾被餵黨產奶水 | 國民黨產帳怎麼算？退將：民進黨也曾被餵黨產奶水 | how | 國民黨產帳怎麼算？退將：民進黨也曾被餵黨產奶水
發發牢騷 | 發發牢騷 | what | 發發牢騷
公投怎麼投？ | 公投怎麼投？ | how | 公投怎麼投？
畢業旅行～有哪些地方可以選擇呢 | 畢業旅行～有哪些地方可以選擇呢 | where | 
錢該怎麼算 | 錢該怎麼算 | how | 錢該怎麼算
智力測驗 | 智力測驗 | what | 智力測驗
拉K的味道怎麼辨別 | 拉K的味道怎麼辨別 | how | 拉K的味道怎麼辨別
（問）整棟只有一個電表 那電費大家會如何算呢 | 整棟只有一個電表 那電費大家會如何算呢 | how | 整棟只有一個電表 那電費大家會如何算呢
怎麼判別甲甲 | 怎麼判別甲甲 | how | 怎麼判別甲甲
台北下著雨，餓著肚子的人何處去? | 台北下著雨，餓著肚子的人何處去? | where | 台北
智利櫻桃現在市價大約多少呢？ | 智利櫻桃現在市價大約多少呢？ | how_many | 
為什麼未滿18歲不能看限制級電影 | 為什麼未滿18歲不能看限制級電影 | why | 
大家畢業多久才找到工作? | 大家畢業多久才找到工作? | status | 
劍靈 | 劍靈 | where | 劍靈
【提問】最近CPBL熱身賽的主播跟球評到底是誰....? | 最近CPBL熱身賽的主播跟球評到底是誰....? | who | 
這個甜點哪裡買的到呢 | 這個甜點哪裡買的到呢 | where | 
驅逐出場 | 驅逐出場 | what | 驅逐出場
真的窮到不知如何是好，求分享 | 真的窮到不知如何是好，求分享 | how | 真的窮到不知如何是好，求分享
大學生必看！「甜課」的日文怎麼說呢？ | 大學生必看！「甜課」的日文怎麼說呢？ | how | 大學生必看！「甜課」的日文怎麼說呢？
要怎麼加入民進黨黨工啊 | 要怎麼加入民進黨黨工啊 | how | 要怎麼加入民進黨黨工啊
#問#西班牙文 | 西

月亮 | 月亮 | what | 月亮
#徵才 工讀生 | 工讀生 | what | 工讀生
簽名檔 | 簽名檔 | what | 簽名檔
統神到底是誰? | 統神到底是誰? | who | 
車禍問題 該怎麼處理 | 車禍問題 該怎麼處理 | how | 車禍問題 該怎麼處理
多少人把這邊當相親網站 ? | 多少人把這邊當相親網站 ? | how_many | 
#更 到底值多少錢？ | 到底值多少錢？ | how_many | 
#問 股票 | 股票 | what | 股票
大家房租都多少錢啊？? | 大家房租都多少錢啊？? | how_many | 
雲林虎尾有哪裡剪髮不錯的嗎? | 雲林虎尾有哪裡剪髮不錯的嗎? | where | 雲林
災後 | 災後 | what | 災後
遠傳入主中嘉為什麼不好? | 遠傳入主中嘉為什麼不好? | why | 
#圖#花蓮#台東 美食 | 美食 | what | 美食
選情 | 選情 | what | 選情
#問 如何阻止別人辦信用卡 | 如何阻止別人辦信用卡 | how | 如何阻止別人辦信用卡
【轉貼】低薪?沒人才? 台灣哪裡沒人才了 人材多的是 | 低薪?沒人才? 台灣哪裡沒人才了 人材多的是 | where | 
誰是統獨教父？請理性看待統獨 | 誰是統獨教父？請理性看待統獨 | who | 
為什麼大家跑操場都跑逆時鐘 | 為什麼大家跑操場都跑逆時鐘 | why | 
趕潮流之如何分辨台科生 | 趕潮流之如何分辨台科生 | how | 趕潮流之如何分辨台科生
學了第二外語，才知道什麼是真正的學語言 | 學了第二外語，才知道什麼是真正的學語言 | what | 學
沖繩 | 沖繩 | what | 沖繩
洗手檯 | 洗手檯 | what | 洗手檯
請問國外adidas nmd R1 哪裡買？ | 請問國外adidas nmd R1 哪裡買？ | where | 
請問新手怎麼挑選股票 | 請問新手怎麼挑選股票 | how | 請問新手怎麼挑選股票
為什麼做甜點? | 為什麼做甜點? | why | 
Snapchat | Snapchat | what | Snapchat
香腸為什麼叫煙強？ | 香腸為什麼叫煙強？ | why | 
出國考察 | 出國考察 | what | 出國考察
Follow | Fol

In [278]:
ppp

NameError: name 'ppp' is not defined

In [281]:
#insert 
DCARD_ARTICLE_TABLE = 'dcard_article_test'
# cql = cql + 'BEGIN BATCH '
for index, row in pd_df.iterrows():

    cql = ("insert into " + DCARD_ARTICLE_TABLE + "(article_id, question, question_type) " + 
          "values(" + str(row['article_id']) + ",'" + 
          row['question'] + "','" + row['question_type'] + "');")
#     print cql
#     dao.execCQL(HELPER_KEYSPACE, cql)
    
# cql = cql + ' APPLY BATCH '
# print cql

insert into dcard_article_test(article_id, question, question_type) values(224297810,'健身房工作經驗','other');
insert into dcard_article_test(article_id, question, question_type) values(225961291,'刺青的意義','other');
insert into dcard_article_test(article_id, question, question_type) values(225196435,'蜂蜜真假','other');
insert into dcard_article_test(article_id, question, question_type) values(224746504,'愛各種淡紫色','other');
insert into dcard_article_test(article_id, question, question_type) values(230071785,'台灣的未來交給柯文哲！','other');
insert into dcard_article_test(article_id, question, question_type) values(226029225,'單身狗廚房x野外露營篇','other');
insert into dcard_article_test(article_id, question, question_type) values(226491861,'龍允日式手作壽司','other');
insert into dcard_article_test(article_id, question, question_type) values(229830476,'日文翻譯問題','other');
insert into dcard_article_test(article_id, question, question_type) values(465696,'尋~一個卡片遊戲 給背包客專用 (電視上看到','other');
insert into dcard_article_test(article_id

insert into dcard_article_test(article_id, question, question_type) values(658066,'畢業旅行～有哪些地方可以選擇呢','where');
insert into dcard_article_test(article_id, question, question_type) values(225084801,'關於公關這個工作 2','other');
insert into dcard_article_test(article_id, question, question_type) values(224721291,'日式彩虹胡麻烏龍麵','other');
insert into dcard_article_test(article_id, question, question_type) values(224911186,'涉性侵少女 林依晨前男友遭訴','other');
insert into dcard_article_test(article_id, question, question_type) values(1173439,'嘉義微食記分享 part2','other');
insert into dcard_article_test(article_id, question, question_type) values(228808840,'請問例句翻譯','other');
insert into dcard_article_test(article_id, question, question_type) values(224076541,'誠徵 工讀生','other');
insert into dcard_article_test(article_id, question, question_type) values(207263,'有推薦的畢業禮物嘛','other');
insert into dcard_article_test(article_id, question, question_type) values(227789040,'兩日遊吃了什麼😋','other');
insert into dcard_article_test(artic

insert into dcard_article_test(article_id, question, question_type) values(224658723,'有沒有躺著玩手機結果掉下來打到自己臉的掛','other');
insert into dcard_article_test(article_id, question, question_type) values(228253086,'別相信任何人','other');
insert into dcard_article_test(article_id, question, question_type) values(216768478,'#圖🍲🍲鮮蔬雞肉燴飯🍲🍲','other');
insert into dcard_article_test(article_id, question, question_type) values(229913174,'幫民進黨思考兩岸新套路','other');
insert into dcard_article_test(article_id, question, question_type) values(216760,'Re: 「請孫文、黨旗、黨歌退出校園」公民行動連署','other');
insert into dcard_article_test(article_id, question, question_type) values(1000643,'努力減肥想考空服','other');
insert into dcard_article_test(article_id, question, question_type) values(227498538,'謊言軍隊（文長','other');
insert into dcard_article_test(article_id, question, question_type) values(230194224,'華航飛機餐','other');
insert into dcard_article_test(article_id, question, question_type) values(225210291,'求指點迷津文長','other');
insert into dcard_arti

insert into dcard_article_test(article_id, question, question_type) values(988319,'可以不要挺大麻嗎?','other');
insert into dcard_article_test(article_id, question, question_type) values(224631271,'速可達１２５比較','other');
insert into dcard_article_test(article_id, question, question_type) values(1114017,'聯合國將禁止日本性暴力ACG','other');
insert into dcard_article_test(article_id, question, question_type) values(224787551,'同理心？','other');
insert into dcard_article_test(article_id, question, question_type) values(224464674,'蝦仁炒飯！！！GAN','other');
insert into dcard_article_test(article_id, question, question_type) values(228346629,'  金門古法道地蚵仔煎','other');
insert into dcard_article_test(article_id, question, question_type) values(40475,'深夜串詞－曖昧','other');
insert into dcard_article_test(article_id, question, question_type) values(226826924,'7/18 乳摸','other');
insert into dcard_article_test(article_id, question, question_type) values(225777148,'半年特休','other');
insert into dcard_article_test(article_id, question, 

insert into dcard_article_test(article_id, question, question_type) values(224374928,'妳最受不了妳男朋友做什麼？','other');
insert into dcard_article_test(article_id, question, question_type) values(224524957,'人手拿哀鳳不過就是小確辛','other');
insert into dcard_article_test(article_id, question, question_type) values(1056577,'討厭師大檢察官的到底是抱持著什麼樣的想法??','other');
insert into dcard_article_test(article_id, question, question_type) values(227556810,'廣告配唱、DEMO歌手接案','other');
insert into dcard_article_test(article_id, question, question_type) values(948706,'關於阿嬤的藥','other');
insert into dcard_article_test(article_id, question, question_type) values(225696831,'擦肩而過','other');
insert into dcard_article_test(article_id, question, question_type) values(47727586,'Re 不是歧視肥宅，只是純粹不喜歡動漫','other');
insert into dcard_article_test(article_id, question, question_type) values(181809,'自己做超豐盛晚餐','other');
insert into dcard_article_test(article_id, question, question_type) values(124018699,'轉  爽翻了！台灣詐欺犯抵中國　全自由了','other');
insert int

insert into dcard_article_test(article_id, question, question_type) values(54992,'一陣子沒看到寵物文了  大家來分享一下自己家的寵物吧','other');
insert into dcard_article_test(article_id, question, question_type) values(229195325,'雙牛沙拉佐香芒莎莎醬','other');
insert into dcard_article_test(article_id, question, question_type) values(228508228,'711消失的食物😫😫😫','other');
insert into dcard_article_test(article_id, question, question_type) values(226644646,'這個甜點哪裡買的到呢','where');
insert into dcard_article_test(article_id, question, question_type) values(225000397,'保險小故事','other');
insert into dcard_article_test(article_id, question, question_type) values(229423221,'驅逐出場','what');
insert into dcard_article_test(article_id, question, question_type) values(225122718,'築地鮮魚之鮭魚肚丼飯','other');
insert into dcard_article_test(article_id, question, question_type) values(224756562,'林全施政報告 參與太陽花團體批「偷渡服貿」','other');
insert into dcard_article_test(article_id, question, question_type) values(229332504,'聽袂落企！台灣不缺水電？　賴清德：缺的是信心','other');
inse

insert into dcard_article_test(article_id, question, question_type) values(224254275,'會計師事務所實習','other');
insert into dcard_article_test(article_id, question, question_type) values(225098440,'想問大家知道哪裡還喝得到舞茶道嗎','where');
insert into dcard_article_test(article_id, question, question_type) values(17159,'各位自拍神潮人請進','other');
insert into dcard_article_test(article_id, question, question_type) values(925733,'工作與證照','other');
insert into dcard_article_test(article_id, question, question_type) values(230204863,'㊙️多益聽力最愛考的家用品','other');
insert into dcard_article_test(article_id, question, question_type) values(225443957,'死前必嘗美食超隨便推薦','other');
insert into dcard_article_test(article_id, question, question_type) values(228230384,'揪團 VoiceTube Hero ！','other');
insert into dcard_article_test(article_id, question, question_type) values(224469911,'落枕','other');
insert into dcard_article_test(article_id, question, question_type) values(162966,'遇到變態電話怎麼辦','other');
insert into dcard_article_test(artic

insert into dcard_article_test(article_id, question, question_type) values(228865693,'越南薑黃蝦餅','other');
insert into dcard_article_test(article_id, question, question_type) values(228575950,'難怪薪水升不起來呀','other');
insert into dcard_article_test(article_id, question, question_type) values(102093,'關於314反核大遊行，你的決定，是救世主還是殺人魔?','other');
insert into dcard_article_test(article_id, question, question_type) values(224123557,'求桃園的甜點塔','other');
insert into dcard_article_test(article_id, question, question_type) values(230002095,'日文自學的小心得','other');
insert into dcard_article_test(article_id, question, question_type) values(224101503,'腐女本性被弟弟發現了','other');
insert into dcard_article_test(article_id, question, question_type) values(200145,'所謂沈默螺旋','other');
insert into dcard_article_test(article_id, question, question_type) values(227914577,'關於澳洲打工渡假...','other');
insert into dcard_article_test(article_id, question, question_type) values(228893885,'45,000投資什麼呢？','other');
insert into dcard_article_tes

insert into dcard_article_test(article_id, question, question_type) values(230678117,'請益','other');
insert into dcard_article_test(article_id, question, question_type) values(378382,'醒醒吧/創作文/你想很久了吧？','other');
insert into dcard_article_test(article_id, question, question_type) values(15874,'叩叩叩，有研究生嗎？','other');
insert into dcard_article_test(article_id, question, question_type) values(230637629,'回應一下東海姐文章中批評台灣兩位領導人的兩個人','other');
insert into dcard_article_test(article_id, question, question_type) values(224901135,'腦袋裝水泥的客人','other');
insert into dcard_article_test(article_id, question, question_type) values(707151,'高雄爆便宜肉多排骨便當','other');
insert into dcard_article_test(article_id, question, question_type) values(146643,'去唱歌的頻率和時間','other');
insert into dcard_article_test(article_id, question, question_type) values(29111731,'深夜宋仲基吵醒我弟','other');
insert into dcard_article_test(article_id, question, question_type) values(2381992,'關於房子','other');
insert into dcard_article_test(article_id, 

insert into dcard_article_test(article_id, question, question_type) values(225590306,'同性的問題……','other');
insert into dcard_article_test(article_id, question, question_type) values(224170015,'真羨慕有另一半','other');
insert into dcard_article_test(article_id, question, question_type) values(228493646,'火鍋店外場工讀生','other');
insert into dcard_article_test(article_id, question, question_type) values(30169,'困擾打字的時候會不小心把字念出來','other');
insert into dcard_article_test(article_id, question, question_type) values(228610362,'雄獅旅遊門市面試','other');
insert into dcard_article_test(article_id, question, question_type) values(68981,'大家有沒有夢過偶像的經驗??>//////<','other');
insert into dcard_article_test(article_id, question, question_type) values(230374099,' 一直炒股炒房會怎麼樣？ ','other');
insert into dcard_article_test(article_id, question, question_type) values(228863229,'台中精密科學園區','other');
insert into dcard_article_test(article_id, question, question_type) values(227923974,'時區改不改 內政部12/19公開回覆','other');
insert into dcard_a

insert into dcard_article_test(article_id, question, question_type) values(230656974,'加班費','what');
insert into dcard_article_test(article_id, question, question_type) values(230019234,'22槍擊斃車內通緝犯 檢：看彈著高度 警員沒過失','other');
insert into dcard_article_test(article_id, question, question_type) values(224468481,'展場打工的那些事','other');
insert into dcard_article_test(article_id, question, question_type) values(228757356,'有看沒懂的英文句子','other');
insert into dcard_article_test(article_id, question, question_type) values(224769564,'我現在出門大家都知道我沒洗澡了','other');
insert into dcard_article_test(article_id, question, question_type) values(224593002,'中了大樂透後會想工作嗎?','other');
insert into dcard_article_test(article_id, question, question_type) values(227028413,'被資遣會留下資料?','other');
insert into dcard_article_test(article_id, question, question_type) values(226076,'FB的神祕活動','other');
insert into dcard_article_test(article_id, question, question_type) values(10077,'推薦好友','other');
insert into dcard_article_test(arti

insert into dcard_article_test(article_id, question, question_type) values(229599190,'遭P2P網貸倒閉坑殺 少婦對中國政府絕望選擇自殺','other');
insert into dcard_article_test(article_id, question, question_type) values(817596,'有哪個學校會在這個時間才期中考嗎??','other');
insert into dcard_article_test(article_id, question, question_type) values(225268128,'如何一秒讓店員崩潰','other');
insert into dcard_article_test(article_id, question, question_type) values(226997478,'推薦的單字本','other');
insert into dcard_article_test(article_id, question, question_type) values(214888601,'女生的襪子與內衣顏色有相對關係嗎?','other');
insert into dcard_article_test(article_id, question, question_type) values(45324,'覺得自己中邪了怎麼辦?','other');
insert into dcard_article_test(article_id, question, question_type) values(228520358,'2018/03/20來台灣大學管理學院一館 ： 新創實習就在「書窩」- 書籍管理 | 就近借書 | 以書會友','other');
insert into dcard_article_test(article_id, question, question_type) values(54634932,'人文科系畢業生 不吃香','other');
insert into dcard_article_test(article_id, question, question_type) values

insert into dcard_article_test(article_id, question, question_type) values(224386475,'RE:關於機車路權','other');
insert into dcard_article_test(article_id, question, question_type) values(226306607,'高雄議員蕭永達點名 狼師就是補教名師陳國星','other');
insert into dcard_article_test(article_id, question, question_type) values(227211906,'南投草屯東森房屋行政秘書','other');
insert into dcard_article_test(article_id, question, question_type) values(224944631,'脂漏性皮膚炎','other');
insert into dcard_article_test(article_id, question, question_type) values(1124745,'「台灣已實質獨立」 李登輝：從未主張台獨','other');
insert into dcard_article_test(article_id, question, question_type) values(227710890,'請問韓文','other');
insert into dcard_article_test(article_id, question, question_type) values(229236401,'Harry Kane真的很強嗎','other');
insert into dcard_article_test(article_id, question, question_type) values(225832994,'中友古拉爵工作情況','other');
insert into dcard_article_test(article_id, question, question_type) values(229194839,'以太幣','other');
insert into dcard_art

insert into dcard_article_test(article_id, question, question_type) values(225320724,'請益：聖誕交換禮物該買什麼','other');
insert into dcard_article_test(article_id, question, question_type) values(229882087,'假如這在台灣','other');
insert into dcard_article_test(article_id, question, question_type) values(230218307,'適合理財新手的線上開戶銀行','other');
insert into dcard_article_test(article_id, question, question_type) values(1054012,'身份證後面幹嘛一定要臺灣省','other');
insert into dcard_article_test(article_id, question, question_type) values(224513014,'未來到底怎麼買房','other');
insert into dcard_article_test(article_id, question, question_type) values(1201058,'大家什麼時候開始覺得有代溝','when');
insert into dcard_article_test(article_id, question, question_type) values(230327761,'韓總效應，高雄遊客增加','other');
insert into dcard_article_test(article_id, question, question_type) values(230342483,'終於鬧夠了齁','other');
insert into dcard_article_test(article_id, question, question_type) values(225797244,'安全褲外露比較好？','other');
insert into dcard_article_test(

insert into dcard_article_test(article_id, question, question_type) values(226028712,'有請法律系大大解惑--關於車禍事後報案','other');
insert into dcard_article_test(article_id, question, question_type) values(128575067,'SunEdison將聲請破產','other');
insert into dcard_article_test(article_id, question, question_type) values(82393341,'資管vs資工 傻傻分不清','other');
insert into dcard_article_test(article_id, question, question_type) values(383072,'大家覺得紐幣可以買入了嗎？','other');
insert into dcard_article_test(article_id, question, question_type) values(170350,'為什麼我一定要有腰啊？','why');
insert into dcard_article_test(article_id, question, question_type) values(230404688,'重磅！習近平提出5項主張','other');
insert into dcard_article_test(article_id, question, question_type) values(308413,'女生打呼?','other');
insert into dcard_article_test(article_id, question, question_type) values(10442817,'街頭藝人之歐巴','other');
insert into dcard_article_test(article_id, question, question_type) values(226924011,'想應徵服飾店，有什麼撇步嗎？','other');
insert into dcard_articl

insert into dcard_article_test(article_id, question, question_type) values(224587259,'有關推甄資料的問題','other');
insert into dcard_article_test(article_id, question, question_type) values(1156846,'咖啡因中毒者','other');
insert into dcard_article_test(article_id, question, question_type) values(225528114,'時光膠囊版','other');
insert into dcard_article_test(article_id, question, question_type) values(226407698,' 公共政策提案-台灣遊戲的法規建置  ','other');
insert into dcard_article_test(article_id, question, question_type) values(226619882,'🇬🇧英國在台辦事處實習','other');
insert into dcard_article_test(article_id, question, question_type) values(376497,'有沒有人知道王品的醋是哪一家的','other');
insert into dcard_article_test(article_id, question, question_type) values(228967199,'高雄雅思補習班','other');
insert into dcard_article_test(article_id, question, question_type) values(224792349,'七八店長😠😠😠','other');
insert into dcard_article_test(article_id, question, question_type) values(143453168,'收到免役的那一刻','other');
insert into dcard_article_test(artic

insert into dcard_article_test(article_id, question, question_type) values(229752865,'關於麥當勞','other');
insert into dcard_article_test(article_id, question, question_type) values(224978293,' 渡邊直美擅長的語言','other');
insert into dcard_article_test(article_id, question, question_type) values(228372283,'學泰文','other');
insert into dcard_article_test(article_id, question, question_type) values(131938857,'誰要跟我們併校','other');
insert into dcard_article_test(article_id, question, question_type) values(229153673,'LIVE || E組賽事：塞爾維亞vs巴西','other');
insert into dcard_article_test(article_id, question, question_type) values(400825,'問 台中西屯區 美食','other');
insert into dcard_article_test(article_id, question, question_type) values(224956311,' mos burger長期工讀生','other');
insert into dcard_article_test(article_id, question, question_type) values(16381,'這幾天的觀感','how_many');
insert into dcard_article_test(article_id, question, question_type) values(912137,'一個人的夜店只能....','other');
insert into dcard_article_test(arti

insert into dcard_article_test(article_id, question, question_type) values(226924376,'台中工作都沒有休息時間嗎？','other');
insert into dcard_article_test(article_id, question, question_type) values(226656093,'請益JTB','other');
insert into dcard_article_test(article_id, question, question_type) values(1096977,'一天最多上幾次大號?','how_many');
insert into dcard_article_test(article_id, question, question_type) values(226127236,'#特勤警察訓練','other');
insert into dcard_article_test(article_id, question, question_type) values(152514916,'短評－新政府還廢死嗎','other');
insert into dcard_article_test(article_id, question, question_type) values(230677737,'轉ptt文。這個有點好笑','other');
insert into dcard_article_test(article_id, question, question_type) values(629992,'阿姨我會嫁不出去啊啊啊！','other');
insert into dcard_article_test(article_id, question, question_type) values(891545,'三讀通過人人可預立拒絕醫療','other');
insert into dcard_article_test(article_id, question, question_type) values(227662026,'台中Salute美式飛鏢酒吧徵人','other');
insert into dcard_article

insert into dcard_article_test(article_id, question, question_type) values(225754497,'開罐頭小技巧','other');
insert into dcard_article_test(article_id, question, question_type) values(826755,'有沒有狄卡是現充才在用的八卦','other');
insert into dcard_article_test(article_id, question, question_type) values(230033279,'關於韓語學習你要知道的事','other');
insert into dcard_article_test(article_id, question, question_type) values(224101546,'閃光out','other');
insert into dcard_article_test(article_id, question, question_type) values(230442425,'你必須知道台灣正在發生的事','other');
insert into dcard_article_test(article_id, question, question_type) values(224486332,'小丑女和小丑','other');
insert into dcard_article_test(article_id, question, question_type) values(60080,'矩陣計算 救救我~','other');
insert into dcard_article_test(article_id, question, question_type) values(224504684,'我的同居人','other');
insert into dcard_article_test(article_id, question, question_type) values(224544332,'看不懂的法律專有名詞...','other');
insert into dcard_article_test(article_id,

insert into dcard_article_test(article_id, question, question_type) values(169556666,'我在我爸面前看A片','other');
insert into dcard_article_test(article_id, question, question_type) values(229117626,'好可愛的球迷！！！','other');
insert into dcard_article_test(article_id, question, question_type) values(225057898,'大學邊緣？','other');
insert into dcard_article_test(article_id, question, question_type) values(228842121,'日本航空複試','other');
insert into dcard_article_test(article_id, question, question_type) values(225817846,'籃球鞋 - 乳癌配色','other');
insert into dcard_article_test(article_id, question, question_type) values(224810527,' 我爸買車想記我名下','other');
insert into dcard_article_test(article_id, question, question_type) values(228188726,'阿北外送蛋糕 蛋糕被砸爛','other');
insert into dcard_article_test(article_id, question, question_type) values(64414489,'除了小燈泡 是不是還有別的事要關心？','other');
insert into dcard_article_test(article_id, question, question_type) values(230659277,'地勤是不是奴性代表啊','other');
insert into dcard_article_test

insert into dcard_article_test(article_id, question, question_type) values(229732800,'離職好難開口','other');
insert into dcard_article_test(article_id, question, question_type) values(226418841,'可憐的臺灣','other');
insert into dcard_article_test(article_id, question, question_type) values(951882,'Re:台生與陸生的差別','other');
insert into dcard_article_test(article_id, question, question_type) values(228658223,'中信撥薪戶有哪些？','other');
insert into dcard_article_test(article_id, question, question_type) values(225895282,'#新聞記者採訪isis','other');
insert into dcard_article_test(article_id, question, question_type) values(229259989,'2018FIFA世界足球盃冠軍戰法國贏啦🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷！！！！','other');
insert into dcard_article_test(article_id, question, question_type) values(228863446,'打工經驗分享','other');
insert into dcard_article_test(article_id, question, question_type) values(32747,'文盲農 勇揭餿油','other');
insert into dcard_article_test(article_id, question, question_type) values(893219,'新北立委／民國黨候選人：黨不給「非信眾」資源','other');
insert into dcar

insert into dcard_article_test(article_id, question, question_type) values(95228,'大家手邊有什麼證照哩？','other');
insert into dcard_article_test(article_id, question, question_type) values(472016,'0820','other');
insert into dcard_article_test(article_id, question, question_type) values(228641554,'美女與野獸糖霜餅乾','other');
insert into dcard_article_test(article_id, question, question_type) values(226934473,'薄多義銷魂甜點➡️蜂蜜米克斯披薩','other');
insert into dcard_article_test(article_id, question, question_type) values(226891875,'🍮簡易焦糖布丁奶酪DIY','other');
insert into dcard_article_test(article_id, question, question_type) values(228315113,'運彩分析團隊是真的嗎','other');
insert into dcard_article_test(article_id, question, question_type) values(397846,'黑糖蔓越莓餅乾 ❤','other');
insert into dcard_article_test(article_id, question, question_type) values(54591483,'連勝文籲黨 破除「袁崇煥現象」','other');
insert into dcard_article_test(article_id, question, question_type) values(180802,'吉卜力美術館申請帳號方式 (跪求日文強大的大大','other');
insert into dcard_artic

insert into dcard_article_test(article_id, question, question_type) values(863187,'你有想過大賣場處理退貨牛奶的辛苦嗎?','other');
insert into dcard_article_test(article_id, question, question_type) values(225039083,'雨傘節','other');
insert into dcard_article_test(article_id, question, question_type) values(228793459,'高鐵與航空業，抉擇？','other');
insert into dcard_article_test(article_id, question, question_type) values(230075917,'韓國瑜感念柯文哲 不幫丁守中站台','other');
insert into dcard_article_test(article_id, question, question_type) values(228270453,' 各界捐款整理','other');
insert into dcard_article_test(article_id, question, question_type) values(229672767,'韓文全部連音只剩下ㄴㅁㄹㅇ>ㅂㅈㄷㅅㄱ=ㄸㅉㄲㅃㅆ不知何時變音','when');
insert into dcard_article_test(article_id, question, question_type) values(226683615,'花蓮海洋公園','other');
insert into dcard_article_test(article_id, question, question_type) values(230368655,'台南捷運第一期藍線可行報告 今獲行政院核定','other');
insert into dcard_article_test(article_id, question, question_type) values(229705211,'會帶伴手禮給助理嗎','other');
i

insert into dcard_article_test(article_id, question, question_type) values(230531907,'解夢大樂透號碼','other');
insert into dcard_article_test(article_id, question, question_type) values(226294792,' 中國老濕影評VS古阿莫','other');
insert into dcard_article_test(article_id, question, question_type) values(225824154,'我不知道可以下什麼標題欸','other');
insert into dcard_article_test(article_id, question, question_type) values(475562,'有沒有哪個國家地鐵站設計最好看的卦？','where');
insert into dcard_article_test(article_id, question, question_type) values(230203606,'詢問亞東醫院。護理師/護生','other');
insert into dcard_article_test(article_id, question, question_type) values(139607,'大四畢業然後呢！','other');
insert into dcard_article_test(article_id, question, question_type) values(227801153,'日文排序問題','other');
insert into dcard_article_test(article_id, question, question_type) values(230613060,'銀行工作選擇 台新與花旗','other');
insert into dcard_article_test(article_id, question, question_type) values(230133875,'這次投票，真是讓人失望透頂','other');
insert into dcard_artic

insert into dcard_article_test(article_id, question, question_type) values(226353301,'對於工作薪水要求','other');
insert into dcard_article_test(article_id, question, question_type) values(229172255,'鼎泰豐pt面試的問題 求助！！','other');
insert into dcard_article_test(article_id, question, question_type) values(225413482,'服貿和監督條例到底什麼時候過的？','when');
insert into dcard_article_test(article_id, question, question_type) values(230166042,'開票創最慢紀錄？ 北市選委會：新北市、桃園市更慢','other');
insert into dcard_article_test(article_id, question, question_type) values(230352721,'Passiv mit Modalverben','other');
insert into dcard_article_test(article_id, question, question_type) values(224220942,'Re:網紅朋友的回應','other');
insert into dcard_article_test(article_id, question, question_type) values(230366310,'雅思 寫作成績複查請益','other');
insert into dcard_article_test(article_id, question, question_type) values(227807190,' 共軍機繞不停 又有軍艦擾台','other');
insert into dcard_article_test(article_id, question, question_type) values(181409791,'川普奪印州 將成美國共

insert into dcard_article_test(article_id, question, question_type) values(230499801,'被統一可能發生的事','other');
insert into dcard_article_test(article_id, question, question_type) values(226016512,'進銷存軟體','other');
insert into dcard_article_test(article_id, question, question_type) values(224066630,'天然放大片寶寶 ','other');
insert into dcard_article_test(article_id, question, question_type) values(224345763,'只有大學生才有的特權？','other');
insert into dcard_article_test(article_id, question, question_type) values(36408418,'學者評甘比亞事件：證實北京專搞「烽火外交」','other');
insert into dcard_article_test(article_id, question, question_type) values(229115216,'紐西蘭首在台慶祝毛利新年 原民會主委：深化台紐合作關係','other');
insert into dcard_article_test(article_id, question, question_type) values(127125008,'當你看看到這句話的時候，你會怎麼想','how');
insert into dcard_article_test(article_id, question, question_type) values(226270693,'工讀生權益問題','other');
insert into dcard_article_test(article_id, question, question_type) values(227957904,'紅豆餅阿嬤','other');
insert into

insert into dcard_article_test(article_id, question, question_type) values(227007188,'#討論全台大停電','other');
insert into dcard_article_test(article_id, question, question_type) values(216890973,'向巫氏廚房致敬','other');
insert into dcard_article_test(article_id, question, question_type) values(224565700,'黑特 家樂福','other');
insert into dcard_article_test(article_id, question, question_type) values(14740,'泰國奇葩洗髮水廣告，當我發現同事搖頭很美後','other');
insert into dcard_article_test(article_id, question, question_type) values(230360223,'#美食台東彩色果-失戀美食😭😭😭','other');
insert into dcard_article_test(article_id, question, question_type) values(107116910,'魔鬼藏在細節裡：民進黨版監督條例的六大缺失','other');
insert into dcard_article_test(article_id, question, question_type) values(224626216,'每次吃維力炸醬麵的時候','other');
insert into dcard_article_test(article_id, question, question_type) values(229531251,'蝦皮面試通知','other');
insert into dcard_article_test(article_id, question, question_type) values(30117,'有沒有一男一女走在路上就會被認為是情侶的卦？','other');
insert i

insert into dcard_article_test(article_id, question, question_type) values(255427,'戀愛吧！蜂蜜檸檬烏龍茶！','other');
insert into dcard_article_test(article_id, question, question_type) values(225180967,'隱藏在公園裡的餐廳-J CAFE','other');
insert into dcard_article_test(article_id, question, question_type) values(1153973,'同學你的自介😭','other');
insert into dcard_article_test(article_id, question, question_type) values(229623113,'#面試心得分享','other');
insert into dcard_article_test(article_id, question, question_type) values(225718009,'另一面｜拿「為何不離職」酸台鐵員工的人，正是讓臺灣向下沉淪的罪魁禍首','other');
insert into dcard_article_test(article_id, question, question_type) values(227801964,'徵！飲料店工作夥伴 🌸','other');
insert into dcard_article_test(article_id, question, question_type) values(680400,'FB的照片只能消除標記不能給刪嗎','other');
insert into dcard_article_test(article_id, question, question_type) values(225435696,'年末美食交換禮物','other');
insert into dcard_article_test(article_id, question, question_type) values(15956,'生日提醒功能','other');
insert into d

insert into dcard_article_test(article_id, question, question_type) values(1171683,'金融科技真的很夯嗎?','other');
insert into dcard_article_test(article_id, question, question_type) values(536967,'#國際戰事-「不在此發生，不代表沒有發生。」','other');
insert into dcard_article_test(article_id, question, question_type) values(229765492,'台灣真的找不到韓國血腸嗎','other');
insert into dcard_article_test(article_id, question, question_type) values(81796822,'一種習慣...','other');
insert into dcard_article_test(article_id, question, question_type) values(227640581,'抹茶湯圓+抹茶拿鐵','other');
insert into dcard_article_test(article_id, question, question_type) values(229812685,'年輕世代的國民黨','other');
insert into dcard_article_test(article_id, question, question_type) values(255632,'喇犽恐慌','other');
insert into dcard_article_test(article_id, question, question_type) values(2078790,'爆料公社文化','other');
insert into dcard_article_test(article_id, question, question_type) values(225105021,'關於大頭貼','other');
insert into dcard_article_test(article_id, que

insert into dcard_article_test(article_id, question, question_type) values(229354377,'關於騎樓租地擺放販賣機','other');
insert into dcard_article_test(article_id, question, question_type) values(91676,'金田一少年事件簿','other');
insert into dcard_article_test(article_id, question, question_type) values(225395977,'紫禁城來著的交換禮物','other');
insert into dcard_article_test(article_id, question, question_type) values(225511175,'天啊 最近吃什麼都那麼貴！','other');
insert into dcard_article_test(article_id, question, question_type) values(954943,'香港銅鑼灣書店員工相繼失蹤　朱立倫：中方要給答案','other');
insert into dcard_article_test(article_id, question, question_type) values(1113471,'瘦不下來','other');
insert into dcard_article_test(article_id, question, question_type) values(229378788,'风生水起逐浪高——党的十九大以来以习近平同志为核心的党中央坚定不移推进全面深化改革述评','other');
insert into dcard_article_test(article_id, question, question_type) values(228170224,'議員誣指狼師　出庭反嗆「他承認是狼人我就道歉」','other');
insert into dcard_article_test(article_id, question, question_type) values(228843803,'＃心聲

insert into dcard_article_test(article_id, question, question_type) values(651334,'沒有! 你只想到你自己','other');
insert into dcard_article_test(article_id, question, question_type) values(230339354,'無塵室日文口譯','other');
insert into dcard_article_test(article_id, question, question_type) values(228955014,'持續關注新聞的這群人是習慣、巧合，還是浪費時間？','other');
insert into dcard_article_test(article_id, question, question_type) values(704396,'胖瘦的標準','other');
insert into dcard_article_test(article_id, question, question_type) values(227765102,'網拍賣家徵PT直播主','other');
insert into dcard_article_test(article_id, question, question_type) values(225856442,'你的大頭照可以換掉','other');
insert into dcard_article_test(article_id, question, question_type) values(196369,'PAD一單抽不夠?那你有沒有抽第二單?','other');
insert into dcard_article_test(article_id, question, question_type) values(263162,'或許會有那麼一種人','other');
insert into dcard_article_test(article_id, question, question_type) values(230664024,'過年連假還要工作的心情','other');
insert into dcard_article

insert into dcard_article_test(article_id, question, question_type) values(230666434,'改革風帆勁 創新逐浪高——上海落實習近平總書記全國兩會重要講話精神紀實','other');
insert into dcard_article_test(article_id, question, question_type) values(1012670,'在學校與放假在家的差別','other');
insert into dcard_article_test(article_id, question, question_type) values(225411975,'美肌時代😱😂','other');
insert into dcard_article_test(article_id, question, question_type) values(224094049,'巧克力汁','other');
insert into dcard_article_test(article_id, question, question_type) values(229466405,'身為新人...','other');
insert into dcard_article_test(article_id, question, question_type) values(225033272,'被迪卡刺了一刀','other');
insert into dcard_article_test(article_id, question, question_type) values(225649411,'換耳環太緊張?','other');
insert into dcard_article_test(article_id, question, question_type) values(228706656,'準備請益','other');
insert into dcard_article_test(article_id, question, question_type) values(959429,'年輕人，記得投票啊','other');
insert into dcard_article_test(ar

insert into dcard_article_test(article_id, question, question_type) values(227499777,'星巴克袖珍商品￼￼','other');
insert into dcard_article_test(article_id, question, question_type) values(930827,'綠：國民黨萬人宴一桌可能要4500元　籲檢方儘速偵辦','other');
insert into dcard_article_test(article_id, question, question_type) values(869043,'有沒有全身潮牌名牌的八卦呢?','other');
insert into dcard_article_test(article_id, question, question_type) values(224151329,'加拿大記者問人權 中國外長王毅痛斥：「中國的人權只有中國人懂」','other');
insert into dcard_article_test(article_id, question, question_type) values(227058711,'社會大學','other');
insert into dcard_article_test(article_id, question, question_type) values(1096454,'別人就不是嗎？','other');
insert into dcard_article_test(article_id, question, question_type) values(226050387,'台南哪裡還有起司馬鈴薯','where');
insert into dcard_article_test(article_id, question, question_type) values(39814,'有人和我一樣喜歡捐血嗎XD','other');
insert into dcard_article_test(article_id, question, question_type) values(226009901,' 湯的議題','other');
insert int

insert into dcard_article_test(article_id, question, question_type) values(56152969,' 草莓提拉米蘇之蠱💀','other');
insert into dcard_article_test(article_id, question, question_type) values(230029048,'如果正名公投是用在WTO，而不是奧運，你敢支持嗎？','other');
insert into dcard_article_test(article_id, question, question_type) values(132487899,'台北巨蛋懶人包','other');
insert into dcard_article_test(article_id, question, question_type) values(1056303,'設計科未來的工作','other');
insert into dcard_article_test(article_id, question, question_type) values(114431151,'綠委張廖萬堅：希望太陽花尊重主流民意','other');
insert into dcard_article_test(article_id, question, question_type) values(225696855,'高麗菜盛產料理-大阪燒','other');
insert into dcard_article_test(article_id, question, question_type) values(440487,'沒有危機意識的家人們......','other');
insert into dcard_article_test(article_id, question, question_type) values(179170924,'RC 我知道阿，就是 … ','other');
insert into dcard_article_test(article_id, question, question_type) values(134064,'關於勞乃慧老師','other');
insert into 

insert into dcard_article_test(article_id, question, question_type) values(228461151,'#問打工經驗談','other');
insert into dcard_article_test(article_id, question, question_type) values(224728308,'做了一個月工讀想離職','other');
insert into dcard_article_test(article_id, question, question_type) values(101417600,'吳季剛~謝謝你的勇敢','other');
insert into dcard_article_test(article_id, question, question_type) values(1032768,'若拉屎還要別人幫你擦屁?','other');
insert into dcard_article_test(article_id, question, question_type) values(229627689,'艋舺 X記肉粥','other');
insert into dcard_article_test(article_id, question, question_type) values(226042703,'簡單好吃的馬鈴薯煎餅','other');
insert into dcard_article_test(article_id, question, question_type) values(224754491,'7-11到底算不算服務業','other');
insert into dcard_article_test(article_id, question, question_type) values(228973528,'關於面試的問題','other');
insert into dcard_article_test(article_id, question, question_type) values(224568229,'小七夏日消暑聖品','other');
insert into dcard_article_test(articl

insert into dcard_article_test(article_id, question, question_type) values(225363793,'求解！一包泡麵比一碗現煮牛肉麵還貴是什麼狀況？很好吃嗎？','what');
insert into dcard_article_test(article_id, question, question_type) values(228946127,' 如何因應台灣與各國相繼斷交','other');
insert into dcard_article_test(article_id, question, question_type) values(227143851,'高雄三民#晚班工讀兼職','other');
insert into dcard_article_test(article_id, question, question_type) values(227705765,'平價吃到飽火鍋推薦','other');
insert into dcard_article_test(article_id, question, question_type) values(792152,'肌肉VS肥肉VS排骨','other');
insert into dcard_article_test(article_id, question, question_type) values(84543544,'如何選機車！','other');
insert into dcard_article_test(article_id, question, question_type) values(1229973,'睡不著woo一下','other');
insert into dcard_article_test(article_id, question, question_type) values(225481022,'去某文具行面試感覺意義不明','other');
insert into dcard_article_test(article_id, question, question_type) values(224238817,'火腿起司蛋燉飯','other');
insert into dcard_a

insert into dcard_article_test(article_id, question, question_type) values(224833190,'輔大心理新生現在在想什麼','other');
insert into dcard_article_test(article_id, question, question_type) values(229984623,'㊙️分享三組多益最愛考的分類字彙','other');
insert into dcard_article_test(article_id, question, question_type) values(230690431,'蔡英文臉書：有人問我，要把國家帶去哪裡','where');
insert into dcard_article_test(article_id, question, question_type) values(645587,'消夜要吃甚麼','other');
insert into dcard_article_test(article_id, question, question_type) values(1195166,'DCRAD很多幻想文?','other');
insert into dcard_article_test(article_id, question, question_type) values(225227754,'反同最經典語錄','other');
insert into dcard_article_test(article_id, question, question_type) values(105790,'關於天蠍男','other');
insert into dcard_article_test(article_id, question, question_type) values(228922433,'實用履歷模板大比拚','other');
insert into dcard_article_test(article_id, question, question_type) values(34669,'穿梭-14 充滿後悔的信','other');
insert into dcard_article_test(ar

insert into dcard_article_test(article_id, question, question_type) values(224256290,'新北市青年數位服務種子培力營-邀請大家參加','other');
insert into dcard_article_test(article_id, question, question_type) values(230097216,'日文謝謝到底要不要加敬詞','other');
insert into dcard_article_test(article_id, question, question_type) values(1031997,'炎亞綸下雪夢成真 籲粉絲做環保「愛地球」','other');
insert into dcard_article_test(article_id, question, question_type) values(119708,'朋友的喜酒該怎麼拒絕？','how');
insert into dcard_article_test(article_id, question, question_type) values(224841528,'台灣人的素質','other');
insert into dcard_article_test(article_id, question, question_type) values(225346264,'金屬探測器','other');
insert into dcard_article_test(article_id, question, question_type) values(219607,'十五分鐘的晚安','other');
insert into dcard_article_test(article_id, question, question_type) values(229298194,'《疫苗之殤》的續集還要上演幾部？這次25萬支假疫苗已註射孩子體中，告訴我怎麽忍？','how_many');
insert into dcard_article_test(article_id, question, question_type) values(226602034,'郭董說：沒有必要 儘量不想回台

insert into dcard_article_test(article_id, question, question_type) values(831867,'超越「滅頂」行情　林鳳營3大1小189元','other');
insert into dcard_article_test(article_id, question, question_type) values(378720,'《人來人網》 ch.1 阿玲/玲兒','other');
insert into dcard_article_test(article_id, question, question_type) values(224232773,'安南醫院','other');
insert into dcard_article_test(article_id, question, question_type) values(87387,'約出來滑手機?!','other');
insert into dcard_article_test(article_id, question, question_type) values(227548801,'中共新領導成員，各位臺灣朋友有何看法','other');
insert into dcard_article_test(article_id, question, question_type) values(508406,'工資/精神/保險賠償問題','other');
insert into dcard_article_test(article_id, question, question_type) values(82669,'情人節滑神魔','other');
insert into dcard_article_test(article_id, question, question_type) values(840770,'關於藝人的穿著','other');
insert into dcard_article_test(article_id, question, question_type) values(563929,'先知! 「戴小姐是誰？　蔡正元：柯文哲心裡有數」','who');
insert into dcard_article_t

insert into dcard_article_test(article_id, question, question_type) values(224660687,'關於學店','other');
insert into dcard_article_test(article_id, question, question_type) values(107552,'哪裡可以買到運動褲呢','where');
insert into dcard_article_test(article_id, question, question_type) values(229830625,'#寶雅打工','other');
insert into dcard_article_test(article_id, question, question_type) values(229893514,'普悠瑪列車出軌翻車！傳30傷3罹難','other');
insert into dcard_article_test(article_id, question, question_type) values(34677234,'新竹/甜點/一百種味道','other');
insert into dcard_article_test(article_id, question, question_type) values(227678254,'原來外表會騙人','other');
insert into dcard_article_test(article_id, question, question_type) values(1022278,'沒車是不是很難脫魯啊...','other');
insert into dcard_article_test(article_id, question, question_type) values(225369236,'閒來無事','other');
insert into dcard_article_test(article_id, question, question_type) values(224069475,'後備軍人注意！　國防部要找「周末戰士」','other');
insert into dcard_article_test(art

insert into dcard_article_test(article_id, question, question_type) values(229364409,'美国想要讹诈中国，总得自己先有底气吧','other');
insert into dcard_article_test(article_id, question, question_type) values(59484,'走路進汽車旅館????','other');
insert into dcard_article_test(article_id, question, question_type) values(229853766,'淡水仿星XX-工作注意！#工作','other');
insert into dcard_article_test(article_id, question, question_type) values(136187170,'反寶寶？','other');
insert into dcard_article_test(article_id, question, question_type) values(226670199,'麥當勞外送車禍問題','other');
insert into dcard_article_test(article_id, question, question_type) values(225256813,'懷舊年代','other');
insert into dcard_article_test(article_id, question, question_type) values(226695365,'台灣人到底想要什麼樣的未來呢？','other');
insert into dcard_article_test(article_id, question, question_type) values(12490,'大家有沒有甚麼東西是沒用可是又捨不得丟的啊?','other');
insert into dcard_article_test(article_id, question, question_type) values(80916,'UBIKE可以一直借不用等15分??','other');
insert into dc

insert into dcard_article_test(article_id, question, question_type) values(228178287,'2018.1.21 漲到你麻木、視障也起舞，台股突破一萬一！','other');
insert into dcard_article_test(article_id, question, question_type) values(172957,'尋找課輔服務志工','other');
insert into dcard_article_test(article_id, question, question_type) values(228210247,'中筋麵粉有蟲蟲','other');
insert into dcard_article_test(article_id, question, question_type) values(226186678,'林口 工廠正職員工薪32k','other');
insert into dcard_article_test(article_id, question, question_type) values(228630891,'為什麼你不是網紅？','why');
insert into dcard_article_test(article_id, question, question_type) values(224216139,'關於男生的困擾😩','other');
insert into dcard_article_test(article_id, question, question_type) values(841639,'Dcard 朋友，三個月終於破蛋!','other');
insert into dcard_article_test(article_id, question, question_type) values(227006639,'美食交換❤️','other');
insert into dcard_article_test(article_id, question, question_type) values(11802,'被下了老闆的心錨','other');
insert into dcard_articl

insert into dcard_article_test(article_id, question, question_type) values(21110,'連四個....','other');
insert into dcard_article_test(article_id, question, question_type) values(750512,'看了好多星座問題，想到一件事','other');
insert into dcard_article_test(article_id, question, question_type) values(227779106,'拉丁文 ','other');
insert into dcard_article_test(article_id, question, question_type) values(224616219,'汽車駕照 學科','other');
insert into dcard_article_test(article_id, question, question_type) values(1462377,'上廁所滑手機','other');
insert into dcard_article_test(article_id, question, question_type) values(230289560,'習近平掛掉，中國會不會打內戰？','other');
insert into dcard_article_test(article_id, question, question_type) values(1083549,'孝敬家裡兩老的晚餐','other');
insert into dcard_article_test(article_id, question, question_type) values(225459502,'還沒收到聖誕節交換禮物','other');
insert into dcard_article_test(article_id, question, question_type) values(225025513,'高鐵站務員 體檢通知','other');
insert into dcard_article_test(article_id, que

insert into dcard_article_test(article_id, question, question_type) values(224272195,'一例一休對醫療人員有影響嗎？','other');
insert into dcard_article_test(article_id, question, question_type) values(168380668,'關於威x斯美語 求救文','other');
insert into dcard_article_test(article_id, question, question_type) values(451582,'大台北好玩','other');
insert into dcard_article_test(article_id, question, question_type) values(225880270,'買一送一','other');
insert into dcard_article_test(article_id, question, question_type) values(230265538,'最無奈電影拍下台北「脫貧」之難','other');
insert into dcard_article_test(article_id, question, question_type) values(136797193,'今天是的美好的節日','other');
insert into dcard_article_test(article_id, question, question_type) values(351486,'12.8萬，到底貴不貴?','other');
insert into dcard_article_test(article_id, question, question_type) values(229351057,'民選總統誰經濟搞的最好？','other');
insert into dcard_article_test(article_id, question, question_type) values(224598096,'打工','other');
insert into dcard_article_test(article_i

insert into dcard_article_test(article_id, question, question_type) values(224065975,'在報告上塗鴉😨😨','other');
insert into dcard_article_test(article_id, question, question_type) values(396431,'床墊要怎麼清洗','how');
insert into dcard_article_test(article_id, question, question_type) values(225584323,'CSGO中常用的英文與俄文 part 2','other');
insert into dcard_article_test(article_id, question, question_type) values(229094742,'臺中茶葉炒飯','other');
insert into dcard_article_test(article_id, question, question_type) values(27194,'有沒有常在鬧鐘響的前一刻醒來的八卦~','other');
insert into dcard_article_test(article_id, question, question_type) values(224180512,'張韶涵','who');
insert into dcard_article_test(article_id, question, question_type) values(226949308,'如果你有權，你會怎麼做','how');
insert into dcard_article_test(article_id, question, question_type) values(226618821,'日式🍣楠梓森川丼丼食記','other');
insert into dcard_article_test(article_id, question, question_type) values(229894154,'請問 好市多培伯莉小魚餅乾','other');
insert into dcard_article_test(art

insert into dcard_article_test(article_id, question, question_type) values(229309417,'美妝店打工','other');
insert into dcard_article_test(article_id, question, question_type) values(10423,'多少人把這邊當相親網站 ?','how_many');
insert into dcard_article_test(article_id, question, question_type) values(225541228,'到底值多少錢？','how_many');
insert into dcard_article_test(article_id, question, question_type) values(229878909,'Selene屏東縣長最新民調》蘇清泉支持度39.8%　領先潘孟安的36.4%　50%縣民贊成換黨做做看','other');
insert into dcard_article_test(article_id, question, question_type) values(1166734,' 小熊軟糖','other');
insert into dcard_article_test(article_id, question, question_type) values(229736731,'想問這是哪家店','other');
insert into dcard_article_test(article_id, question, question_type) values(542273,'晚餐~香菇燉雞   &  蛋白霜餅乾~ 假的馬卡龍XD','other');
insert into dcard_article_test(article_id, question, question_type) values(226663348,'安普蕾修','other');
insert into dcard_article_test(article_id, question, question_type) values(224425781,'南韓挾FTA優勢 台塑化成品

insert into dcard_article_test(article_id, question, question_type) values(224077308,'網友爆學生服儀不整被記過 教官還嗆「誰要遵守法令」','other');
insert into dcard_article_test(article_id, question, question_type) values(708340,'Such A Great 縫！！😱','other');
insert into dcard_article_test(article_id, question, question_type) values(80727428,'高雄人都北上工作了嗎？','other');
insert into dcard_article_test(article_id, question, question_type) values(1038754,'遠傳入主中嘉為什麼不好?','why');
insert into dcard_article_test(article_id, question, question_type) values(230651848,'幫推薦較不苦澀紅酒','other');
insert into dcard_article_test(article_id, question, question_type) values(226698462,'哪個時間去麥當勞應徵比較好?','other');
insert into dcard_article_test(article_id, question, question_type) values(230551214,'國中英文用書','other');
insert into dcard_article_test(article_id, question, question_type) values(227389707,'全職投資者-投資概念篇','other');
insert into dcard_article_test(article_id, question, question_type) values(225882139,'美食','what');
insert into dcard_ar

insert into dcard_article_test(article_id, question, question_type) values(226941755,'學了第二外語，才知道什麼是真正的學語言','what');
insert into dcard_article_test(article_id, question, question_type) values(225118134,'北大無極限','other');
insert into dcard_article_test(article_id, question, question_type) values(228629923,'台中早餐食記🍴','other');
insert into dcard_article_test(article_id, question, question_type) values(229943086,'沖繩','what');
insert into dcard_article_test(article_id, question, question_type) values(229600692,'最蠢面試經歷part 2','other');
insert into dcard_article_test(article_id, question, question_type) values(227933784,'#英文','other');
insert into dcard_article_test(article_id, question, question_type) values(224044768,'現在vs過去的差別','other');
insert into dcard_article_test(article_id, question, question_type) values(225123472,'時力未受邀協商？老柯還嗆黃國昌「別理他！」','other');
insert into dcard_article_test(article_id, question, question_type) values(987975,'咖啡慕斯塔','other');
insert into dcard_article_test(article_i

insert into dcard_article_test(article_id, question, question_type) values(166532,'有偶像夢想的女同胞們照過來~','other');
insert into dcard_article_test(article_id, question, question_type) values(200791594,'請問國外adidas nmd R1 哪裡買？','where');
insert into dcard_article_test(article_id, question, question_type) values(228539082,'在台基本收入可能成真?','other');
insert into dcard_article_test(article_id, question, question_type) values(962575,'公職問題','other');
insert into dcard_article_test(article_id, question, question_type) values(225674935,'妳給的不一定是學生想要的','other');
insert into dcard_article_test(article_id, question, question_type) values(224316284,'有點小餓-吐司披薩🍕','other');
insert into dcard_article_test(article_id, question, question_type) values(224754903,'中壢有甚麼好吃的啊??😕','other');
insert into dcard_article_test(article_id, question, question_type) values(229872068,'法文 分享練習短文','other');
insert into dcard_article_test(article_id, question, question_type) values(226121995,' 生命禮儀','other');
insert into dcard_article

insert into dcard_article_test(article_id, question, question_type) values(225328616,'VANS鞋子的紅色吊牌','other');
insert into dcard_article_test(article_id, question, question_type) values(994629,'大家都上當了！道歉影片根本就是催票招數','other');
insert into dcard_article_test(article_id, question, question_type) values(111136711,'女生請不要隨便露出笑容  ','other');
insert into dcard_article_test(article_id, question, question_type) values(228493223,'新制多益 這本幫助大嗎？','other');
insert into dcard_article_test(article_id, question, question_type) values(224730265,'金生麗水 報到前被放鳥','other');
insert into dcard_article_test(article_id, question, question_type) values(225012544,'有空可以來探班？','other');
insert into dcard_article_test(article_id, question, question_type) values(67820,'鴻海刊廣告 限柯48小時內還「台北秋葉原」清白','other');
insert into dcard_article_test(article_id, question, question_type) values(765543,'史努比拉花片','other');
insert into dcard_article_test(article_id, question, question_type) values(227283678,'李應元︰台灣電價太便宜','other');
insert into dc

insert into dcard_article_test(article_id, question, question_type) values(227241592,'麻煩一下各位翻譯蒟蒻','other');
insert into dcard_article_test(article_id, question, question_type) values(224406969,'B70 鏟子冰淇淋','other');
insert into dcard_article_test(article_id, question, question_type) values(215080,'吃個東西嘖嘖嘖','other');
insert into dcard_article_test(article_id, question, question_type) values(225503229,'Re: 一國還是兩國???','other');
insert into dcard_article_test(article_id, question, question_type) values(224230883,'無法忍受的英文錯誤','other');
insert into dcard_article_test(article_id, question, question_type) values(11047,'這樣也能扯到DCARD =口=','other');
insert into dcard_article_test(article_id, question, question_type) values(745289,'翻開覆蓋的陷阱卡#2697','other');
insert into dcard_article_test(article_id, question, question_type) values(224306218,'極品也會過期','other');
insert into dcard_article_test(article_id, question, question_type) values(224687745,'一輩子的學姊學弟','other');
insert into dcard_article_test(article

insert into dcard_article_test(article_id, question, question_type) values(225715565,'朋友 還當的成嗎','other');
insert into dcard_article_test(article_id, question, question_type) values(230219308,'各式各樣免費兌換 l 生日霸王餐','other');
insert into dcard_article_test(article_id, question, question_type) values(224046239,'要哥哥不要麻麻','other');
insert into dcard_article_test(article_id, question, question_type) values(2433136,'洪姊姊說台中電廠產生的PM2.5佔總量的34%耶','other');
insert into dcard_article_test(article_id, question, question_type) values(225169315,'據說好用的脫單方式','other');
insert into dcard_article_test(article_id, question, question_type) values(224365993,'南海各國軍力介紹－海軍篇','other');
insert into dcard_article_test(article_id, question, question_type) values(225107398,'拍扁麵包師 一天內遭2單位登門稽查','other');
insert into dcard_article_test(article_id, question, question_type) values(229953181,'  國家主席習近平同中華全國總工會新一屆領導班子成員集體談話並發表重要講話','other');
insert into dcard_article_test(article_id, question, question_type) values(25139,'中國專家還原

insert into dcard_article_test(article_id, question, question_type) values(224767257,'系邊是....？','other');
insert into dcard_article_test(article_id, question, question_type) values(228816447,'五月第一天班想蹺班然後離職會扣錢嗎？','other');
insert into dcard_article_test(article_id, question, question_type) values(150557768,'鄭捷的死刑，廢死聯盟也有責任！','other');
insert into dcard_article_test(article_id, question, question_type) values(224300561,'一個月的甜點時光','other');
insert into dcard_article_test(article_id, question, question_type) values(510658,'團康競賽後的懲罰方式？','other');
insert into dcard_article_test(article_id, question, question_type) values(468337,'我幫妳領養了一個小孩','other');
insert into dcard_article_test(article_id, question, question_type) values(320454,'期中考V.S期末考','other');
insert into dcard_article_test(article_id, question, question_type) values(225731979,'關於你一生中的工作選擇','other');
insert into dcard_article_test(article_id, question, question_type) values(230145076,'臺灣人啊，請想一想......','other');
insert into dcard_arti

insert into dcard_article_test(article_id, question, question_type) values(172314972,'韓之棧韓式傳統料理～～so spicy','other');
insert into dcard_article_test(article_id, question, question_type) values(190317134,'母親節送到送歪歪!!!','other');
insert into dcard_article_test(article_id, question, question_type) values(51542884,'難道是命中注定？','other');
insert into dcard_article_test(article_id, question, question_type) values(108895534,'離譜大學研究員　教14歲女兒同學課業教上床','other');
insert into dcard_article_test(article_id, question, question_type) values(228295642,'工讀實習有年終嗎？','other');
insert into dcard_article_test(article_id, question, question_type) values(226030590,'關於送禮','other');
insert into dcard_article_test(article_id, question, question_type) values(27771484,'怎麼寫阿阿阿','how');
insert into dcard_article_test(article_id, question, question_type) values(224044102,'我的手機應用程式','other');
insert into dcard_article_test(article_id, question, question_type) values(227964480,'聖誕禮物交換','other');
insert into dcard_article_test

insert into dcard_article_test(article_id, question, question_type) values(228920198,'德明財經外的雞蛋糕','other');
insert into dcard_article_test(article_id, question, question_type) values(83200424,' 抹茶紅豆蛋糕捲','other');
insert into dcard_article_test(article_id, question, question_type) values(99739803,'宿舍百態','other');
insert into dcard_article_test(article_id, question, question_type) values(224526667,'「立委不知民間疾苦」教官若退出校園 私校師很焦慮','other');
insert into dcard_article_test(article_id, question, question_type) values(227080253,'請問如何找打工？','how');
insert into dcard_article_test(article_id, question, question_type) values(230610586,'非核家園終究是白日作夢','other');
insert into dcard_article_test(article_id, question, question_type) values(53056,'關於Spotify 有幾個小問題','how_many');
insert into dcard_article_test(article_id, question, question_type) values(228576584,'習近平同金正恩舉行會談','other');
insert into dcard_article_test(article_id, question, question_type) values(229660529,'打工面試','other');
insert into dcard_article_te

insert into dcard_article_test(article_id, question, question_type) values(227845816,'電子鍋還是快鍋','other');
insert into dcard_article_test(article_id, question, question_type) values(228978851,'長榮地勤 #營業人員','other');
insert into dcard_article_test(article_id, question, question_type) values(229104400,'問 iCash 2.0','other');
insert into dcard_article_test(article_id, question, question_type) values(224461000,'爸爸的背❤','other');
insert into dcard_article_test(article_id, question, question_type) values(224530575,'江春男因酒駕請辭　蔡英文：人民對執政團隊要求很高','other');
insert into dcard_article_test(article_id, question, question_type) values(230676015,'要看情況才能發揮的同理心？','other');
insert into dcard_article_test(article_id, question, question_type) values(1032900,'吵死了~~~~','other');
insert into dcard_article_test(article_id, question, question_type) values(228201200,'求解😭Ampule碎在藥裡','other');
insert into dcard_article_test(article_id, question, question_type) values(224384638,'圖*五步驟簡易食譜*單人獨享餐🙆','other');
insert into dc

insert into dcard_article_test(article_id, question, question_type) values(230500492,'藏壽司','other');
insert into dcard_article_test(article_id, question, question_type) values(226547192,'請教英文翻譯問題','other');
insert into dcard_article_test(article_id, question, question_type) values(228952699,'蜜橙磅蛋糕','other');
insert into dcard_article_test(article_id, question, question_type) values(228673187,'剛到職，卻想離職！','other');
insert into dcard_article_test(article_id, question, question_type) values(229950650,'其邁哥的政見 一通電話醫生護士到家服務','other');
insert into dcard_article_test(article_id, question, question_type) values(226695405,'問 工時問題','other');
insert into dcard_article_test(article_id, question, question_type) values(761886,'只關心巴黎錯了嗎？　－　請別苛責只關心巴黎的你我','other');
insert into dcard_article_test(article_id, question, question_type) values(228814961,'BAKE CODE 烘焙密碼西湖店','other');
insert into dcard_article_test(article_id, question, question_type) values(706757,'Re 陸生健保','other');
insert into dcard_article_



In [None]:
cql = 'select * from dcard_article_test;'
pd_df = dao.execCQLSelectToPandasDF(HELPER_KEYSPACE, cql)
pd_df