In [14]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, concat_ws
from classifier import make_udf, predict_serie, make_pandas_udf
import pandas as pd

In [3]:
# Obtain the SparkSession for this notebook under the application name
# 'mysession' (getOrCreate reuses a session if one is already active).
spark = (
    SparkSession.builder
    .appName('mysession')
    .getOrCreate()
)

In [None]:
# NOTE(review): disabled calls that would register the model file and the
# classifier module with the SparkContext — presumably only needed when the
# executors cannot see these local paths; confirm before re-enabling.
#spark.sparkContext.addFile('persist/ft_tuned.ftz')
#spark.sparkContext.addPyFile('./classifier.py')

# Generating Test File

In [None]:
! head -10 data/test

In [None]:
def keep_sentence_field(line):
    '''
    Function to keep only the text input given a labeled instance with fastText format.

    The line is split on whitespace, every token that starts with the
    ``__label__`` prefix is dropped, and the remaining tokens are re-joined with
    single spaces (so surrounding whitespace, including the trailing newline,
    is removed as well). A token that merely contains ``__label__`` somewhere
    after its first character is regular text and is kept.

    Example
    Input:
    '__label__python __label__django help with unit testing in a python app using django'
    Output:
    'help with unit testing in a python app using django'

    :param line: one line of a fastText-formatted file.
    :return: the text of the line without its labels ('' if nothing remains).
    '''
    # Labels are recognised by prefix, not by substring, so text tokens are never lost.
    words = [word for word in line.split() if not word.startswith("__label__")]
    return ' '.join(words)

# Location of input file
inputFile = 'data/test'

# Location of output file
outputFile = 'data/spark_input'

# Stream the input line by line, keep only the sentence field of each line and
# write one sentence per line to the output file.
# Both handles are managed by `with`, so they are closed even if an error occurs
# (no explicit close() is needed). The output is written explicitly as UTF-8,
# the default encoding of Spark's CSV reader, instead of the platform locale.
with open(inputFile, encoding="ISO-8859-1") as source, \
        open(outputFile, 'w', encoding="utf-8") as file:
    # Lazy generator: sentences are produced one at a time while writing.
    sentences = (keep_sentence_field(line) for line in source)
    for sentence in sentences:
        file.write(sentence + '\n')

In [None]:
! head -10 data/spark_input

# Build Spark DataFrame

In [4]:
from pyspark.sql.types import StructType, StructField, StringType, ArrayType

# Explicit schema: a single string column named "input".
schema = StructType(
    fields=[
        StructField(name="input", dataType=StringType()),
    ]
)

# Load the headerless file into a DataFrame using the schema above.
df_input = spark.read.csv(
    path='data/spark_input',
    schema=schema,
    header=False,
)

# Approach 1: Standard UDF with Distributed Files

## Single prediction

In [5]:
udf_predict = make_udf(multi_prediction=False)
df_output = df_input.withColumn("category",udf_predict(col("input")))
%timeit -n 20 df_output.sample(False,.10).show(10,False)

+------------------------------------------------------------------------------------+----------+
|input                                                                               |category  |
+------------------------------------------------------------------------------------+----------+
|should i have one class for every database i use                                    |java      |
|getting odd error on net executenonquery                                            |.net      |
|while clause in t sql that loops forever                                            |sql       |
|c # in linux environment                                                            |c#        |
|percentages of subtotal in a report                                                 |c++       |
|what is the best way to determine the number of days in a month with javascript     |javascript|
|stopping msi from launching an exe in the system context                            |.net      |
|specify ordinals of

+-------------------------------------------------------------------------------+----------+
|input                                                                          |category  |
+-------------------------------------------------------------------------------+----------+
|how can i send an array to php through ajax                                    |php       |
|python beyond the basics                                                       |python    |
|how do you deal with connection strings when deploying an asp net site         |asp.net   |
|programmatically launching standalone adobe flashplayer on linux/x11           |windows   |
|how to implement a singleton in c #                                            |c#        |
|encoding problem classic asp                                                   |asp.net   |
|eclipse hide paths in the `` open resource '' dialog                           |java      |
|what design pattern to use for user authentication in java           

+--------------------------------------------------------------------------------------------------------+----------+
|input                                                                                                   |category  |
+--------------------------------------------------------------------------------------------------------+----------+
|c the definitive truth about rand random and arc4random                                                 |c         |
|vector shape on stage appears over dynamic textfield                                                    |c++       |
|can sql server express be used to effectively administrate a sql server standard/enterprise installation|sql-server|
|how do you quickly find the url for a win32 api on msdn                                                 |python    |
|c # in linux environment                                                                                |c#        |
|best way to use a db table as a message/job queue      

+---------------------------------------------------------------------------------------------------------------------------+----------+
|input                                                                                                                      |category  |
+---------------------------------------------------------------------------------------------------------------------------+----------+
|transforming selected text with a hotkey                                                                                   |c#        |
|vector shape on stage appears over dynamic textfield                                                                       |c++       |
|best update method for mysql db                                                                                            |mysql     |
|converting svg to png using c #                                                                                            |c#        |
|suspending and notifying threads when th

+-------------------------------------------------------------------------------+----------+
|input                                                                          |category  |
+-------------------------------------------------------------------------------+----------+
|transforming selected text with a hotkey                                       |c#        |
|how can i send an array to php through ajax                                    |php       |
|c the definitive truth about rand random and arc4random                        |c         |
|getting odd error on net executenonquery                                       |.net      |
|carbide / symbian c++ change application icon                                  |c++       |
|doctype rss & html entities                                                    |html      |
|what is the best way to see what files are locked in subversion                |java      |
|vertical text with jquery                                            

+--------------------------------------------------------+----------+
|input                                                   |category  |
+--------------------------------------------------------+----------+
|vector shape on stage appears over dynamic textfield    |c++       |
|getting odd error on net executenonquery                |.net      |
|authoritative source on xml sig                         |.net      |
|while clause in t sql that loops forever                |sql       |
|how do you quickly find the url for a win32 api on msdn |python    |
|db side encryption via nhibernate                       |wcf       |
|daemon threads explanation                              |c#        |
|percentages of subtotal in a report                     |c++       |
|be notified when visual/logical child added/removed     |c#        |
|actionscript3 to javascript communication best practices|javascript|
+--------------------------------------------------------+----------+
only showing top 10 

+-----------------------------------------------------------------------------+----------+
|input                                                                        |category  |
+-----------------------------------------------------------------------------+----------+
|sql server and the guest account what is this for                            |sql-server|
|how do i add custom column to existing wss list template                     |c#        |
|c # lambda expressions or delegates as a properties or arguments             |c#        |
|encoding problem classic asp                                                 |asp.net   |
|css `` see through '' background crazy navigation menu problem               |asp.net   |
|patterns for the overlap of two objects                                      |java      |
|can t create a subversion repository with eclipse 3 4 0 svn 1 5 1            |java      |
|what s the nicest way to do observer/observable in objective c iphone version|c         |

+-----------------------------------------------------------------------------------+----------+
|input                                                                              |category  |
+-----------------------------------------------------------------------------------+----------+
|transforming selected text with a hotkey                                           |c#        |
|sql server and the guest account what is this for                                  |sql-server|
|gantt chart controls on windows forms                                              |windows   |
|building flex projects in ant/nant                                                 |flex      |
|how to generate unit test code for methods                                         |java      |
|how do i extract the version and path from an svn working copy into a nant variable|java      |
|should i provide a deep clone when implementing icloneable                         |java      |
|image archive vs image strip 

+----------------------------------------------------------------------------+----------+
|input                                                                       |category  |
+----------------------------------------------------------------------------+----------+
|how to effectively implement sessions in gae                                |ruby      |
|best update method for mysql db                                             |mysql     |
|sql query count with 0 count                                                |sql       |
|while clause in t sql that loops forever                                    |sql       |
|can you use an alias in the where clause in mysql                           |mysql     |
|is it true that the smallest memory allocation in net is a byte             |.net      |
|css `` see through '' background crazy navigation menu problem              |asp.net   |
|rendered pixel width data for each character in a browser s font            |javascript|
|why is ap

+----------------------------------------------------------------------------------------------+-----------+
|input                                                                                         |category   |
+----------------------------------------------------------------------------------------------+-----------+
|build tar file from directory in php without exec/passthru                                    |php        |
|should i have one class for every database i use                                              |java       |
|how to implement a singleton in c #                                                           |c#         |
|why learn perl python ruby if the company is using c++ c # or java as the application language|c++        |
|post from one controller action to another not redirect                                       |asp.net    |
|db side encryption via nhibernate                                                             |wcf        |
|encoding problem c

+--------------------------------------------------------------------------------------------------------+----------+
|input                                                                                                   |category  |
+--------------------------------------------------------------------------------------------------------+----------+
|transforming selected text with a hotkey                                                                |c#        |
|how can i send an array to php through ajax                                                             |php       |
|how to consume json web services from a windows client                                                  |.net      |
|python beyond the basics                                                                                |python    |
|how to effectively implement sessions in gae                                                            |ruby      |
|can sql server express be used to effectively administr

+----------------------------------------------------------------+-------------+
|input                                                           |category     |
+----------------------------------------------------------------+-------------+
|gantt chart controls on windows forms                           |windows      |
|how to effectively implement sessions in gae                    |ruby         |
|building flex projects in ant/nant                              |flex         |
|best update method for mysql db                                 |mysql        |
|how to disable visual studio macro `` tip '' balloon            |visual-studio|
|doctype rss & html entities                                     |html         |
|db side encryption via nhibernate                               |wcf          |
|converting svg to png using c #                                 |c#           |
|vertical text with jquery                                       |jquery       |
|rendered pixel width data f

+--------------------------------------------------------------+----------+
|input                                                         |category  |
+--------------------------------------------------------------+----------+
|build tar file from directory in php without exec/passthru    |php       |
|python beyond the basics                                      |python    |
|getting odd error on net executenonquery                      |.net      |
|are incrementers / decrementers var++ var etc thread safe     |c++       |
|building flex projects in ant/nant                            |flex      |
|how to implement a singleton in c #                           |c#        |
|how to return a page of results from sql                      |sql-server|
|how to generate unit test code for methods                    |java      |
|vertical text with jquery                                     |jquery    |
|how would you attack this polymorphism string building problem|c#        |
+-----------

+---------------------------------------------------------------------+-------------+
|input                                                                |category     |
+---------------------------------------------------------------------+-------------+
|transforming selected text with a hotkey                             |c#           |
|gantt chart controls on windows forms                                |windows      |
|how do you manage infragistics webgrid data from javascript/ajax code|c#           |
|whats the best way to start using mylyn                              |c#           |
|mac iwork/pages automation                                           |flash        |
|converting svg to png using c #                                      |c#           |
|what is the aspnet_client folder for under the iis structure         |.net         |
|what is the best way to see what files are locked in subversion      |java         |
|rendered pixel width data for each character in a bro

+-------------------------------------------------------------------------------+----------+
|input                                                                          |category  |
+-------------------------------------------------------------------------------+----------+
|prevent long word to add horizontal scroll to html view                        |asp.net   |
|ms sql 2000 turn off logging during stored procedure                           |sql-server|
|converting svg to png using c #                                                |c#        |
|encoding problem classic asp                                                   |asp.net   |
|how do i call net code c # /vb net from vbscript                               |c#        |
|c # winforms datagridview/sql compact negative integer in primary key column   |c#        |
|eclipse hide paths in the `` open resource '' dialog                           |java      |
|what is the best way to determine the number of days in a month with 

+--------------------------------------------------------------------+----------+
|input                                                               |category  |
+--------------------------------------------------------------------+----------+
|making an image greyscale with gd library                           |c#        |
|how does traceroute work                                            |c#        |
|programmatically launching standalone adobe flashplayer on linux/x11|windows   |
|while clause in t sql that loops forever                            |sql       |
|what is the aspnet_client folder for under the iis structure        |.net      |
|what is the best way to see what files are locked in subversion     |java      |
|daemon threads explanation                                          |c#        |
|creating my own iterators                                           |c#        |
|is it true that the smallest memory allocation in net is a byte     |.net      |
|actionscript3 t

+--------------------------------------------------------------------------------------+-------------+
|input                                                                                 |category     |
+--------------------------------------------------------------------------------------+-------------+
|how can i change html attribute names with jquery                                     |jquery       |
|how can i send an array to php through ajax                                           |php          |
|how to pass an unpersisted modified object from view back to controller without a form|c#           |
|carbide / symbian c++ change application icon                                         |c++          |
|building flex projects in ant/nant                                                    |flex         |
|best update method for mysql db                                                       |mysql        |
|how to disable visual studio macro `` tip '' balloon                    

+---------------------------------------------------------------+--------+
|input                                                          |category|
+---------------------------------------------------------------+--------+
|how to consume json web services from a windows client         |.net    |
|prevent long word to add horizontal scroll to html view        |asp.net |
|game programming and event handlers                            |c#      |
|best update method for mysql db                                |mysql   |
|how to implement a singleton in c #                            |c#      |
|how can i get a list of available wireless networks on linux   |python  |
|what is the best way to see what files are locked in subversion|java    |
|daemon threads explanation                                     |c#      |
|eclipse text comparison order                                  |eclipse |
|percentages of subtotal in a report                            |c++     |
+------------------------

+-------------------------------------------------------------------------------+----------+
|input                                                                          |category  |
+-------------------------------------------------------------------------------+----------+
|how can i send an array to php through ajax                                    |php       |
|how do i add custom column to existing wss list template                       |c#        |
|how to effectively implement sessions in gae                                   |ruby      |
|should i have one class for every database i use                               |java      |
|game programming and event handlers                                            |c#        |
|converting svg to png using c #                                                |c#        |
|eclipse hide paths in the `` open resource '' dialog                           |java      |
|what is the best way to determine the number of days in a month with 

+-------------------------------------------------------------+-------------+
|input                                                        |category     |
+-------------------------------------------------------------+-------------+
|how to consume json web services from a windows client       |.net         |
|is filestream lazy loaded in net                             |.net         |
|carbide / symbian c++ change application icon                |c++          |
|how to disable visual studio macro `` tip '' balloon         |visual-studio|
|how can i create prototype methods like javascript in c # net|c#           |
|eclipse text comparison order                                |eclipse      |
|parsing t sql to parameterize a query                        |sql          |
|what are the limits of ruby on rails                         |ruby         |
|using lists in c #                                           |c#           |
|why is visual studio constantly crashing                     |v

## Multi prediction

In [6]:
udf_predict = make_udf(multi_prediction=True)
df_output = df_input.withColumn("category",udf_predict(col("input")))
%timeit -n 20 df_output.sample(False,.10).show(10,False)

+-----------------------------------------------------------------------------------+----------------------------------+
|input                                                                              |category                          |
+-----------------------------------------------------------------------------------+----------------------------------+
|build tar file from directory in php without exec/passthru                         |[php, c++, .net]                  |
|how does traceroute work                                                           |[c#, .net, java]                  |
|authoritative source on xml sig                                                    |[.net, php, c#]                   |
|programmatically launching standalone adobe flashplayer on linux/x11               |[windows, .net, c#]               |
|db side encryption via nhibernate                                                  |[wcf, flash, linq-to-sql]         |
|converting svg to png using c #

+----------------------------------------------------------------------------------------------------------------+------------------------------+
|input                                                                                                           |category                      |
+----------------------------------------------------------------------------------------------------------------+------------------------------+
|how to implement a singleton in c #                                                                             |[c#, .net, c]                 |
|while clause in t sql that loops forever                                                                        |[sql, sql-server, mysql]      |
|implementing and enforcing coding standards                                                                     |[c++, java, c#]               |
|what is the aspnet_client folder for under the iis structure                                                    |[.net, c#,

+-----------------------------------------------------------------------------------------+---------------------------+
|input                                                                                    |category                   |
+-----------------------------------------------------------------------------------------+---------------------------+
|transforming selected text with a hotkey                                                 |[c#, python, asp.net]      |
|getting odd error on net executenonquery                                                 |[.net, c#, asp.net]        |
|carbide / symbian c++ change application icon                                            |[c++, c, windows]          |
|image archive vs image strip                                                             |[css, asp.net-mvc, asp.net]|
|is it true that the smallest memory allocation in net is a byte                          |[.net, c#, asp.net]        |
|how can i convert my current page to pd

+----------------------------------------------------------------------------------------------+-----------------------------+
|input                                                                                         |category                     |
+----------------------------------------------------------------------------------------------+-----------------------------+
|python beyond the basics                                                                      |[python, c++, windows]       |
|are incrementers / decrementers var++ var etc thread safe                                     |[c++, javascript, windows]   |
|why learn perl python ruby if the company is using c++ c # or java as the application language|[c++, c, python]             |
|best way to use a db table as a message/job queue                                             |[java, sql, sql-server]      |
|percentages of subtotal in a report                                                           |[c++, java, c#]

+-------------------------------------------------------------------------------+-----------------------------+
|input                                                                          |category                     |
+-------------------------------------------------------------------------------+-----------------------------+
|gantt chart controls on windows forms                                          |[windows, .net, c#]          |
|carbide / symbian c++ change application icon                                  |[c++, c, windows]            |
|suspending and notifying threads when there is work to do                      |[java, c#, .net]             |
|what is the best way to determine the number of days in a month with javascript|[javascript, java, c++]      |
|image archive vs image strip                                                   |[css, asp.net-mvc, asp.net]  |
|should db layer members be static or instance                                  |[java, asp.net, c#]    

+--------------------------------------------------------------------+-----------------------------------------+
|input                                                               |category                                 |
+--------------------------------------------------------------------+-----------------------------------------+
|prevent long word to add horizontal scroll to html view             |[asp.net, .net, c#]                      |
|is filestream lazy loaded in net                                    |[.net, c#, asp.net]                      |
|while clause in t sql that loops forever                            |[sql, sql-server, mysql]                 |
|c # in linux environment                                            |[c#, c, winforms]                        |
|numbering regex submatches                                          |[php, html, regex]                       |
|parsing t sql to parameterize a query                               |[sql, sql-server, sql-serv

+----------------------------------------------------------------+---------------------------+
|input                                                           |category                   |
+----------------------------------------------------------------+---------------------------+
|making an image greyscale with gd library                       |[c#, php, html]            |
|how can i send an array to php through ajax                     |[php, html, jquery]        |
|building flex projects in ant/nant                              |[flex, ruby, silverlight]  |
|the necessity of hiding the salt for a hash                     |[c++, c#, java]            |
|post from one controller action to another not redirect         |[asp.net, html, css]       |
|how do i call net code c # /vb net from vbscript                |[c#, .net, asp.net]        |
|is it possible to define in a dependent dll s application config|[c#, .net, asp.net]        |
|image archive vs image strip                     

+----------------------------------------------------------------+-----------------------------+
|input                                                           |category                     |
+----------------------------------------------------------------+-----------------------------+
|gantt chart controls on windows forms                           |[windows, .net, c#]          |
|prevent long word to add horizontal scroll to html view         |[asp.net, .net, c#]          |
|whats the best way to start using mylyn                         |[c#, .net, javascript]       |
|getting odd error on net executenonquery                        |[.net, c#, asp.net]          |
|building flex projects in ant/nant                              |[flex, ruby, silverlight]    |
|while clause in t sql that loops forever                        |[sql, sql-server, mysql]     |
|what is the aspnet_client folder for under the iis structure    |[.net, c#, c++]              |
|how can i determine the ip of

+---------------------------------------------------------------------+--------------------------+
|input                                                                |category                  |
+---------------------------------------------------------------------+--------------------------+
|making an image greyscale with gd library                            |[c#, php, html]           |
|how can i change html attribute names with jquery                    |[jquery, javascript, html]|
|how do you manage infragistics webgrid data from javascript/ajax code|[c#, java, .net]          |
|getting odd error on net executenonquery                             |[.net, c#, asp.net]       |
|carbide / symbian c++ change application icon                        |[c++, c, windows]         |
|are incrementers / decrementers var++ var etc thread safe            |[c++, javascript, windows]|
|how to implement a singleton in c #                                  |[c#, .net, c]             |
|how do yo

+-----------------------------------------------------------------------------------+-----------------------------+
|input                                                                              |category                     |
+-----------------------------------------------------------------------------------+-----------------------------+
|sql server and the guest account what is this for                                  |[sql-server, sql, database]  |
|prevent long word to add horizontal scroll to html view                            |[asp.net, .net, c#]          |
|is filestream lazy loaded in net                                                   |[.net, c#, asp.net]          |
|how do you deal with connection strings when deploying an asp net site             |[asp.net, c#, asp.net-mvc]   |
|how to disable visual studio macro `` tip '' balloon                               |[visual-studio, .net, c#]    |
|post from one controller action to another not redirect                

+----------------------------------------------------------------+-------------------------+
|input                                                           |category                 |
+----------------------------------------------------------------+-------------------------+
|how do i add custom column to existing wss list template        |[c#, .net, java]         |
|how to effectively implement sessions in gae                    |[ruby, python, mysql]    |
|should i have one class for every database i use                |[java, c#, sql-server]   |
|how to disable visual studio macro `` tip '' balloon            |[visual-studio, .net, c#]|
|the necessity of hiding the salt for a hash                     |[c++, c#, java]          |
|how to implement a singleton in c #                             |[c#, .net, c]            |
|how can i get a list of available wireless networks on linux    |[python, c#, .net]       |
|c # lambda expressions or delegates as a properties or arguments|[c#,

+--------------------------------------------------------------------------------------+---------------------------+
|input                                                                                 |category                   |
+--------------------------------------------------------------------------------------+---------------------------+
|gantt chart controls on windows forms                                                 |[windows, .net, c#]        |
|build tar file from directory in php without exec/passthru                            |[php, c++, .net]           |
|how do i add custom column to existing wss list template                              |[c#, .net, java]           |
|how to pass an unpersisted modified object from view back to controller without a form|[c#, .net, javascript]     |
|mac iwork/pages automation                                                            |[flash, wcf, flex]         |
|how to disable visual studio macro `` tip '' balloon           

+--------------------------------------------------------------------+---------------------------+
|input                                                               |category                   |
+--------------------------------------------------------------------+---------------------------+
|sql server and the guest account what is this for                   |[sql-server, sql, database]|
|build tar file from directory in php without exec/passthru          |[php, c++, .net]           |
|game programming and event handlers                                 |[c#, .net, asp.net]        |
|mac iwork/pages automation                                          |[flash, wcf, flex]         |
|best update method for mysql db                                     |[mysql, sql, database]     |
|programmatically launching standalone adobe flashplayer on linux/x11|[windows, .net, c#]        |
|ms sql 2000 turn off logging during stored procedure                |[sql-server, sql, database]|
|doctype r

+--------------------------------------------------------------------------------------------------------+----------------------------------+
|input                                                                                                   |category                          |
+--------------------------------------------------------------------------------------------------------+----------------------------------+
|can sql server express be used to effectively administrate a sql server standard/enterprise installation|[sql-server, sql, sql-server-2005]|
|carbide / symbian c++ change application icon                                                           |[c++, c, windows]                 |
|how do you quickly find the url for a win32 api on msdn                                                 |[python, c++, c#]                 |
|encoding problem classic asp                                                                            |[asp.net, asp.net-mvc, css]       |
|suspe

+-------------------------------------------------------------+-----------------------------+
|input                                                        |category                     |
+-------------------------------------------------------------+-----------------------------+
|making an image greyscale with gd library                    |[c#, php, html]              |
|how can i send an array to php through ajax                  |[php, html, jquery]          |
|should i have one class for every database i use             |[java, c#, sql-server]       |
|carbide / symbian c++ change application icon                |[c++, c, windows]            |
|ms sql 2000 turn off logging during stored procedure         |[sql-server, sql, database]  |
|implementing and enforcing coding standards                  |[c++, java, c#]              |
|how can i create prototype methods like javascript in c # net|[c#, .net, asp.net]          |
|eclipse text comparison order                              

+----------------------------------------------------------------------------------------------+---------------------------+
|input                                                                                         |category                   |
+----------------------------------------------------------------------------------------------+---------------------------+
|sql server and the guest account what is this for                                             |[sql-server, sql, database]|
|c the definitive truth about rand random and arc4random                                       |[c, c#, c++]               |
|how do i add custom column to existing wss list template                                      |[c#, .net, java]           |
|how to implement a singleton in c #                                                           |[c#, .net, c]              |
|why learn perl python ruby if the company is using c++ c # or java as the application language|[c++, c, python]           |


+--------------------------------------------------------------------------------------+--------------------------+
|input                                                                                 |category                  |
+--------------------------------------------------------------------------------------+--------------------------+
|transforming selected text with a hotkey                                              |[c#, python, asp.net]     |
|how can i send an array to php through ajax                                           |[php, html, jquery]       |
|prevent long word to add horizontal scroll to html view                               |[asp.net, .net, c#]       |
|how do you deal with connection strings when deploying an asp net site                |[asp.net, c#, asp.net-mvc]|
|how to pass an unpersisted modified object from view back to controller without a form|[c#, .net, javascript]    |
|game programming and event handlers                                    

+--------------------------------------------------------------------------------------------------------+----------------------------------+
|input                                                                                                   |category                          |
+--------------------------------------------------------------------------------------------------------+----------------------------------+
|how to effectively implement sessions in gae                                                            |[ruby, python, mysql]             |
|can sql server express be used to effectively administrate a sql server standard/enterprise installation|[sql-server, sql, sql-server-2005]|
|doctype rss & html entities                                                                             |[html, asp.net, css]              |
|how do i call net code c # /vb net from vbscript                                                        |[c#, .net, asp.net]               |
|can y

+-------------------------------------------------------------------+----------------------------------+
|input                                                              |category                          |
+-------------------------------------------------------------------+----------------------------------+
|how to effectively implement sessions in gae                       |[ruby, python, mysql]             |
|vector shape on stage appears over dynamic textfield               |[c++, c#, .net]                   |
|how does traceroute work                                           |[c#, .net, java]                  |
|are incrementers / decrementers var++ var etc thread safe          |[c++, javascript, windows]        |
|sql query count with 0 count                                       |[sql, sql-server, sql-server-2005]|
|how do i call net code c # /vb net from vbscript                   |[c#, .net, asp.net]               |
|eclipse hide paths in the `` open resource '' dialog  

+--------------------------------------------------------------------------------------+-----------------------------------------+
|input                                                                                 |category                                 |
+--------------------------------------------------------------------------------------+-----------------------------------------+
|is filestream lazy loaded in net                                                      |[.net, c#, asp.net]                      |
|how to pass an unpersisted modified object from view back to controller without a form|[c#, .net, javascript]                   |
|what is the aspnet_client folder for under the iis structure                          |[.net, c#, c++]                          |
|ant and the available task what if something is not available                         |[java, .net, c#]                         |
|what is the simplest way to find the difference between 2 times in python         

+----------------------------------------------------------------------------------------------+-----------------------+
|input                                                                                         |category               |
+----------------------------------------------------------------------------------------------+-----------------------+
|transforming selected text with a hotkey                                                      |[c#, python, asp.net]  |
|how to consume json web services from a windows client                                        |[.net, windows, c#]    |
|game programming and event handlers                                                           |[c#, .net, asp.net]    |
|mac iwork/pages automation                                                                    |[flash, wcf, flex]     |
|programmatically launching standalone adobe flashplayer on linux/x11                          |[windows, .net, c#]    |
|why learn perl python ruby if t

+--------------------------------------------------------------------+----------------------------------+
|input                                                               |category                          |
+--------------------------------------------------------------------+----------------------------------+
|prevent long word to add horizontal scroll to html view             |[asp.net, .net, c#]               |
|mac iwork/pages automation                                          |[flash, wcf, flex]                |
|programmatically launching standalone adobe flashplayer on linux/x11|[windows, .net, c#]               |
|how do you quickly find the url for a win32 api on msdn             |[python, c++, c#]                 |
|how to generate unit test code for methods                          |[java, c#, .net]                  |
|how can i create prototype methods like javascript in c # net       |[c#, .net, asp.net]               |
|eclipse hide paths in the `` open resource ''

+----------------------------------------------------------------------------+---------------------------+
|input                                                                       |category                   |
+----------------------------------------------------------------------------+---------------------------+
|how can i send an array to php through ajax                                 |[php, html, jquery]        |
|prevent long word to add horizontal scroll to html view                     |[asp.net, .net, c#]        |
|whats the best way to start using mylyn                                     |[c#, .net, javascript]     |
|how do i add custom column to existing wss list template                    |[c#, .net, java]           |
|c # winforms datagridview/sql compact negative integer in primary key column|[c#, .net, winforms]       |
|suspending and notifying threads when there is work to do                   |[java, c#, .net]           |
|vertical text with jquery           

+----------------------------------------------------------------------+---------------------------+
|input                                                                 |category                   |
+----------------------------------------------------------------------+---------------------------+
|how do you deal with connection strings when deploying an asp net site|[asp.net, c#, asp.net-mvc] |
|getting odd error on net executenonquery                              |[.net, c#, asp.net]        |
|doctype rss & html entities                                           |[html, asp.net, css]       |
|post from one controller action to another not redirect               |[asp.net, html, css]       |
|suspending and notifying threads when there is work to do             |[java, c#, .net]           |
|eclipse hide paths in the `` open resource '' dialog                  |[java, c++, c]             |
|vertical text with jquery                                             |[jquery, javascript

# Approach 2: pandas_udf (via PyArrow)

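Note: this approach requires `pyarrow==0.14.1`; with other versions, applying a Python UDF to a Spark DataFrame can fail with `java.lang.IllegalArgumentException`. See [this Stack Overflow question](https://stackoverflow.com/questions/58878848/java-lang-illegalargumentexception-when-applying-a-python-udf-to-a-spark-datafra) for details.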

First, we test the *predict_serie* function (which is used in *make_pandas_udf*) directly on a small pandas sample.

In [7]:
# Pull a 10% sample (without replacement) of the Spark input into pandas.
# The seed is fixed so the same rows are drawn on every run.
pdf_sample = df_input.sample(
    withReplacement=False,
    fraction=0.10,
    seed=12345,
).toPandas()

In [18]:
# Side-by-side preview: first 10 inputs next to their predictions.
# predict_serie is called with False as its second argument, which yields a
# single label per row (presumably the multi_prediction flag -- confirm in classifier.py).
pd.concat(
    [
        pdf_sample["input"].head(10),
        predict_serie(pdf_sample["input"], False).head(10).rename("category"),
    ],
    axis=1,
)

Unnamed: 0,input,category
0,is filestream lazy loaded in net,.net
1,programmatically launching standalone adobe fl...,windows
2,encoding problem classic asp,asp.net
3,c # winforms datagridview/sql compact negative...,c#
4,suspending and notifying threads when there is...,java
5,creating my own iterators,c#
6,css `` see through '' background crazy navigat...,asp.net
7,sending email in net through gmail,.net
8,specify ordinals of c++ exported functions in ...,c++
9,in c # or any language what is/are your favour...,c#


In [19]:
# Side-by-side preview: first 10 inputs next to their predictions.
# predict_serie is called with True as its second argument, which yields a
# list of labels per row (presumably the multi_prediction flag -- confirm in classifier.py).
pd.concat(
    [
        pdf_sample["input"].head(10),
        predict_serie(pdf_sample["input"], True).head(10).rename("category"),
    ],
    axis=1,
)

Unnamed: 0,input,category
0,is filestream lazy loaded in net,"[.net, c#, asp.net]"
1,programmatically launching standalone adobe fl...,"[windows, .net, c#]"
2,encoding problem classic asp,"[asp.net, asp.net-mvc, css]"
3,c # winforms datagridview/sql compact negative...,"[c#, .net, winforms]"
4,suspending and notifying threads when there is...,"[java, c#, .net]"
5,creating my own iterators,"[c#, .net, asp.net]"
6,css `` see through '' background crazy navigat...,"[asp.net, javascript, html]"
7,sending email in net through gmail,"[.net, c#, asp.net]"
8,specify ordinals of c++ exported functions in ...,"[c++, c, java]"
9,in c # or any language what is/are your favour...,"[c#, .net, asp.net]"


## Single prediction

In [11]:
udf_predict = make_pandas_udf(multi_prediction=False)
df_output = df_input.withColumn("category",udf_predict(col("input")))
%timeit -n 20 df_output.sample(False,.10).show(10,False)

+-------------------------------------------------------------------------+----------+
|input                                                                    |category  |
+-------------------------------------------------------------------------+----------+
|how can i send an array to php through ajax                              |php       |
|python beyond the basics                                                 |python    |
|how do you deal with connection strings when deploying an asp net site   |asp.net   |
|game programming and event handlers                                      |c#        |
|building flex projects in ant/nant                                       |flex      |
|how to generate unit test code for methods                               |java      |
|should i provide a deep clone when implementing icloneable               |java      |
|what is the simplest way to find the difference between 2 times in python|python    |
|casting array of objects which implement i

+--------------------------------------------------------------------------------------------------------+----------+
|input                                                                                                   |category  |
+--------------------------------------------------------------------------------------------------------+----------+
|making an image greyscale with gd library                                                               |c#        |
|how can i send an array to php through ajax                                                             |php       |
|should i have one class for every database i use                                                        |java      |
|can sql server express be used to effectively administrate a sql server standard/enterprise installation|sql-server|
|best update method for mysql db                                                                         |mysql     |
|how to implement a singleton in c #                    

+----------------------------------------------------------------+-------------+
|input                                                           |category     |
+----------------------------------------------------------------+-------------+
|sql server and the guest account what is this for               |sql-server   |
|whats the best way to start using mylyn                         |c#           |
|vector shape on stage appears over dynamic textfield            |c++          |
|should i have one class for every database i use                |java         |
|carbide / symbian c++ change application icon                   |c++          |
|how to disable visual studio macro `` tip '' balloon            |visual-studio|
|c # lambda expressions or delegates as a properties or arguments|c#           |
|how to generate unit test code for methods                      |java         |
|converting svg to png using c #                                 |c#           |
|how do i call net code c # 

+----------------------------------------------------------------------+----------+
|input                                                                 |category  |
+----------------------------------------------------------------------+----------+
|how do you deal with connection strings when deploying an asp net site|asp.net   |
|while clause in t sql that loops forever                              |sql       |
|how to generate unit test code for methods                            |java      |
|is there a way to asynchronously filter an ilist                      |python    |
|how can i determine the ip of my router/gateway in java               |java      |
|how do i focus a foreign window                                       |python    |
|be notified when visual/logical child added/removed                   |c#        |
|class methods as event handlers in javascript                         |javascript|
|is it true that the smallest memory allocation in net is a byte       |.net

+--------------------------------------------------------------+----------+
|input                                                         |category  |
+--------------------------------------------------------------+----------+
|is filestream lazy loaded in net                              |.net      |
|how do i add custom column to existing wss list template      |c#        |
|authoritative source on xml sig                               |.net      |
|while clause in t sql that loops forever                      |sql       |
|implementing and enforcing coding standards                   |c++       |
|how to return a page of results from sql                      |sql-server|
|creating my own iterators                                     |c#        |
|how would you attack this polymorphism string building problem|c#        |
|which css tag creates a box like this with title              |javascript|
|thotkey with win key support                                  |c#        |
+-----------

+-------------------------------------------------------------------------------+----------+
|input                                                                          |category  |
+-------------------------------------------------------------------------------+----------+
|gantt chart controls on windows forms                                          |windows   |
|carbide / symbian c++ change application icon                                  |c++       |
|the necessity of hiding the salt for a hash                                    |c++       |
|is it possible to define in a dependent dll s application config               |c#        |
|c # winforms datagridview/sql compact negative integer in primary key column   |c#        |
|what is the best way to determine the number of days in a month with javascript|javascript|
|setting the height of a div dynamically                                        |c#        |
|creating my own iterators                                            

+------------------------------------------------------------------------------------+----------+
|input                                                                               |category  |
+------------------------------------------------------------------------------------+----------+
|transforming selected text with a hotkey                                            |c#        |
|build tar file from directory in php without exec/passthru                          |php       |
|how to consume json web services from a windows client                              |.net      |
|how to effectively implement sessions in gae                                        |ruby      |
|carbide / symbian c++ change application icon                                       |c++       |
|how can i create prototype methods like javascript in c # net                       |c#        |
|eclipse hide paths in the `` open resource '' dialog                                |java      |
|what is the best wa

+--------------------------------------------------------------------+--------+
|input                                                               |category|
+--------------------------------------------------------------------+--------+
|how can i send an array to php through ajax                         |php     |
|prevent long word to add horizontal scroll to html view             |asp.net |
|programmatically launching standalone adobe flashplayer on linux/x11|windows |
|doctype rss & html entities                                         |html    |
|how can i get a list of available wireless networks on linux        |python  |
|c # lambda expressions or delegates as a properties or arguments    |c#      |
|how do you quickly find the url for a win32 api on msdn             |python  |
|db side encryption via nhibernate                                   |wcf     |
|percentages of subtotal in a report                                 |c++     |
|can you use an alias in the where claus

+----------------------------------------------------------------------------+----------+
|input                                                                       |category  |
+----------------------------------------------------------------------------+----------+
|sql server and the guest account what is this for                           |sql-server|
|c the definitive truth about rand random and arc4random                     |c         |
|prevent long word to add horizontal scroll to html view                     |asp.net   |
|python beyond the basics                                                    |python    |
|vector shape on stage appears over dynamic textfield                        |c++       |
|carbide / symbian c++ change application icon                               |c++       |
|the necessity of hiding the salt for a hash                                 |c++       |
|c # lambda expressions or delegates as a properties or arguments            |c#        |
|c # winfo

+-------------------------------------------------------------------------+-----------+
|input                                                                    |category   |
+-------------------------------------------------------------------------+-----------+
|transforming selected text with a hotkey                                 |c#         |
|gantt chart controls on windows forms                                    |windows    |
|how to implement a singleton in c #                                      |c#         |
|post from one controller action to another not redirect                  |asp.net    |
|ant and the available task what if something is not available            |java       |
|setting the height of a div dynamically                                  |c#         |
|how can i determine the ip of my router/gateway in java                  |java       |
|is it true that the smallest memory allocation in net is a byte          |.net       |
|what is the simplest way to fin

+--------------------------------------------------------------------------------------+-------------+
|input                                                                                 |category     |
+--------------------------------------------------------------------------------------+-------------+
|how can i send an array to php through ajax                                           |php          |
|python beyond the basics                                                              |python       |
|should i have one class for every database i use                                      |java         |
|how to pass an unpersisted modified object from view back to controller without a form|c#           |
|how to disable visual studio macro `` tip '' balloon                                  |visual-studio|
|vertical text with jquery                                                             |jquery       |
|setting the height of a div dynamically                                 

+----------------------------------------------------------------------------+-------------+
|input                                                                       |category     |
+----------------------------------------------------------------------------+-------------+
|how can i send an array to php through ajax                                 |php          |
|prevent long word to add horizontal scroll to html view                     |asp.net      |
|python beyond the basics                                                    |python       |
|sql query count with 0 count                                                |sql          |
|how to disable visual studio macro `` tip '' balloon                        |visual-studio|
|best way to use a db table as a message/job queue                           |java         |
|c # winforms datagridview/sql compact negative integer in primary key column|c#           |
|suspending and notifying threads when there is work to do            

+-----------------------------------------------------------------------------------+-----------+
|input                                                                              |category   |
+-----------------------------------------------------------------------------------+-----------+
|programmatically launching standalone adobe flashplayer on linux/x11               |windows    |
|ms sql 2000 turn off logging during stored procedure                               |sql-server |
|db side encryption via nhibernate                                                  |wcf        |
|how can i create prototype methods like javascript in c # net                      |c#         |
|how do i extract the version and path from an svn working copy into a nant variable|java       |
|decoding chunked http with actionscript                                            |silverlight|
|unicode vs str decode for a utf8 encoded byte string python 2 x                    |python     |
|sorting and groupin

+----------------------------------------------------------------------------------------------------------------+-------------+
|input                                                                                                           |category     |
+----------------------------------------------------------------------------------------------------------------+-------------+
|c # lambda expressions or delegates as a properties or arguments                                                |c#           |
|actionscript3 to javascript communication best practices                                                        |javascript   |
|asp net mvc beta 1 defaultmodelbinder wrongly persists parameter and validation state between unrelated requests|asp.net-mvc  |
|how do you optimise your javascript                                                                             |javascript   |
|how to add a dynamic weather ticker to the webpage                                              

+---------------------------------------------------------------+----------+
|input                                                          |category  |
+---------------------------------------------------------------+----------+
|is filestream lazy loaded in net                               |.net      |
|python beyond the basics                                       |python    |
|while clause in t sql that loops forever                       |sql       |
|ms sql 2000 turn off logging during stored procedure           |sql-server|
|doctype rss & html entities                                    |html      |
|c # in linux environment                                       |c#        |
|suspending and notifying threads when there is work to do      |java      |
|what is the best way to see what files are locked in subversion|java      |
|how can i determine the ip of my router/gateway in java        |java      |
|parsing t sql to parameterize a query                          |sql       |

+-------------------------------------------------------------------------------+----------+
|input                                                                          |category  |
+-------------------------------------------------------------------------------+----------+
|how do i add custom column to existing wss list template                       |c#        |
|vector shape on stage appears over dynamic textfield                           |c++       |
|best update method for mysql db                                                |mysql     |
|can you use an alias in the where clause in mysql                              |mysql     |
|what is the best way to determine the number of days in a month with javascript|javascript|
|how can i ban a whole company from my web site                                 |java      |
|find all storage devices attached to a linux machine                           |java      |
|2d javascript array                                                  

+-------------------------------------------------------------+----------+
|input                                                        |category  |
+-------------------------------------------------------------+----------+
|is filestream lazy loaded in net                             |.net      |
|whats the best way to start using mylyn                      |c#        |
|game programming and event handlers                          |c#        |
|mac iwork/pages automation                                   |flash     |
|sql query count with 0 count                                 |sql       |
|ms sql 2000 turn off logging during stored procedure         |sql-server|
|db side encryption via nhibernate                            |wcf       |
|ant and the available task what if something is not available|java      |
|vertical text with jquery                                    |jquery    |
|what design pattern to use for user authentication in java   |java      |
+------------------------

+----------------------------------------------------------------------------+-------------+
|input                                                                       |category     |
+----------------------------------------------------------------------------+-------------+
|build tar file from directory in php without exec/passthru                  |php          |
|is filestream lazy loaded in net                                            |.net         |
|game programming and event handlers                                         |c#           |
|mac iwork/pages automation                                                  |flash        |
|building flex projects in ant/nant                                          |flex         |
|how to disable visual studio macro `` tip '' balloon                        |visual-studio|
|c # winforms datagridview/sql compact negative integer in primary key column|c#           |
|actionscript3 to javascript communication best practices             

+--------------------------------------------------------------------------------------------------------+----------+
|input                                                                                                   |category  |
+--------------------------------------------------------------------------------------------------------+----------+
|how to consume json web services from a windows client                                                  |.net      |
|can sql server express be used to effectively administrate a sql server standard/enterprise installation|sql-server|
|daemon threads explanation                                                                              |c#        |
|parsing t sql to parameterize a query                                                                   |sql       |
|sending email in net through gmail                                                                      |.net      |
|how to serialize an object to xml without getting xmlns

+-------------------------------------------------------------------------+--------+
|input                                                                    |category|
+-------------------------------------------------------------------------+--------+
|how do you manage infragistics webgrid data from javascript/ajax code    |c#      |
|is filestream lazy loaded in net                                         |.net    |
|how to effectively implement sessions in gae                             |ruby    |
|how to implement a singleton in c #                                      |c#      |
|post from one controller action to another not redirect                  |asp.net |
|vertical text with jquery                                                |jquery  |
|stopping msi from launching an exe in the system context                 |.net    |
|what is the simplest way to find the difference between 2 times in python|python  |
|can you set or where is the local document root                 

+----------------------------------------------------------------------+----------+
|input                                                                 |category  |
+----------------------------------------------------------------------+----------+
|how do you deal with connection strings when deploying an asp net site|asp.net   |
|are incrementers / decrementers var++ var etc thread safe             |c++       |
|best update method for mysql db                                       |mysql     |
|how can i get a list of available wireless networks on linux          |python    |
|how to generate unit test code for methods                            |java      |
|best way to use a db table as a message/job queue                     |java      |
|eclipse hide paths in the `` open resource '' dialog                  |java      |
|what design pattern to use for user authentication in java            |java      |
|creating my own iterators                                             |c#  

## Multi prediction

In [12]:
udf_predict = make_pandas_udf(multi_prediction=True)
df_output = df_input.withColumn("category",udf_predict(col("input")))
%timeit -n 20 df_output.sample(False,.10).show(10,False)

+-----------------------------------------------------------------------------------------+-----------------------------+
|input                                                                                    |category                     |
+-----------------------------------------------------------------------------------------+-----------------------------+
|how do i add custom column to existing wss list template                                 |[c#, .net, java]             |
|best update method for mysql db                                                          |[mysql, sql, database]       |
|the necessity of hiding the salt for a hash                                              |[c++, c#, java]              |
|ms sql 2000 turn off logging during stored procedure                                     |[sql-server, sql, database]  |
|db side encryption via nhibernate                                                        |[wcf, flash, linq-to-sql]    |
|how do i extract the ve

+--------------------------------------------------------------------+-----------------------------------------+
|input                                                               |category                                 |
+--------------------------------------------------------------------+-----------------------------------------+
|should i have one class for every database i use                    |[java, c#, sql-server]                   |
|authoritative source on xml sig                                     |[.net, php, c#]                          |
|how do you quickly find the url for a win32 api on msdn             |[python, c++, c#]                        |
|best way to use a db table as a message/job queue                   |[java, sql, sql-server]                  |
|can you use an alias in the where clause in mysql                   |[mysql, ruby, sql]                       |
|what s the term for design ala `` object method1 method2 method3 `` |[c#, python, c]           

+----------------------------------------------------------------------+---------------------------+
|input                                                                 |category                   |
+----------------------------------------------------------------------+---------------------------+
|how to consume json web services from a windows client                |[.net, windows, c#]        |
|how do you deal with connection strings when deploying an asp net site|[asp.net, c#, asp.net-mvc] |
|building flex projects in ant/nant                                    |[flex, ruby, silverlight]  |
|suspending and notifying threads when there is work to do             |[java, c#, .net]           |
|should i provide a deep clone when implementing icloneable            |[java, c#, c++]            |
|class methods as event handlers in javascript                         |[javascript, html, asp.net]|
|numbering regex submatches                                            |[php, html, regex] 

+---------------------------------------------------------------------+---------------------------+
|input                                                                |category                   |
+---------------------------------------------------------------------+---------------------------+
|how do you manage infragistics webgrid data from javascript/ajax code|[c#, java, .net]           |
|authoritative source on xml sig                                      |[.net, php, c#]            |
|how to return a page of results from sql                             |[sql-server, c#, sql]      |
|db side encryption via nhibernate                                    |[wcf, flash, linq-to-sql]  |
|eclipse hide paths in the `` open resource '' dialog                 |[java, c++, c]             |
|can you use an alias in the where clause in mysql                    |[mysql, ruby, sql]         |
|creating my own iterators                                            |[c#, .net, asp.net]        |


+--------------------------------------------------------------------------------------------------------+----------------------------------+
|input                                                                                                   |category                          |
+--------------------------------------------------------------------------------------------------------+----------------------------------+
|how do you manage infragistics webgrid data from javascript/ajax code                                   |[c#, java, .net]                  |
|can sql server express be used to effectively administrate a sql server standard/enterprise installation|[sql-server, sql, sql-server-2005]|
|carbide / symbian c++ change application icon                                                           |[c++, c, windows]                 |
|building flex projects in ant/nant                                                                      |[flex, ruby, silverlight]         |
|progr

+----------------------------------------------------------------------+--------------------------+
|input                                                                 |category                  |
+----------------------------------------------------------------------+--------------------------+
|how can i change html attribute names with jquery                     |[jquery, javascript, html]|
|mac iwork/pages automation                                            |[flash, wcf, flex]        |
|how can i get a list of available wireless networks on linux          |[python, c#, .net]        |
|how do i call net code c # /vb net from vbscript                      |[c#, .net, asp.net]       |
|suspending and notifying threads when there is work to do             |[java, c#, .net]          |
|should db layer members be static or instance                         |[java, asp.net, c#]       |
|drawing a custom label on a pie chart in yahoo s flash library astra  |[ruby, python, windows]   |


+----------------------------------------------------------------------------+--------------------------+
|input                                                                       |category                  |
+----------------------------------------------------------------------------+--------------------------+
|transforming selected text with a hotkey                                    |[c#, python, asp.net]     |
|whats the best way to start using mylyn                                     |[c#, .net, javascript]    |
|are incrementers / decrementers var++ var etc thread safe                   |[c++, javascript, windows]|
|authoritative source on xml sig                                             |[.net, php, c#]           |
|programmatically launching standalone adobe flashplayer on linux/x11        |[windows, .net, c#]       |
|post from one controller action to another not redirect                     |[asp.net, html, css]      |
|db side encryption via nhibernate            

+--------------------------------------------------------------------------------------------------------+----------------------------------+
|input                                                                                                   |category                          |
+--------------------------------------------------------------------------------------------------------+----------------------------------+
|transforming selected text with a hotkey                                                                |[c#, python, asp.net]             |
|can sql server express be used to effectively administrate a sql server standard/enterprise installation|[sql-server, sql, sql-server-2005]|
|implementing and enforcing coding standards                                                             |[c++, java, c#]                   |
|what is the best way to determine the number of days in a month with javascript                         |[javascript, java, c++]           |
|how c

+---------------------------------------------------------------------+--------------------+
|input                                                                |category            |
+---------------------------------------------------------------------+--------------------+
|making an image greyscale with gd library                            |[c#, php, html]     |
|c the definitive truth about rand random and arc4random              |[c, c#, c++]        |
|how do you manage infragistics webgrid data from javascript/ajax code|[c#, java, .net]    |
|how does traceroute work                                             |[c#, .net, java]    |
|post from one controller action to another not redirect              |[asp.net, html, css]|
|how do i call net code c # /vb net from vbscript                     |[c#, .net, asp.net] |
|daemon threads explanation                                           |[c#, svn, algorithm]|
|creating my own iterators                                            

+------------------------------------------------------------------------------------+----------------------------------+
|input                                                                               |category                          |
+------------------------------------------------------------------------------------+----------------------------------+
|sql server and the guest account what is this for                                   |[sql-server, sql, database]       |
|how do you manage infragistics webgrid data from javascript/ajax code               |[c#, java, .net]                  |
|how do you deal with connection strings when deploying an asp net site              |[asp.net, c#, asp.net-mvc]        |
|the necessity of hiding the salt for a hash                                         |[c++, c#, java]                   |
|what design pattern to use for user authentication in java                          |[java, eclipse, unit-testing]     |
|parsing t sql to parame

+----------------------------------------------------------------------------+-----------------------------+
|input                                                                       |category                     |
+----------------------------------------------------------------------------+-----------------------------+
|python beyond the basics                                                    |[python, c++, windows]       |
|programmatically launching standalone adobe flashplayer on linux/x11        |[windows, .net, c#]          |
|doctype rss & html entities                                                 |[html, asp.net, css]         |
|c # winforms datagridview/sql compact negative integer in primary key column|[c#, .net, winforms]         |
|can you use an alias in the where clause in mysql                           |[mysql, ruby, sql]           |
|sending email in net through gmail                                          |[.net, c#, asp.net]          |
|why is app_offline

+----------------------------------------------------------------------------+--------------------------+
|input                                                                       |category                  |
+----------------------------------------------------------------------------+--------------------------+
|c the definitive truth about rand random and arc4random                     |[c, c#, c++]              |
|gantt chart controls on windows forms                                       |[windows, .net, c#]       |
|how to consume json web services from a windows client                      |[.net, windows, c#]       |
|python beyond the basics                                                    |[python, c++, windows]    |
|vector shape on stage appears over dynamic textfield                        |[c++, c#, .net]           |
|what is the aspnet_client folder for under the iis structure                |[.net, c#, c++]           |
|vertical text with jquery                    

+----------------------------------------------------------------------+-----------------------------+
|input                                                                 |category                     |
+----------------------------------------------------------------------+-----------------------------+
|build tar file from directory in php without exec/passthru            |[php, c++, .net]             |
|are incrementers / decrementers var++ var etc thread safe             |[c++, javascript, windows]   |
|building flex projects in ant/nant                                    |[flex, ruby, silverlight]    |
|c # in linux environment                                              |[c#, c, winforms]            |
|converting svg to png using c #                                       |[c#, .net, winforms]         |
|eclipse text comparison order                                         |[eclipse, java, unit-testing]|
|can you use an alias in the where clause in mysql                     |[

+--------------------------------------------------------------------------------------------------------+----------------------------------+
|input                                                                                                   |category                          |
+--------------------------------------------------------------------------------------------------------+----------------------------------+
|sql server and the guest account what is this for                                                       |[sql-server, sql, database]       |
|prevent long word to add horizontal scroll to html view                                                 |[asp.net, .net, c#]               |
|can sql server express be used to effectively administrate a sql server standard/enterprise installation|[sql-server, sql, sql-server-2005]|
|how to implement a singleton in c #                                                                     |[c#, .net, c]                     |
|how d

+----------------------------------------------------------------------------+---------------------------+
|input                                                                       |category                   |
+----------------------------------------------------------------------------+---------------------------+
|build tar file from directory in php without exec/passthru                  |[php, c++, .net]           |
|whats the best way to start using mylyn                                     |[c#, .net, javascript]     |
|are incrementers / decrementers var++ var etc thread safe                   |[c++, javascript, windows] |
|doctype rss & html entities                                                 |[html, asp.net, css]       |
|converting svg to png using c #                                             |[c#, .net, winforms]       |
|encoding problem classic asp                                                |[asp.net, asp.net-mvc, css]|
|c # winforms datagridview/sql compac

+----------------------------------------------------------+-----------------------------+
|input                                                     |category                     |
+----------------------------------------------------------+-----------------------------+
|whats the best way to start using mylyn                   |[c#, .net, javascript]       |
|how do i add custom column to existing wss list template  |[c#, .net, java]             |
|how do you quickly find the url for a win32 api on msdn   |[python, c++, c#]            |
|post from one controller action to another not redirect   |[asp.net, html, css]         |
|c # in linux environment                                  |[c#, c, winforms]            |
|suspending and notifying threads when there is work to do |[java, c#, .net]             |
|what design pattern to use for user authentication in java|[java, eclipse, unit-testing]|
|how can i determine the ip of my router/gateway in java   |[java, c++, c#]              |

+-------------------------------------------------------------------+---------------------------+
|input                                                              |category                   |
+-------------------------------------------------------------------+---------------------------+
|transforming selected text with a hotkey                           |[c#, python, asp.net]      |
|is filestream lazy loaded in net                                   |[.net, c#, asp.net]        |
|whats the best way to start using mylyn                            |[c#, .net, javascript]     |
|how to effectively implement sessions in gae                       |[ruby, python, mysql]      |
|mac iwork/pages automation                                         |[flash, wcf, flex]         |
|best update method for mysql db                                    |[mysql, sql, database]     |
|best way to use a db table as a message/job queue                  |[java, sql, sql-server]    |
|is there a way to a

+-------------------------------------------------------------------------+----------------------------------+
|input                                                                    |category                          |
+-------------------------------------------------------------------------+----------------------------------+
|best update method for mysql db                                          |[mysql, sql, database]            |
|programmatically launching standalone adobe flashplayer on linux/x11     |[windows, .net, c#]               |
|post from one controller action to another not redirect                  |[asp.net, html, css]              |
|vertical text with jquery                                                |[jquery, javascript, html]        |
|creating my own iterators                                                |[c#, .net, asp.net]               |
|how do i focus a foreign window                                          |[python, java, ruby]              |
|

+----------------------------------------------------------------------------------------------+---------------------------+
|input                                                                                         |category                   |
+----------------------------------------------------------------------------------------------+---------------------------+
|how can i send an array to php through ajax                                                   |[php, html, jquery]        |
|c the definitive truth about rand random and arc4random                                       |[c, c#, c++]               |
|carbide / symbian c++ change application icon                                                 |[c++, c, windows]          |
|building flex projects in ant/nant                                                            |[flex, ruby, silverlight]  |
|why learn perl python ruby if the company is using c++ c # or java as the application language|[c++, c, python]           |


+----------------------------------------------------------------------------------------------+--------------------------+
|input                                                                                         |category                  |
+----------------------------------------------------------------------------------------------+--------------------------+
|gantt chart controls on windows forms                                                         |[windows, .net, c#]       |
|prevent long word to add horizontal scroll to html view                                       |[asp.net, .net, c#]       |
|are incrementers / decrementers var++ var etc thread safe                                     |[c++, javascript, windows]|
|how to implement a singleton in c #                                                           |[c#, .net, c]             |
|why learn perl python ruby if the company is using c++ c # or java as the application language|[c++, c, python]          |
|how to 

+---------------------------------------------+----------------------+
|input                                        |category              |
+---------------------------------------------+----------------------+
|making an image greyscale with gd library    |[c#, php, html]       |
|whats the best way to start using mylyn      |[c#, .net, javascript]|
|python beyond the basics                     |[python, c++, windows]|
|game programming and event handlers          |[c#, .net, asp.net]   |
|carbide / symbian c++ change application icon|[c++, c, windows]     |
|best update method for mysql db              |[mysql, sql, database]|
|doctype rss & html entities                  |[html, asp.net, css]  |
|converting svg to png using c #              |[c#, .net, winforms]  |
|daemon threads explanation                   |[c#, svn, algorithm]  |
|setting the height of a div dynamically      |[c#, javascript, .net]|
+---------------------------------------------+----------------------+
only s

+--------------------------------------------------------------------------------------------------------+----------------------------------+
|input                                                                                                   |category                          |
+--------------------------------------------------------------------------------------------------------+----------------------------------+
|c the definitive truth about rand random and arc4random                                                 |[c, c#, c++]                      |
|how to consume json web services from a windows client                                                  |[.net, windows, c#]               |
|python beyond the basics                                                                                |[python, c++, windows]            |
|vector shape on stage appears over dynamic textfield                                                    |[c++, c#, .net]                   |
|how t

+-------------------------------------------------------------------------------+-----------------------------------------+
|input                                                                          |category                                 |
+-------------------------------------------------------------------------------+-----------------------------------------+
|authoritative source on xml sig                                                |[.net, php, c#]                          |
|best way to use a db table as a message/job queue                              |[java, sql, sql-server]                  |
|ant and the available task what if something is not available                  |[java, .net, c#]                         |
|what is the best way to determine the number of days in a month with javascript|[javascript, java, c++]                  |
|how can i determine the ip of my router/gateway in java                        |[java, c++, c#]                          |
|stoppin

+----------------------------------------------------------------------------+------------------------------+
|input                                                                       |category                      |
+----------------------------------------------------------------------------+------------------------------+
|how do i add custom column to existing wss list template                    |[c#, .net, java]              |
|should i have one class for every database i use                            |[java, c#, sql-server]        |
|how can i create prototype methods like javascript in c # net               |[c#, .net, asp.net]           |
|best way to use a db table as a message/job queue                           |[java, sql, sql-server]       |
|should i provide a deep clone when implementing icloneable                  |[java, c#, c++]               |
|in c # or any language what is/are your favourite way of removing repetition|[c#, .net, asp.net]           |
|decoding 

+--------------------------------------------------------------------------------------+-----------------------------+
|input                                                                                 |category                     |
+--------------------------------------------------------------------------------------+-----------------------------+
|how can i send an array to php through ajax                                           |[php, html, jquery]          |
|is filestream lazy loaded in net                                                      |[.net, c#, asp.net]          |
|whats the best way to start using mylyn                                               |[c#, .net, javascript]       |
|how to pass an unpersisted modified object from view back to controller without a form|[c#, .net, javascript]       |
|getting odd error on net executenonquery                                              |[.net, c#, asp.net]          |
|authoritative source on xml sig                

+----------------------------------------------------------------+---------------------------+
|input                                                           |category                   |
+----------------------------------------------------------------+---------------------------+
|mac iwork/pages automation                                      |[flash, wcf, flex]         |
|c # lambda expressions or delegates as a properties or arguments|[c#, .net, asp.net]        |
|post from one controller action to another not redirect         |[asp.net, html, css]       |
|suspending and notifying threads when there is work to do       |[java, c#, .net]           |
|how would you attack this polymorphism string building problem  |[c#, c++, .net]            |
|should i provide a deep clone when implementing icloneable      |[java, c#, c++]            |
|stopping msi from launching an exe in the system context        |[.net, c#, c++]            |
|what techniques can you use to profile your code 

## From `array<string>` type to `string` type

In [15]:
# Collapse the array of predicted labels into a single '|'-separated string
# (overwriting the "category" column in the displayed frame only), then
# print the first 20 rows without truncating long cell values.
(
    df_output
    .withColumn('category', concat_ws('|', 'category'))
    .show(n=20, truncate=False)
)

+--------------------------------------------------------------------------------------+-----------------------+
|input                                                                                 |category               |
+--------------------------------------------------------------------------------------+-----------------------+
|making an image greyscale with gd library                                             |c#|php|html            |
|transforming selected text with a hotkey                                              |c#|python|asp.net      |
|sql server and the guest account what is this for                                     |sql-server|sql|database|
|how can i change html attribute names with jquery                                     |jquery|javascript|html |
|how can i send an array to php through ajax                                           |php|html|jquery        |
|c the definitive truth about rand random and arc4random                               |c|c#|c++