In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, concat_ws
from classifier import make_udf, predict_serie, make_pandas_udf
import pandas as pd




In [2]:
# Reuse the active Spark session if there is one; otherwise start a new one
# registered under the application name 'mysession'.
spark = (
    SparkSession.builder
    .appName('mysession')
    .getOrCreate()
)

```python
# Register the trained model file with the Spark context so it is downloaded
# alongside the job on every node.
spark.sparkContext.addFile('models/ft_tuned.ftz')
# Register the local helper module as a Python dependency of the job, so code
# running in tasks can import `classifier`.
spark.sparkContext.addPyFile('./classifier.py')
```

# Generating Test File

In [4]:
# Preview the first 10 lines of the labelled test set: each line starts with
# one or more `__label__<tag>` tokens followed by the question text.
! head -10 data/test

__label__php __label__image making an image greyscale with gd library
__label__eclipse transforming selected text with a hotkey
__label__sql-server sql server and the guest account what is this for
__label__jquery __label__html how can i change html attribute names with jquery
__label__php __label__ajax how can i send an array to php through ajax
__label__c __label__cocoa c the definitive truth about rand random and arc4random
__label__winforms gantt chart controls on windows forms
__label__php __label__linux build tar file from directory in php without exec/passthru
__label__javascript __label__ajax how do you manage infragistics webgrid data from javascript/ajax code
__label__wcf how to consume json web services from a windows client


```python
def keep_sentence_field(line, label_prefix="__label__"):
    '''
    Strip the fastText label tokens from a labeled instance and return only
    the text of the sentence.

    A token is treated as a label only when it *starts with* `label_prefix`,
    which is how fastText itself recognises labels. Tokens that merely
    contain the prefix somewhere in the middle are part of the sentence and
    are kept.

    Parameters
    ----------
    line : str
        One line in fastText supervised format. Surrounding whitespace and
        the trailing newline are ignored.
    label_prefix : str, optional
        Prefix that marks a label token (default '__label__').

    Returns
    -------
    str
        The remaining tokens joined by single spaces ('' if nothing is left).

    Example
    Input:
    '__label__python __label__django help with unit testing in a python app using django'
    Output:
    'help with unit testing in a python app using django'
    '''
    # split() with no argument collapses runs of whitespace and drops the newline.
    words = [token for token in line.split() if not token.startswith(label_prefix)]
    return ' '.join(words)

# Location of input file (fastText labelled test set)
inputFile = 'data/test'

# Location of output file (one unlabelled sentence per line)
outputFile = 'data/spark_input'

# Stream the input line by line, drop the label tokens and write the remaining
# sentence. Both files are managed by `with`, so they are closed even if an
# error occurs part-way through. The output is written explicitly as UTF-8
# (Spark's default when reading it back) instead of the platform-dependent
# locale encoding.
with open(inputFile, encoding="ISO-8859-1") as source:
    with open(outputFile, 'w', encoding='utf-8') as file:
        for line in source:
            file.write(keep_sentence_field(line) + '\n')
```

In [5]:
# Check the generated file: the same first 10 questions, now without the
# leading `__label__` tokens.
! head -10 data/spark_input

making an image greyscale with gd library
transforming selected text with a hotkey
sql server and the guest account what is this for
how can i change html attribute names with jquery
how can i send an array to php through ajax
c the definitive truth about rand random and arc4random
gantt chart controls on windows forms
build tar file from directory in php without exec/passthru
how do you manage infragistics webgrid data from javascript/ajax code
how to consume json web services from a windows client


# Build Spark DataFrame

In [6]:
from pyspark.sql.types import StructType, StructField, StringType, ArrayType

# One nullable string column named "input", holding one question per row.
schema = StructType().add("input", StringType())

# NOTE(review): the file is plain text but is parsed with the CSV reader, so a
# line containing the delimiter or quote character would not come through
# verbatim -- confirm the generated input never contains them.
df_input = spark.read.schema(schema).csv('data/spark_input', header=False)

# Approach 1: Standard UDF with distributed files

## Single prediction

In [7]:
# Build the prediction UDF from the local `classifier` module in single-label
# mode and apply it to the "input" column; in the output below "category"
# holds one label per row.
udf_predict = make_udf(multi_prediction=False)
df_output = df_input.withColumn("category",udf_predict(col("input")))
# Time displaying 10 untruncated rows of a 10% sample drawn without replacement.
# `-n 20` executes the statement 20 times per timing run, and every execution
# prints its own table, which is why many tables with different rows follow.
# Keep `sample()` inside the timed statement so each execution re-samples.
%timeit -n 20 df_output.sample(False,.10).show(10,False)

+-------------------------------------------------------------+--------+
|input                                                        |category|
+-------------------------------------------------------------+--------+
|how can i send an array to php through ajax                  |php     |
|gantt chart controls on windows forms                        |windows |
|is filestream lazy loaded in net                             |.net    |
|how to effectively implement sessions in gae                 |ruby    |
|should i have one class for every database i use             |java    |
|getting odd error on net executenonquery                     |.net    |
|mac iwork/pages automation                                   |flash   |
|c # in linux environment                                     |c#      |
|how can i create prototype methods like javascript in c # net|c#      |
|ant and the available task what if something is not available|java    |
+--------------------------------------------------

+---------------------------------------------------------------+-------------+
|input                                                          |category     |
+---------------------------------------------------------------+-------------+
|how does traceroute work                                       |c#           |
|what is the best way to see what files are locked in subversion|java         |
|is there a way to asynchronously filter an ilist               |python       |
|creating my own iterators                                      |c#           |
|how to serialize an object to xml without getting xmlns `` ``  |c#           |
|using lists in c #                                             |c#           |
|why is visual studio constantly crashing                       |visual-studio|
|determine if type is a pointer in a template function          |c++          |
|protecting javascript code in a ff extension                   |javascript   |
|statistics engine for java ee web appli

+-------------------------------------------------------------------------+--------+
|input                                                                    |category|
+-------------------------------------------------------------------------+--------+
|implementing and enforcing coding standards                              |c++     |
|how to generate unit test code for methods                               |java    |
|how do i call net code c # /vb net from vbscript                         |c#      |
|eclipse text comparison order                                            |eclipse |
|how do i focus a foreign window                                          |python  |
|numbering regex submatches                                               |php     |
|how do i calculate the previous business day in ksh shell script         |ruby    |
|what is the simplest way to find the difference between 2 times in python|python  |
|unix socket implementation for java                             

+-----------------------------------------------------------------------------------+----------+
|input                                                                              |category  |
+-----------------------------------------------------------------------------------+----------+
|how do you manage infragistics webgrid data from javascript/ajax code              |c#        |
|how to return a page of results from sql                                           |sql-server|
|encoding problem classic asp                                                       |asp.net   |
|how do i call net code c # /vb net from vbscript                                   |c#        |
|how do i extract the version and path from an svn working copy into a nant variable|java      |
|ant and the available task what if something is not available                      |java      |
|what s the term for design ala `` object method1 method2 method3 ``                |c#        |
|numbering regex submatches   

+----------------------------------------------------------------------------------------------+----------+
|input                                                                                         |category  |
+----------------------------------------------------------------------------------------------+----------+
|game programming and event handlers                                                           |c#        |
|why learn perl python ruby if the company is using c++ c # or java as the application language|c++       |
|how to return a page of results from sql                                                      |sql-server|
|post from one controller action to another not redirect                                       |asp.net   |
|how to generate unit test code for methods                                                    |java      |
|converting svg to png using c #                                                               |c#        |
|best way to use a db table 

+----------------------------------------------------------------+--------+
|input                                                           |category|
+----------------------------------------------------------------+--------+
|whats the best way to start using mylyn                         |c#      |
|is it possible to define in a dependent dll s application config|c#      |
|what is the best way to see what files are locked in subversion |java    |
|is there a way to asynchronously filter an ilist                |python  |
|can you use an alias in the where clause in mysql               |mysql   |
|how can i determine the ip of my router/gateway in java         |java    |
|be notified when visual/logical child added/removed             |c#      |
|can you set or where is the local document root                 |c#      |
|determine if type is a pointer in a template function           |c++     |
|cross platform editor control                                   |python  |
+-----------

+----------------------------------------------------------------+--------+
|input                                                           |category|
+----------------------------------------------------------------+--------+
|how can i send an array to php through ajax                     |php     |
|gantt chart controls on windows forms                           |windows |
|whats the best way to start using mylyn                         |c#      |
|how do i add custom column to existing wss list template        |c#      |
|carbide / symbian c++ change application icon                   |c++     |
|c # lambda expressions or delegates as a properties or arguments|c#      |
|can you use an alias in the where clause in mysql               |mysql   |
|ant and the available task what if something is not available   |java    |
|what design pattern to use for user authentication in java      |java    |
|stopping msi from launching an exe in the system context        |.net    |
+-----------

+-------------------------------------------------------------------------------+----------+
|input                                                                          |category  |
+-------------------------------------------------------------------------------+----------+
|how can i change html attribute names with jquery                              |jquery    |
|python beyond the basics                                                       |python    |
|converting svg to png using c #                                                |c#        |
|what is the best way to see what files are locked in subversion                |java      |
|percentages of subtotal in a report                                            |c++       |
|what is the best way to determine the number of days in a month with javascript|javascript|
|parsing t sql to parameterize a query                                          |sql       |
|drawing a custom label on a pie chart in yahoo s flash library astra 

+-------------------------------------------------------------------+----------+
|input                                                              |category  |
+-------------------------------------------------------------------+----------+
|should i have one class for every database i use                   |java      |
|how to return a page of results from sql                           |sql-server|
|best way to use a db table as a message/job queue                  |java      |
|can you use an alias in the where clause in mysql                  |mysql     |
|what s the term for design ala `` object method1 method2 method3 ``|c#        |
|should db layer members be static or instance                      |java      |
|stopping msi from launching an exe in the system context           |.net      |
|how do i calculate the previous business day in ksh shell script   |ruby      |
|using lists in c #                                                 |c#        |
|rendered pixel width data f

+------------------------------------------------------------------------------------+-----------+
|input                                                                               |category   |
+------------------------------------------------------------------------------------+-----------+
|how do i add custom column to existing wss list template                            |c#         |
|getting odd error on net executenonquery                                            |.net       |
|best way to use a db table as a message/job queue                                   |java       |
|creating my own iterators                                                           |c#         |
|be notified when visual/logical child added/removed                                 |c#         |
|stopping msi from launching an exe in the system context                            |.net       |
|html over flash without stopping interaction with flash                             |asp.net-mvc|
|http on e

+--------------------------------------------------------------------------------------------------------+----------+
|input                                                                                                   |category  |
+--------------------------------------------------------------------------------------------------------+----------+
|build tar file from directory in php without exec/passthru                                              |php       |
|how do i add custom column to existing wss list template                                                |c#        |
|can sql server express be used to effectively administrate a sql server standard/enterprise installation|sql-server|
|authoritative source on xml sig                                                                         |.net      |
|how can i create prototype methods like javascript in c # net                                           |c#        |
|best way to use a db table as a message/job queue      

+----------------------------------------------------------------------------------------------------------------+-----------+
|input                                                                                                           |category   |
+----------------------------------------------------------------------------------------------------------------+-----------+
|how do you manage infragistics webgrid data from javascript/ajax code                                           |c#         |
|prevent long word to add horizontal scroll to html view                                                         |asp.net    |
|how to implement a singleton in c #                                                                             |c#         |
|converting svg to png using c #                                                                                 |c#         |
|numbering regex submatches                                                                                    

+----------------------------------------------------------+-------------+
|input                                                     |category     |
+----------------------------------------------------------+-------------+
|how can i change html attribute names with jquery         |jquery       |
|how can i send an array to php through ajax               |php          |
|game programming and event handlers                       |c#           |
|building flex projects in ant/nant                        |flex         |
|how to disable visual studio macro `` tip '' balloon      |visual-studio|
|implementing and enforcing coding standards               |c++          |
|how to return a page of results from sql                  |sql-server   |
|daemon threads explanation                                |c#           |
|should i provide a deep clone when implementing icloneable|java         |
|using lists in c #                                        |c#           |
+------------------------

+--------------------------------------------------------------------------------------+--------+
|input                                                                                 |category|
+--------------------------------------------------------------------------------------+--------+
|how can i change html attribute names with jquery                                     |jquery  |
|how can i send an array to php through ajax                                           |php     |
|gantt chart controls on windows forms                                                 |windows |
|how to consume json web services from a windows client                                |.net    |
|how to pass an unpersisted modified object from view back to controller without a form|c#      |
|doctype rss & html entities                                                           |html    |
|how can i create prototype methods like javascript in c # net                         |c#      |
|converting svg to p

+------------------------------------------------------------------------+--------+
|input                                                                   |category|
+------------------------------------------------------------------------+--------+
|is filestream lazy loaded in net                                        |.net    |
|game programming and event handlers                                     |c#      |
|sql query count with 0 count                                            |sql     |
|converting svg to png using c #                                         |c#      |
|how would you attack this polymorphism string building problem          |c#      |
|is it true that the smallest memory allocation in net is a byte         |.net    |
|numbering regex submatches                                              |php     |
|using lists in c #                                                      |c#      |
|can you set or where is the local document root                         |c#

+-------------------------------------------------------+--------+
|input                                                  |category|
+-------------------------------------------------------+--------+
|how can i send an array to php through ajax            |php     |
|gantt chart controls on windows forms                  |windows |
|prevent long word to add horizontal scroll to html view|asp.net |
|whats the best way to start using mylyn                |c#      |
|doctype rss & html entities                            |html    |
|how do i call net code c # /vb net from vbscript       |c#      |
|setting the height of a div dynamically                |c#      |
|creating my own iterators                              |c#      |
|numbering regex submatches                             |php     |
|specify ordinals of c++ exported functions in a dll    |c++     |
+-------------------------------------------------------+--------+
only showing top 10 rows

+-----------------------------------

+----------------------------------------------------------------------------------------------+----------+
|input                                                                                         |category  |
+----------------------------------------------------------------------------------------------+----------+
|transforming selected text with a hotkey                                                      |c#        |
|authoritative source on xml sig                                                               |.net      |
|why learn perl python ruby if the company is using c++ c # or java as the application language|c++       |
|how to return a page of results from sql                                                      |sql-server|
|what is the best way to determine the number of days in a month with javascript               |javascript|
|using lists in c #                                                                            |c#        |
|drawing a custom label on a

+----------------------------------------------------------------------------+--------+
|input                                                                       |category|
+----------------------------------------------------------------------------+--------+
|game programming and event handlers                                         |c#      |
|how can i get a list of available wireless networks on linux                |python  |
|post from one controller action to another not redirect                     |asp.net |
|how can i create prototype methods like javascript in c # net               |c#      |
|encoding problem classic asp                                                |asp.net |
|c # winforms datagridview/sql compact negative integer in primary key column|c#      |
|eclipse text comparison order                                               |eclipse |
|what is the simplest way to find the difference between 2 times in python   |python  |
|authenticating against active d

+-------------------------------------------------------------------------------+-----------+
|input                                                                          |category   |
+-------------------------------------------------------------------------------+-----------+
|build tar file from directory in php without exec/passthru                     |php        |
|python beyond the basics                                                       |python     |
|how do i add custom column to existing wss list template                       |c#         |
|sql query count with 0 count                                                   |sql        |
|best way to use a db table as a message/job queue                              |java       |
|vertical text with jquery                                                      |jquery     |
|what is the best way to determine the number of days in a month with javascript|javascript |
|should db layer members be static or instance              

+------------------------------------------------------------+-----------+
|input                                                       |category   |
+------------------------------------------------------------+-----------+
|how do i add custom column to existing wss list template    |c#         |
|should i have one class for every database i use            |java       |
|how does traceroute work                                    |c#         |
|the necessity of hiding the salt for a hash                 |c++        |
|what is the aspnet_client folder for under the iis structure|.net       |
|be notified when visual/logical child added/removed         |c#         |
|can you set or where is the local document root             |c#         |
|sql delete suspended in activity monitor                    |sql        |
|html over flash without stopping interaction with flash     |asp.net-mvc|
|what does it mean when a bug doesn t crash the program      |c++        |
+------------------------

+-------------------------------------------------------------------------+-------------+
|input                                                                    |category     |
+-------------------------------------------------------------------------+-------------+
|prevent long word to add horizontal scroll to html view                  |asp.net      |
|python beyond the basics                                                 |python       |
|how to return a page of results from sql                                 |sql-server   |
|how do you quickly find the url for a win32 api on msdn                  |python       |
|what is the aspnet_client folder for under the iis structure             |.net         |
|vertical text with jquery                                                |jquery       |
|is it true that the smallest memory allocation in net is a byte          |.net         |
|what is the simplest way to find the difference between 2 times in python|python       |
|can you s

## Multi prediction

In [8]:
# Rebuild the prediction UDF in multi-label mode; in the output below
# "category" holds a list of three labels per row. This rebinds `udf_predict`
# and `df_output` from the single-prediction cell.
udf_predict = make_udf(multi_prediction=True)
df_output = df_input.withColumn("category",udf_predict(col("input")))
# Same benchmark as for the single prediction: 10 untruncated rows of a 10%
# sample without replacement, executed 20 times per timing run (each execution
# prints its own table).
%timeit -n 20 df_output.sample(False,.10).show(10,False)

+-----------------------------------------------------------------------------------+------------------------+
|input                                                                              |category                |
+-----------------------------------------------------------------------------------+------------------------+
|prevent long word to add horizontal scroll to html view                            |[asp.net, .net, c#]     |
|while clause in t sql that loops forever                                           |[sql, sql-server, mysql]|
|implementing and enforcing coding standards                                        |[c++, java, c#]         |
|daemon threads explanation                                                         |[c#, svn, algorithm]    |
|how do i extract the version and path from an svn working copy into a nant variable|[java, c++, .net]       |
|percentages of subtotal in a report                                                |[c++, java, c#]         |
|

+-------------------------------------------------------------------+---------------------------+
|input                                                              |category                   |
+-------------------------------------------------------------------+---------------------------+
|making an image greyscale with gd library                          |[c#, php, html]            |
|sql server and the guest account what is this for                  |[sql-server, sql, database]|
|c the definitive truth about rand random and arc4random            |[c, c#, c++]               |
|gantt chart controls on windows forms                              |[windows, .net, c#]        |
|is filestream lazy loaded in net                                   |[.net, c#, asp.net]        |
|authoritative source on xml sig                                    |[.net, php, c#]            |
|how to implement a singleton in c #                                |[c#, .net, c]              |
|c # in linux enviro

+-----------------------------------------------------------------------------------+---------------------------+
|input                                                                              |category                   |
+-----------------------------------------------------------------------------------+---------------------------+
|making an image greyscale with gd library                                          |[c#, php, html]            |
|sql server and the guest account what is this for                                  |[sql-server, sql, database]|
|should i have one class for every database i use                                   |[java, c#, sql-server]     |
|carbide / symbian c++ change application icon                                      |[c++, c, windows]          |
|ms sql 2000 turn off logging during stored procedure                               |[sql-server, sql, database]|
|how do i extract the version and path from an svn working copy into a nant variable|[ja

+---------------------------------------------------------------+--------------------------+
|input                                                          |category                  |
+---------------------------------------------------------------+--------------------------+
|making an image greyscale with gd library                      |[c#, php, html]           |
|how to effectively implement sessions in gae                   |[ruby, python, mysql]     |
|game programming and event handlers                            |[c#, .net, asp.net]       |
|how to disable visual studio macro `` tip '' balloon           |[visual-studio, .net, c#] |
|eclipse hide paths in the `` open resource '' dialog           |[java, c++, c]            |
|what is the aspnet_client folder for under the iis structure   |[.net, c#, c++]           |
|vertical text with jquery                                      |[jquery, javascript, html]|
|is it true that the smallest memory allocation in net is a byte|[.net

+----------------------------------------------------------------------------------------------+-----------------------------------------+
|input                                                                                         |category                                 |
+----------------------------------------------------------------------------------------------+-----------------------------------------+
|how to disable visual studio macro `` tip '' balloon                                          |[visual-studio, .net, c#]                |
|the necessity of hiding the salt for a hash                                                   |[c++, c#, java]                          |
|while clause in t sql that loops forever                                                      |[sql, sql-server, mysql]                 |
|why learn perl python ruby if the company is using c++ c # or java as the application language|[c++, c, python]                         |
|what is the best way to se

+-----------------------------------------------------------------------------------+--------------------------+
|input                                                                              |category                  |
+-----------------------------------------------------------------------------------+--------------------------+
|transforming selected text with a hotkey                                           |[c#, python, asp.net]     |
|gantt chart controls on windows forms                                              |[windows, .net, c#]       |
|whats the best way to start using mylyn                                            |[c#, .net, javascript]    |
|how to generate unit test code for methods                                         |[java, c#, .net]          |
|how can i create prototype methods like javascript in c # net                      |[c#, .net, asp.net]       |
|best way to use a db table as a message/job queue                                  |[java, sql,

+-----------------------------------------------------------------------+----------------------------------+
|input                                                                  |category                          |
+-----------------------------------------------------------------------+----------------------------------+
|how does traceroute work                                               |[c#, .net, java]                  |
|how to disable visual studio macro `` tip '' balloon                   |[visual-studio, .net, c#]         |
|parsing t sql to parameterize a query                                  |[sql, sql-server, sql-server-2005]|
|how to add a `` driver '' to javax comm serial port programming in java|[java, c++, python]               |
|determine if type is a pointer in a template function                  |[c++, c, python]                  |
|http/ajax gwt vs eclipse gui for thin client deployment                |[eclipse, java, unit-testing]     |
|statistics engine 

+----------------------------------------------------------------------------------------------+---------------------------+
|input                                                                                         |category                   |
+----------------------------------------------------------------------------------------------+---------------------------+
|sql server and the guest account what is this for                                             |[sql-server, sql, database]|
|build tar file from directory in php without exec/passthru                                    |[php, c++, .net]           |
|how to consume json web services from a windows client                                        |[.net, windows, c#]        |
|whats the best way to start using mylyn                                                       |[c#, .net, javascript]     |
|programmatically launching standalone adobe flashplayer on linux/x11                          |[windows, .net, c#]        |


+--------------------------------------------------------------------+----------------------------------+
|input                                                               |category                          |
+--------------------------------------------------------------------+----------------------------------+
|are incrementers / decrementers var++ var etc thread safe           |[c++, javascript, windows]        |
|programmatically launching standalone adobe flashplayer on linux/x11|[windows, .net, c#]               |
|ms sql 2000 turn off logging during stored procedure                |[sql-server, sql, database]       |
|db side encryption via nhibernate                                   |[wcf, flash, linq-to-sql]         |
|encoding problem classic asp                                        |[asp.net, asp.net-mvc, css]       |
|ant and the available task what if something is not available       |[java, .net, c#]                  |
|vertical text with jquery                    

+--------------------------------------------------------------------+----------------------------------+
|input                                                               |category                          |
+--------------------------------------------------------------------+----------------------------------+
|how can i change html attribute names with jquery                   |[jquery, javascript, html]        |
|should i have one class for every database i use                    |[java, c#, sql-server]            |
|while clause in t sql that loops forever                            |[sql, sql-server, mysql]          |
|doctype rss & html entities                                         |[html, asp.net, css]              |
|how do you quickly find the url for a win32 api on msdn             |[python, c++, c#]                 |
|eclipse hide paths in the `` open resource '' dialog                |[java, c++, c]                    |
|parsing t sql to parameterize a query        

+--------------------------------------------------------------------------------------+----------------------------------+
|input                                                                                 |category                          |
+--------------------------------------------------------------------------------------+----------------------------------+
|sql server and the guest account what is this for                                     |[sql-server, sql, database]       |
|whats the best way to start using mylyn                                               |[c#, .net, javascript]            |
|how do i add custom column to existing wss list template                              |[c#, .net, java]                  |
|how to pass an unpersisted modified object from view back to controller without a form|[c#, .net, javascript]            |
|while clause in t sql that loops forever                                              |[sql, sql-server, mysql]          |
|db side

+---------------------------------------------------------+----------------------------------+
|input                                                    |category                          |
+---------------------------------------------------------+----------------------------------+
|transforming selected text with a hotkey                 |[c#, python, asp.net]             |
|how can i send an array to php through ajax              |[php, html, jquery]               |
|prevent long word to add horizontal scroll to html view  |[asp.net, .net, c#]               |
|how to effectively implement sessions in gae             |[ruby, python, mysql]             |
|carbide / symbian c++ change application icon            |[c++, c, windows]                 |
|are incrementers / decrementers var++ var etc thread safe|[c++, javascript, windows]        |
|sql query count with 0 count                             |[sql, sql-server, sql-server-2005]|
|while clause in t sql that loops forever         

+---------------------------------------------------------------+---------------------------+
|input                                                          |category                   |
+---------------------------------------------------------------+---------------------------+
|sql server and the guest account what is this for              |[sql-server, sql, database]|
|how can i change html attribute names with jquery              |[jquery, javascript, html] |
|how to consume json web services from a windows client         |[.net, windows, c#]        |
|post from one controller action to another not redirect        |[asp.net, html, css]       |
|encoding problem classic asp                                   |[asp.net, asp.net-mvc, css]|
|what is the best way to see what files are locked in subversion|[java, c#, .net]           |
|percentages of subtotal in a report                            |[c++, java, c#]            |
|setting the height of a div dynamically                    

+----------------------------------------------------------------------------+-------------------------+
|input                                                                       |category                 |
+----------------------------------------------------------------------------+-------------------------+
|how do you manage infragistics webgrid data from javascript/ajax code       |[c#, java, .net]         |
|carbide / symbian c++ change application icon                               |[c++, c, windows]        |
|building flex projects in ant/nant                                          |[flex, ruby, silverlight]|
|how do you quickly find the url for a win32 api on msdn                     |[python, c++, c#]        |
|c # in linux environment                                                    |[c#, c, winforms]        |
|c # winforms datagridview/sql compact negative integer in primary key column|[c#, .net, winforms]     |
|suspending and notifying threads when there is work to

+--------------------------------------------------------------------------------------+----------------------------------+
|input                                                                                 |category                          |
+--------------------------------------------------------------------------------------+----------------------------------+
|how to pass an unpersisted modified object from view back to controller without a form|[c#, .net, javascript]            |
|how to generate unit test code for methods                                            |[java, c#, .net]                  |
|what is the best way to see what files are locked in subversion                       |[java, c#, .net]                  |
|is there a way to asynchronously filter an ilist                                      |[python, java, .net]              |
|what is the best way to determine the number of days in a month with javascript       |[javascript, java, c++]           |
|setting

+-----------------------------------------------------------------------------------+----------------------------------+
|input                                                                              |category                          |
+-----------------------------------------------------------------------------------+----------------------------------+
|how can i change html attribute names with jquery                                  |[jquery, javascript, html]        |
|gantt chart controls on windows forms                                              |[windows, .net, c#]               |
|how to consume json web services from a windows client                             |[.net, windows, c#]               |
|game programming and event handlers                                                |[c#, .net, asp.net]               |
|how does traceroute work                                                           |[c#, .net, java]                  |
|sql query count with 0 count   

+---------------------------------------------------------------------------------------------------------------------------+----------------------------------+
|input                                                                                                                      |category                          |
+---------------------------------------------------------------------------------------------------------------------------+----------------------------------+
|how can i send an array to php through ajax                                                                                |[php, html, jquery]               |
|should i have one class for every database i use                                                                           |[java, c#, sql-server]            |
|post from one controller action to another not redirect                                                                    |[asp.net, html, css]              |
|how to generate unit test code fo

+---------------------------------------------------------------------------------------------------------------------------+----------------------------------+
|input                                                                                                                      |category                          |
+---------------------------------------------------------------------------------------------------------------------------+----------------------------------+
|game programming and event handlers                                                                                        |[c#, .net, asp.net]               |
|can sql server express be used to effectively administrate a sql server standard/enterprise installation                   |[sql-server, sql, sql-server-2005]|
|converting svg to png using c #                                                                                            |[c#, .net, winforms]              |
|how do i extract the version and 

+---------------------------------------------------------------------+------------------------------+
|input                                                                |category                      |
+---------------------------------------------------------------------+------------------------------+
|how can i send an array to php through ajax                          |[php, html, jquery]           |
|how do you manage infragistics webgrid data from javascript/ajax code|[c#, java, .net]              |
|vector shape on stage appears over dynamic textfield                 |[c++, c#, .net]               |
|authoritative source on xml sig                                      |[.net, php, c#]               |
|post from one controller action to another not redirect              |[asp.net, html, css]          |
|eclipse text comparison order                                        |[eclipse, java, unit-testing] |
|decoding chunked http with actionscript                              |[s

+---------------------------------------------------------+---------------------------+
|input                                                    |category                   |
+---------------------------------------------------------+---------------------------+
|gantt chart controls on windows forms                    |[windows, .net, c#]        |
|prevent long word to add horizontal scroll to html view  |[asp.net, .net, c#]        |
|whats the best way to start using mylyn                  |[c#, .net, javascript]     |
|are incrementers / decrementers var++ var etc thread safe|[c++, javascript, windows] |
|implementing and enforcing coding standards              |[c++, java, c#]            |
|c # in linux environment                                 |[c#, c, winforms]          |
|encoding problem classic asp                             |[asp.net, asp.net-mvc, css]|
|vertical text with jquery                                |[jquery, javascript, html] |
|how can i determine the ip of m

+---------------------------------------------------------------------+-----------------------------+
|input                                                                |category                     |
+---------------------------------------------------------------------+-----------------------------+
|build tar file from directory in php without exec/passthru           |[php, c++, .net]             |
|how do you manage infragistics webgrid data from javascript/ajax code|[c#, java, .net]             |
|vector shape on stage appears over dynamic textfield                 |[c++, c#, .net]              |
|mac iwork/pages automation                                           |[flash, wcf, flex]           |
|post from one controller action to another not redirect              |[asp.net, html, css]         |
|eclipse text comparison order                                        |[eclipse, java, unit-testing]|
|how would you attack this polymorphism string building problem       |[c#, c++, .

+----------------------------------------------------------------------------+-----------------------------+
|input                                                                       |category                     |
+----------------------------------------------------------------------------+-----------------------------+
|how can i change html attribute names with jquery                           |[jquery, javascript, html]   |
|how to disable visual studio macro `` tip '' balloon                        |[visual-studio, .net, c#]    |
|best way to use a db table as a message/job queue                           |[java, sql, sql-server]      |
|what is the aspnet_client folder for under the iis structure                |[.net, c#, c++]              |
|eclipse text comparison order                                               |[eclipse, java, unit-testing]|
|class methods as event handlers in javascript                               |[javascript, html, asp.net]  |
|how to serialize a

+-----------------------------------------------------------------------------------+----------------------+
|input                                                                              |category              |
+-----------------------------------------------------------------------------------+----------------------+
|prevent long word to add horizontal scroll to html view                            |[asp.net, .net, c#]   |
|is filestream lazy loaded in net                                                   |[.net, c#, asp.net]   |
|whats the best way to start using mylyn                                            |[c#, .net, javascript]|
|mac iwork/pages automation                                                         |[flash, wcf, flex]    |
|authoritative source on xml sig                                                    |[.net, php, c#]       |
|implementing and enforcing coding standards                                        |[c++, java, c#]       |
|how can i get a li

+------------------------------------------------------------------------------------------+-----------------------------+
|input                                                                                     |category                     |
+------------------------------------------------------------------------------------------+-----------------------------+
|transforming selected text with a hotkey                                                  |[c#, python, asp.net]        |
|sql server and the guest account what is this for                                         |[sql-server, sql, database]  |
|how do i add custom column to existing wss list template                                  |[c#, .net, java]             |
|ant and the available task what if something is not available                             |[java, .net, c#]             |
|what is the best way to determine the number of days in a month with javascript           |[javascript, java, c++]      |
|setting the hei

# Approach 2: pandas_udf (via PyArrow)

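**Note:** We need to use `pyarrow==0.14.1`. See [this Stack Overflow question](https://stackoverflow.com/questions/58878848/java-lang-illegalargumentexception-when-applying-a-python-udf-to-a-spark-datafra) about the `java.lang.IllegalArgumentException` raised when applying a Python UDF to a Spark DataFrame.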

First, we test the *predict_serie* function (which is used in *make_pandas_udf*) directly on a small pandas sample.

In [9]:
# Draw a seeded (reproducible) sample of roughly 10% of the Spark input,
# without replacement, and collect it to the driver as a pandas DataFrame
# so predict_serie can be exercised locally.
pdf_sample = (
    df_input
    .sample(withReplacement=False, fraction=0.10, seed=12345)
    .toPandas()
)

In [10]:
# Side-by-side view of the first 10 sampled inputs and the result of
# predict_serie called with its second argument set to False
# (the output below shows one label per row).
pd.concat(
    [
        pdf_sample["input"].head(10),
        predict_serie(pdf_sample["input"], False).head(10).rename("category"),
    ],
    axis=1,
)

Unnamed: 0,input,category
0,is filestream lazy loaded in net,.net
1,programmatically launching standalone adobe fl...,windows
2,encoding problem classic asp,asp.net
3,c # winforms datagridview/sql compact negative...,c#
4,suspending and notifying threads when there is...,java
5,creating my own iterators,c#
6,css `` see through '' background crazy navigat...,asp.net
7,sending email in net through gmail,.net
8,specify ordinals of c++ exported functions in ...,c++
9,in c # or any language what is/are your favour...,c#


In [11]:
# Same side-by-side view as the previous cell, but with predict_serie's
# second argument set to True (the output below shows a list of labels per row).
pd.concat(
    [
        pdf_sample["input"].head(10),
        predict_serie(pdf_sample["input"], True).head(10).rename("category"),
    ],
    axis=1,
)

Unnamed: 0,input,category
0,is filestream lazy loaded in net,"[.net, c#, asp.net]"
1,programmatically launching standalone adobe fl...,"[windows, .net, c#]"
2,encoding problem classic asp,"[asp.net, asp.net-mvc, css]"
3,c # winforms datagridview/sql compact negative...,"[c#, .net, winforms]"
4,suspending and notifying threads when there is...,"[java, c#, .net]"
5,creating my own iterators,"[c#, .net, asp.net]"
6,css `` see through '' background crazy navigat...,"[asp.net, javascript, html]"
7,sending email in net through gmail,"[.net, c#, asp.net]"
8,specify ordinals of c++ exported functions in ...,"[c++, c, java]"
9,in c # or any language what is/are your favour...,"[c#, .net, asp.net]"


## Single prediction

In [12]:
udf_predict = make_pandas_udf(multi_prediction=False)
df_output = df_input.withColumn("category",udf_predict(col("input")))
%timeit -n 20 df_output.sample(False,.10).show(10,False)

+----------------------------------------------------------------------------------------------+----------+
|input                                                                                         |category  |
+----------------------------------------------------------------------------------------------+----------+
|sql server and the guest account what is this for                                             |sql-server|
|prevent long word to add horizontal scroll to html view                                       |asp.net   |
|programmatically launching standalone adobe flashplayer on linux/x11                          |windows   |
|doctype rss & html entities                                                                   |html      |
|why learn perl python ruby if the company is using c++ c # or java as the application language|c++       |
|c # lambda expressions or delegates as a properties or arguments                              |c#        |
|encoding problem classic as

+-------------------------------------------------------------------------------+-----------+
|input                                                                          |category   |
+-------------------------------------------------------------------------------+-----------+
|are incrementers / decrementers var++ var etc thread safe                      |c++        |
|how to implement a singleton in c #                                            |c#         |
|c # in linux environment                                                       |c#         |
|how can i create prototype methods like javascript in c # net                  |c#         |
|what is the best way to determine the number of days in a month with javascript|javascript |
|css `` see through '' background crazy navigation menu problem                 |asp.net    |
|html over flash without stopping interaction with flash                        |asp.net-mvc|
|is there an alternative for sleep in c                     

+--------------------------------------------------------------------------------------------------------+----------+
|input                                                                                                   |category  |
+--------------------------------------------------------------------------------------------------------+----------+
|transforming selected text with a hotkey                                                                |c#        |
|sql server and the guest account what is this for                                                       |sql-server|
|how can i send an array to php through ajax                                                             |php       |
|how to consume json web services from a windows client                                                  |.net      |
|python beyond the basics                                                                                |python    |
|can sql server express be used to effectively administr

+-----------------------------------------------------------------------+-------------+
|input                                                                  |category     |
+-----------------------------------------------------------------------+-------------+
|sql server and the guest account what is this for                      |sql-server   |
|prevent long word to add horizontal scroll to html view                |asp.net      |
|how can i get a list of available wireless networks on linux           |python       |
|how do you quickly find the url for a win32 api on msdn                |python       |
|db side encryption via nhibernate                                      |wcf          |
|can you set or where is the local document root                        |c#           |
|why is visual studio constantly crashing                               |visual-studio|
|decoding chunked http with actionscript                                |silverlight  |
|how to add a `` driver '' to ja

+------------------------------------------------------------+----------+
|input                                                       |category  |
+------------------------------------------------------------+----------+
|transforming selected text with a hotkey                    |c#        |
|c the definitive truth about rand random and arc4random     |c         |
|whats the best way to start using mylyn                     |c#        |
|how does traceroute work                                    |c#        |
|ms sql 2000 turn off logging during stored procedure        |sql-server|
|how can i get a list of available wireless networks on linux|python    |
|how to return a page of results from sql                    |sql-server|
|post from one controller action to another not redirect     |asp.net   |
|db side encryption via nhibernate                           |wcf       |
|how do i call net code c # /vb net from vbscript            |c#        |
+-------------------------------------

+--------------------------------------------------------------------------------------+-------------+
|input                                                                                 |category     |
+--------------------------------------------------------------------------------------+-------------+
|how can i change html attribute names with jquery                                     |jquery       |
|how to pass an unpersisted modified object from view back to controller without a form|c#           |
|building flex projects in ant/nant                                                    |flex         |
|how to disable visual studio macro `` tip '' balloon                                  |visual-studio|
|authoritative source on xml sig                                                       |.net         |
|post from one controller action to another not redirect                               |asp.net      |
|what is the aspnet_client folder for under the iis structure            

+-------------------------------------------------------------------+-----------+
|input                                                              |category   |
+-------------------------------------------------------------------+-----------+
|gantt chart controls on windows forms                              |windows    |
|daemon threads explanation                                         |c#         |
|what s the term for design ala `` object method1 method2 method3 ``|c#         |
|should db layer members be static or instance                      |java       |
|what techniques can you use to profile your code                   |c#         |
|html over flash without stopping interaction with flash            |asp.net-mvc|
|is there an alternative for sleep in c                             |c#         |
|protecting javascript code in a ff extension                       |javascript |
|using linq with wcf                                                |asp.net-mvc|
|apply css dynam

+------------------------------------------------------------+--------+
|input                                                       |category|
+------------------------------------------------------------+--------+
|making an image greyscale with gd library                   |c#      |
|how do i add custom column to existing wss list template    |c#      |
|are incrementers / decrementers var++ var etc thread safe   |c++     |
|how to implement a singleton in c #                         |c#      |
|while clause in t sql that loops forever                    |sql     |
|how can i get a list of available wireless networks on linux|python  |
|c # in linux environment                                    |c#      |
|what is the aspnet_client folder for under the iis structure|.net    |
|what design pattern to use for user authentication in java  |java    |
|sending email in net through gmail                          |.net    |
+------------------------------------------------------------+--

+--------------------------------------------------------------------------------------+-----------+
|input                                                                                 |category   |
+--------------------------------------------------------------------------------------+-----------+
|making an image greyscale with gd library                                             |c#         |
|prevent long word to add horizontal scroll to html view                               |asp.net    |
|implementing and enforcing coding standards                                           |c++        |
|converting svg to png using c #                                                       |c#         |
|is there a way to asynchronously filter an ilist                                      |python     |
|how do i focus a foreign window                                                       |python     |
|decoding chunked http with actionscript                                               |sil

+-------------------------------------------------------+----------+
|input                                                  |category  |
+-------------------------------------------------------+----------+
|c the definitive truth about rand random and arc4random|c         |
|gantt chart controls on windows forms                  |windows   |
|how to consume json web services from a windows client |.net      |
|mac iwork/pages automation                             |flash     |
|how to return a page of results from sql               |sql-server|
|eclipse text comparison order                          |eclipse   |
|is there a way to asynchronously filter an ilist       |python    |
|be notified when visual/logical child added/removed    |c#        |
|class methods as event handlers in javascript          |javascript|
|should db layer members be static or instance          |java      |
+-------------------------------------------------------+----------+
only showing top 10 rows

+-------

+-----------------------------------------------------------------------------------+----------+
|input                                                                              |category  |
+-----------------------------------------------------------------------------------+----------+
|gantt chart controls on windows forms                                              |windows   |
|how do you deal with connection strings when deploying an asp net site             |asp.net   |
|game programming and event handlers                                                |c#        |
|mac iwork/pages automation                                                         |flash     |
|how to return a page of results from sql                                           |sql-server|
|how do i extract the version and path from an svn working copy into a nant variable|java      |
|should i provide a deep clone when implementing icloneable                         |java      |
|what techniques can you use t

+--------------------------------------------------------------------------------------+-----------+
|input                                                                                 |category   |
+--------------------------------------------------------------------------------------+-----------+
|how to consume json web services from a windows client                                |.net       |
|how do i add custom column to existing wss list template                              |c#         |
|how to pass an unpersisted modified object from view back to controller without a form|c#         |
|how to generate unit test code for methods                                            |java       |
|best way to use a db table as a message/job queue                                     |java       |
|how do i focus a foreign window                                                       |python     |
|why is app_offline failing to work as soon as you it starts loading dlls              |.ne

+--------------------------------------------------------------+-----------+
|input                                                         |category   |
+--------------------------------------------------------------+-----------+
|mac iwork/pages automation                                    |flash      |
|the necessity of hiding the salt for a hash                   |c++        |
|authoritative source on xml sig                               |.net       |
|c # in linux environment                                      |c#         |
|how to generate unit test code for methods                    |java       |
|how would you attack this polymorphism string building problem|c#         |
|how do i focus a foreign window                               |python     |
|image archive vs image strip                                  |css        |
|decoding chunked http with actionscript                       |silverlight|
|authenticating against active directory with java on linux    |java       |

+----------------------------------------------------------------------------+-----------+
|input                                                                       |category   |
+----------------------------------------------------------------------------+-----------+
|vector shape on stage appears over dynamic textfield                        |c++        |
|ms sql 2000 turn off logging during stored procedure                        |sql-server |
|doctype rss & html entities                                                 |html       |
|how can i get a list of available wireless networks on linux                |python     |
|ant and the available task what if something is not available               |java       |
|how to do unit testing with uncertainties                                   |java       |
|cross platform editor control                                               |python     |
|protecting javascript code in a ff extension                                |javascript |

+----------------------------------------------------------------------------+----------+
|input                                                                       |category  |
+----------------------------------------------------------------------------+----------+
|how do you manage infragistics webgrid data from javascript/ajax code       |c#        |
|python beyond the basics                                                    |python    |
|how do you deal with connection strings when deploying an asp net site      |asp.net   |
|getting odd error on net executenonquery                                    |.net      |
|authoritative source on xml sig                                             |.net      |
|how to generate unit test code for methods                                  |java      |
|c # winforms datagridview/sql compact negative integer in primary key column|c#        |
|is there a way to asynchronously filter an ilist                            |python    |
|class met

+--------------------------------------------------------------------+--------+
|input                                                               |category|
+--------------------------------------------------------------------+--------+
|gantt chart controls on windows forms                               |windows |
|how to consume json web services from a windows client              |.net    |
|the necessity of hiding the salt for a hash                         |c++     |
|programmatically launching standalone adobe flashplayer on linux/x11|windows |
|how to implement a singleton in c #                                 |c#      |
|while clause in t sql that loops forever                            |sql     |
|best way to use a db table as a message/job queue                   |java    |
|how can i determine the ip of my router/gateway in java             |java    |
|stopping msi from launching an exe in the system context            |.net    |
|how to serialize an object to xml witho

+----------------------------------------------------------------------------+-------------+
|input                                                                       |category     |
+----------------------------------------------------------------------------+-------------+
|sql server and the guest account what is this for                           |sql-server   |
|gantt chart controls on windows forms                                       |windows      |
|build tar file from directory in php without exec/passthru                  |php          |
|sql query count with 0 count                                                |sql          |
|how to disable visual studio macro `` tip '' balloon                        |visual-studio|
|implementing and enforcing coding standards                                 |c++          |
|how to generate unit test code for methods                                  |java         |
|is it possible to define in a dependent dll s application config     

+-------------------------------------------------------+----------+
|input                                                  |category  |
+-------------------------------------------------------+----------+
|encoding problem classic asp                           |asp.net   |
|how can i determine the ip of my router/gateway in java|java      |
|parsing t sql to parameterize a query                  |sql       |
|sending email in net through gmail                     |.net      |
|which css tag creates a box like this with title       |javascript|
|config values in db or file                            |c#        |
|virtual directory in asp net project                   |asp.net   |
|reading an xml file using fileinputstream for java     |java      |
|how to synchronize two subversion repositories         |c#        |
|tool for viewing x 509 certificates                    |windows   |
+-------------------------------------------------------+----------+
only showing top 10 rows

+-------

+------------------------------------------------------------------------+----------+
|input                                                                   |category  |
+------------------------------------------------------------------------+----------+
|c the definitive truth about rand random and arc4random                 |c         |
|how do i add custom column to existing wss list template                |c#        |
|how does traceroute work                                                |c#        |
|getting odd error on net executenonquery                                |.net      |
|how to return a page of results from sql                                |sql-server|
|how do you quickly find the url for a win32 api on msdn                 |python    |
|vertical text with jquery                                               |jquery    |
|setting the height of a div dynamically                                 |c#        |
|can you set or where is the local document root      

+----------------------------------------------------------------------------+----------+
|input                                                                       |category  |
+----------------------------------------------------------------------------+----------+
|how can i change html attribute names with jquery                           |jquery    |
|is filestream lazy loaded in net                                            |.net      |
|programmatically launching standalone adobe flashplayer on linux/x11        |windows   |
|ms sql 2000 turn off logging during stored procedure                        |sql-server|
|how do i call net code c # /vb net from vbscript                            |c#        |
|c # winforms datagridview/sql compact negative integer in primary key column|c#        |
|suspending and notifying threads when there is work to do                   |java      |
|is there a way to asynchronously filter an ilist                            |python    |
|how would

+---------------------------------------------------------------------------------------------------------------------------+-------------+
|input                                                                                                                      |category     |
+---------------------------------------------------------------------------------------------------------------------------+-------------+
|how to disable visual studio macro `` tip '' balloon                                                                       |visual-studio|
|how do i call net code c # /vb net from vbscript                                                                           |c#           |
|vertical text with jquery                                                                                                  |jquery       |
|what design pattern to use for user authentication in java                                                                 |java         |
|what is the simples

+------------------------------------------------------------------------------------+----------+
|input                                                                               |category  |
+------------------------------------------------------------------------------------+----------+
|transforming selected text with a hotkey                                            |c#        |
|eclipse hide paths in the `` open resource '' dialog                                |java      |
|class methods as event handlers in javascript                                       |javascript|
|what s the term for design ala `` object method1 method2 method3 ``                 |c#        |
|sorting and grouping nested lists in python                                         |python    |
|how do you optimise your javascript                                                 |javascript|
|patterns for the overlap of two objects                                             |java      |
|can t create a subv

## Multi prediction

In [13]:
udf_predict = make_pandas_udf(multi_prediction=True)
df_output = df_input.withColumn("category",udf_predict(col("input")))
%timeit -n 20 df_output.sample(False,.10).show(10,False)

+----------------------------------------------------------------------+--------------------------+
|input                                                                 |category                  |
+----------------------------------------------------------------------+--------------------------+
|how do you deal with connection strings when deploying an asp net site|[asp.net, c#, asp.net-mvc]|
|how do i add custom column to existing wss list template              |[c#, .net, java]          |
|best update method for mysql db                                       |[mysql, sql, database]    |
|how to generate unit test code for methods                            |[java, c#, .net]          |
|daemon threads explanation                                            |[c#, svn, algorithm]      |
|is there a way to asynchronously filter an ilist                      |[python, java, .net]      |
|2d javascript array                                                   |[javascript, jquery, html]|


+-------------------------------------------------------------------+-----------------------------+
|input                                                              |category                     |
+-------------------------------------------------------------------+-----------------------------+
|how can i send an array to php through ajax                        |[php, html, jquery]          |
|c the definitive truth about rand random and arc4random            |[c, c#, c++]                 |
|how to effectively implement sessions in gae                       |[ruby, python, mysql]        |
|while clause in t sql that loops forever                           |[sql, sql-server, mysql]     |
|post from one controller action to another not redirect            |[asp.net, html, css]         |
|percentages of subtotal in a report                                |[c++, java, c#]              |
|what s the term for design ala `` object method1 method2 method3 ``|[c#, python, c]              |


+--------------------------------------------------------------+----------------------------------+
|input                                                         |category                          |
+--------------------------------------------------------------+----------------------------------+
|are incrementers / decrementers var++ var etc thread safe     |[c++, javascript, windows]        |
|sql query count with 0 count                                  |[sql, sql-server, sql-server-2005]|
|how to disable visual studio macro `` tip '' balloon          |[visual-studio, .net, c#]         |
|how to implement a singleton in c #                           |[c#, .net, c]                     |
|how can i create prototype methods like javascript in c # net |[c#, .net, asp.net]               |
|best way to use a db table as a message/job queue             |[java, sql, sql-server]           |
|ant and the available task what if something is not available |[java, .net, c#]                  |


+----------------------------------------------------------------------------------------------------------------+----------------------------------+
|input                                                                                                           |category                          |
+----------------------------------------------------------------------------------------------------------------+----------------------------------+
|should i have one class for every database i use                                                                |[java, c#, sql-server]            |
|eclipse hide paths in the `` open resource '' dialog                                                            |[java, c++, c]                    |
|what is the best way to determine the number of days in a month with javascript                                 |[javascript, java, c++]           |
|be notified when visual/logical child added/removed                                                

+------------------------------------------------------------------------------------------+--------------------------+
|input                                                                                     |category                  |
+------------------------------------------------------------------------------------------+--------------------------+
|c the definitive truth about rand random and arc4random                                   |[c, c#, c++]              |
|how to consume json web services from a windows client                                    |[.net, windows, c#]       |
|prevent long word to add horizontal scroll to html view                                   |[asp.net, .net, c#]       |
|mac iwork/pages automation                                                                |[flash, wcf, flex]        |
|carbide / symbian c++ change application icon                                             |[c++, c, windows]         |
|can you use an alias in the where claus

+----------------------------------------------------------------------------------------------------------------+---------------------------+
|input                                                                                                           |category                   |
+----------------------------------------------------------------------------------------------------------------+---------------------------+
|carbide / symbian c++ change application icon                                                                   |[c++, c, windows]          |
|best update method for mysql db                                                                                 |[mysql, sql, database]     |
|implementing and enforcing coding standards                                                                     |[c++, java, c#]            |
|db side encryption via nhibernate                                                                               |[wcf, flash, linq-to-sql]  |

+--------------------------------------------------------------------------------------+-------------------------+
|input                                                                                 |category                 |
+--------------------------------------------------------------------------------------+-------------------------+
|gantt chart controls on windows forms                                                 |[windows, .net, c#]      |
|is filestream lazy loaded in net                                                      |[.net, c#, asp.net]      |
|how to pass an unpersisted modified object from view back to controller without a form|[c#, .net, javascript]   |
|building flex projects in ant/nant                                                    |[flex, ruby, silverlight]|
|how do i call net code c # /vb net from vbscript                                      |[c#, .net, asp.net]      |
|daemon threads explanation                                                     

+--------------------------------------------------------------------------------------------------------+----------------------------------+
|input                                                                                                   |category                          |
+--------------------------------------------------------------------------------------------------------+----------------------------------+
|how does traceroute work                                                                                |[c#, .net, java]                  |
|can sql server express be used to effectively administrate a sql server standard/enterprise installation|[sql-server, sql, sql-server-2005]|
|how to return a page of results from sql                                                                |[sql-server, c#, sql]             |
|post from one controller action to another not redirect                                                 |[asp.net, html, css]              |
|ant a

+----------------------------------------------------------------------+-----------------------------------------+
|input                                                                 |category                                 |
+----------------------------------------------------------------------+-----------------------------------------+
|transforming selected text with a hotkey                              |[c#, python, asp.net]                    |
|how do you manage infragistics webgrid data from javascript/ajax code |[c#, java, .net]                         |
|how do you deal with connection strings when deploying an asp net site|[asp.net, c#, asp.net-mvc]               |
|best update method for mysql db                                       |[mysql, sql, database]                   |
|sql query count with 0 count                                          |[sql, sql-server, sql-server-2005]       |
|programmatically launching standalone adobe flashplayer on linux/x11  |[windows

+----------------------------------------------------------+-----------------------------------------+
|input                                                     |category                                 |
+----------------------------------------------------------+-----------------------------------------+
|how can i send an array to php through ajax               |[php, html, jquery]                      |
|how to consume json web services from a windows client    |[.net, windows, c#]                      |
|how to effectively implement sessions in gae              |[ruby, python, mysql]                    |
|doctype rss & html entities                               |[html, asp.net, css]                     |
|how can i determine the ip of my router/gateway in java   |[java, c++, c#]                          |
|image archive vs image strip                              |[css, asp.net-mvc, asp.net]              |
|should db layer members be static or instance             |[java, asp.ne

+----------------------------------------------------------+------------------------------+
|input                                                     |category                      |
+----------------------------------------------------------+------------------------------+
|how can i send an array to php through ajax               |[php, html, jquery]           |
|how to consume json web services from a windows client    |[.net, windows, c#]           |
|how to return a page of results from sql                  |[sql-server, c#, sql]         |
|how to generate unit test code for methods                |[java, c#, .net]              |
|converting svg to png using c #                           |[c#, .net, winforms]          |
|eclipse hide paths in the `` open resource '' dialog      |[java, c++, c]                |
|percentages of subtotal in a report                       |[c++, java, c#]               |
|how can i determine the ip of my router/gateway in java   |[java, c++, c#]     

+----------------------------------------------------------------------------------------------+-----------------------------+
|input                                                                                         |category                     |
+----------------------------------------------------------------------------------------------+-----------------------------+
|transforming selected text with a hotkey                                                      |[c#, python, asp.net]        |
|sql server and the guest account what is this for                                             |[sql-server, sql, database]  |
|how do you deal with connection strings when deploying an asp net site                        |[asp.net, c#, asp.net-mvc]   |
|building flex projects in ant/nant                                                            |[flex, ruby, silverlight]    |
|best update method for mysql db                                                               |[mysql, sql, da

+----------------------------------------------------------------+--------------------------+
|input                                                           |category                  |
+----------------------------------------------------------------+--------------------------+
|transforming selected text with a hotkey                        |[c#, python, asp.net]     |
|whats the best way to start using mylyn                         |[c#, .net, javascript]    |
|should i have one class for every database i use                |[java, c#, sql-server]    |
|carbide / symbian c++ change application icon                   |[c++, c, windows]         |
|building flex projects in ant/nant                              |[flex, ruby, silverlight] |
|is there a way to asynchronously filter an ilist                |[python, java, .net]      |
|setting the height of a div dynamically                         |[c#, javascript, .net]    |
|how can i determine the ip of my router/gateway in java    

+----------------------------------------------------------------------------+----------------------------+
|input                                                                       |category                    |
+----------------------------------------------------------------------------+----------------------------+
|best update method for mysql db                                             |[mysql, sql, database]      |
|c # lambda expressions or delegates as a properties or arguments            |[c#, .net, asp.net]         |
|db side encryption via nhibernate                                           |[wcf, flash, linq-to-sql]   |
|setting the height of a div dynamically                                     |[c#, javascript, .net]      |
|is it true that the smallest memory allocation in net is a byte             |[.net, c#, asp.net]         |
|what are the limits of ruby on rails                                        |[ruby, ruby-on-rails, mysql]|
|in c # or any language what

+----------------------------------------------------------------------------+-----------------------------------------+
|input                                                                       |category                                 |
+----------------------------------------------------------------------------+-----------------------------------------+
|transforming selected text with a hotkey                                    |[c#, python, asp.net]                    |
|how do you manage infragistics webgrid data from javascript/ajax code       |[c#, java, .net]                         |
|how to disable visual studio macro `` tip '' balloon                        |[visual-studio, .net, c#]                |
|while clause in t sql that loops forever                                    |[sql, sql-server, mysql]                 |
|db side encryption via nhibernate                                           |[wcf, flash, linq-to-sql]                |
|c # winforms datagridview/sql c

+-------------------------------------------------------------------+---------------------------+
|input                                                              |category                   |
+-------------------------------------------------------------------+---------------------------+
|how can i send an array to php through ajax                        |[php, html, jquery]        |
|while clause in t sql that loops forever                           |[sql, sql-server, mysql]   |
|c # in linux environment                                           |[c#, c, winforms]          |
|how can i create prototype methods like javascript in c # net      |[c#, .net, asp.net]        |
|is it possible to define in a dependent dll s application config   |[c#, .net, asp.net]        |
|eclipse hide paths in the `` open resource '' dialog               |[java, c++, c]             |
|can you use an alias in the where clause in mysql                  |[mysql, ruby, sql]         |
|how would you attac

+----------------------------------------------------------+----------------------------------+
|input                                                     |category                          |
+----------------------------------------------------------+----------------------------------+
|making an image greyscale with gd library                 |[c#, php, html]                   |
|sql server and the guest account what is this for         |[sql-server, sql, database]       |
|build tar file from directory in php without exec/passthru|[php, c++, .net]                  |
|building flex projects in ant/nant                        |[flex, ruby, silverlight]         |
|best update method for mysql db                           |[mysql, sql, database]            |
|doctype rss & html entities                               |[html, asp.net, css]              |
|encoding problem classic asp                              |[asp.net, asp.net-mvc, css]       |
|how do i focus a foreign window        

+-----------------------------------------------------------------------------------+--------------------------+
|input                                                                              |category                  |
+-----------------------------------------------------------------------------------+--------------------------+
|transforming selected text with a hotkey                                           |[c#, python, asp.net]     |
|how can i change html attribute names with jquery                                  |[jquery, javascript, html]|
|c the definitive truth about rand random and arc4random                            |[c, c#, c++]              |
|getting odd error on net executenonquery                                           |[.net, c#, asp.net]       |
|best update method for mysql db                                                    |[mysql, sql, database]    |
|the necessity of hiding the salt for a hash                                        |[c++, c#, j

+--------------------------------------------------------------------------------------------------------+----------------------------------+
|input                                                                                                   |category                          |
+--------------------------------------------------------------------------------------------------------+----------------------------------+
|gantt chart controls on windows forms                                                                   |[windows, .net, c#]               |
|how do i add custom column to existing wss list template                                                |[c#, .net, java]                  |
|can sql server express be used to effectively administrate a sql server standard/enterprise installation|[sql-server, sql, sql-server-2005]|
|sql query count with 0 count                                                                            |[sql, sql-server, sql-server-2005]|
|autho

+--------------------------------------------------------------------------------------------------------+----------------------------------+
|input                                                                                                   |category                          |
+--------------------------------------------------------------------------------------------------------+----------------------------------+
|how to consume json web services from a windows client                                                  |[.net, windows, c#]               |
|can sql server express be used to effectively administrate a sql server standard/enterprise installation|[sql-server, sql, sql-server-2005]|
|building flex projects in ant/nant                                                                      |[flex, ruby, silverlight]         |
|how to disable visual studio macro `` tip '' balloon                                                    |[visual-studio, .net, c#]         |
|imple

+--------------------------------------------------------------------------------------+-----------------------+
|input                                                                                 |category               |
+--------------------------------------------------------------------------------------+-----------------------+
|how to pass an unpersisted modified object from view back to controller without a form|[c#, .net, javascript] |
|game programming and event handlers                                                   |[c#, .net, asp.net]    |
|best update method for mysql db                                                       |[mysql, sql, database] |
|programmatically launching standalone adobe flashplayer on linux/x11                  |[windows, .net, c#]    |
|how to return a page of results from sql                                              |[sql-server, c#, sql]  |
|converting svg to png using c #                                                       |[c#, .ne

+-----------------------------------------------------------------------------------+---------------------------+
|input                                                                              |category                   |
+-----------------------------------------------------------------------------------+---------------------------+
|how can i change html attribute names with jquery                                  |[jquery, javascript, html] |
|gantt chart controls on windows forms                                              |[windows, .net, c#]        |
|vector shape on stage appears over dynamic textfield                               |[c++, c#, .net]            |
|the necessity of hiding the salt for a hash                                        |[c++, c#, java]            |
|how can i get a list of available wireless networks on linux                       |[python, c#, .net]         |
|how to return a page of results from sql                                           |[sq

+--------------------------------------------------------------------------------------------------------+----------------------------------+
|input                                                                                                   |category                          |
+--------------------------------------------------------------------------------------------------------+----------------------------------+
|how do you deal with connection strings when deploying an asp net site                                  |[asp.net, c#, asp.net-mvc]        |
|can sql server express be used to effectively administrate a sql server standard/enterprise installation|[sql-server, sql, sql-server-2005]|
|carbide / symbian c++ change application icon                                                           |[c++, c, windows]                 |
|setting the height of a div dynamically                                                                 |[c#, javascript, .net]            |
|how c

+----------------------------------------------------------------------------+---------------------------+
|input                                                                       |category                   |
+----------------------------------------------------------------------------+---------------------------+
|how to consume json web services from a windows client                      |[.net, windows, c#]        |
|how to implement a singleton in c #                                         |[c#, .net, c]              |
|c # lambda expressions or delegates as a properties or arguments            |[c#, .net, asp.net]        |
|encoding problem classic asp                                                |[asp.net, asp.net-mvc, css]|
|best way to use a db table as a message/job queue                           |[java, sql, sql-server]    |
|how do i call net code c # /vb net from vbscript                            |[c#, .net, asp.net]        |
|class methods as event handlers in j

+--------------------------------------------------------------------+-----------------------------+
|input                                                               |category                     |
+--------------------------------------------------------------------+-----------------------------+
|making an image greyscale with gd library                           |[c#, php, html]              |
|programmatically launching standalone adobe flashplayer on linux/x11|[windows, .net, c#]          |
|c # lambda expressions or delegates as a properties or arguments    |[c#, .net, asp.net]          |
|how do you quickly find the url for a win32 api on msdn             |[python, c++, c#]            |
|db side encryption via nhibernate                                   |[wcf, flash, linq-to-sql]    |
|encoding problem classic asp                                        |[asp.net, asp.net-mvc, css]  |
|is it possible to define in a dependent dll s application config    |[c#, .net, asp.net]  

## From `array<string>` type to `string` type

In [14]:
# Flatten each array<string> prediction into a single '|'-delimited string,
# then print the first 20 rows without truncating long cell values.
(
    df_output
    .withColumn('category', concat_ws('|', col('category')))
    .show(n=20, truncate=False)
)

+--------------------------------------------------------------------------------------+-----------------------+
|input                                                                                 |category               |
+--------------------------------------------------------------------------------------+-----------------------+
|making an image greyscale with gd library                                             |c#|php|html            |
|transforming selected text with a hotkey                                              |c#|python|asp.net      |
|sql server and the guest account what is this for                                     |sql-server|sql|database|
|how can i change html attribute names with jquery                                     |jquery|javascript|html |
|how can i send an array to php through ajax                                           |php|html|jquery        |
|c the definitive truth about rand random and arc4random                               |c|c#|c++