diff --git a/README.md b/README.md
index b007ce38..f2303695 100644
--- a/README.md
+++ b/README.md
@@ -157,4 +157,4 @@ Mobius is licensed under the MIT license. See [LICENSE](LICENSE) file for full l
 * tweet [@MobiusForSpark](http://twitter.com/MobiusForSpark)
 
 ## Code of Conduct
-This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
+This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
\ No newline at end of file
diff --git a/appveyor.yml b/appveyor.yml
index b7a50cef..4eb27748 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,4 +1,4 @@
-version: 2.0.2-SNAPSHOT.{build}
+version: 2.3.1-SNAPSHOT.{build}
 
 environment:
   securefile:
diff --git a/build/Build.cmd b/build/Build.cmd
index 05239aca..485aa3b0 100644
--- a/build/Build.cmd
+++ b/build/Build.cmd
@@ -6,6 +6,8 @@
 rem Copyright (c) Microsoft. All rights reserved.
 rem Licensed under the MIT license. See LICENSE file in the project root for full license information.
 rem
+SET MAVEN_OPTS=-Dhttps.protocols=TLSv1,TLSv1.1,TLSv1.2
+
 if "%1" == "csharp" set buildCSharp=true
 
 SET CMDHOME=%~dp0
diff --git a/build/localmode/RunSamples.cmd b/build/localmode/RunSamples.cmd
index b9690e3f..57872f73 100644
--- a/build/localmode/RunSamples.cmd
+++ b/build/localmode/RunSamples.cmd
@@ -47,7 +47,7 @@ if "%precheck%" == "bad" (goto :EOF)
 @rem
 @rem setup Hadoop and Spark versions
 @rem
-set SPARK_VERSION=2.0.2
+set SPARK_VERSION=2.3.1
 set HADOOP_VERSION=2.6
 set APACHE_DIST_SERVER=archive.apache.org
 @echo [RunSamples.cmd] SPARK_VERSION=%SPARK_VERSION%, HADOOP_VERSION=%HADOOP_VERSION%, APACHE_DIST_SERVER=%APACHE_DIST_SERVER%
@@ -100,7 +100,7 @@ if "!USER_EXE!"=="" (
     call sparkclr-submit.cmd --conf spark.sql.warehouse.dir=%TEMP_DIR% %*
 )
 
-@if ERRORLEVEL 1 GOTO :ErrorStop
+@if ERRORLEVEL 2 GOTO :ErrorStop
 
 @GOTO :EOF
diff --git a/build/localmode/downloadtools.ps1 b/build/localmode/downloadtools.ps1
index c42ab8ae..512a23f3 100644
--- a/build/localmode/downloadtools.ps1
+++ b/build/localmode/downloadtools.ps1
@@ -20,7 +20,7 @@ if ($stage.ToLower() -eq "run")
     $hadoopVersion = if ($envValue -eq $null) { "2.6" } else { $envValue }
 
     $envValue = [Environment]::GetEnvironmentVariable("SPARK_VERSION")
-    $sparkVersion = if ($envValue -eq $null) { "2.0.2" } else { $envValue }
+    $sparkVersion = if ($envValue -eq $null) { "2.3.1" } else { $envValue }
 
     Write-Output "[downloadtools] hadoopVersion=$hadoopVersion, sparkVersion=$sparkVersion, apacheDistServer=$apacheDistServer"
 }
diff --git a/build/localmode/run-samples.sh b/build/localmode/run-samples.sh
index 685507d3..24d4f3db 100755
--- a/build/localmode/run-samples.sh
+++ b/build/localmode/run-samples.sh
@@ -16,7 +16,7 @@ do
 done
 
 # setup Hadoop and Spark versions
-export SPARK_VERSION=2.0.2
+export SPARK_VERSION=2.3.1
 export HADOOP_VERSION=2.6
 export APACHE_DIST_SERVER=archive.apache.org
 echo "[run-samples.sh] SPARK_VERSION=$SPARK_VERSION, HADOOP_VERSION=$HADOOP_VERSION, APACHE_DIST_SERVER=$APACHE_DIST_SERVER"
diff --git a/cpp/Riosock/Riosock.vcxproj b/cpp/Riosock/Riosock.vcxproj
index d61d067c..95b642db 100644 --- a/cpp/Riosock/Riosock.vcxproj +++ b/cpp/Riosock/Riosock.vcxproj @@ -1,5 +1,5 @@  - + Debug @@ -20,13 +20,13 @@ DynamicLibrary true - v120 + v140 Unicode DynamicLibrary false - v120 + v140 true Unicode diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj b/csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj index d887daf8..72341a3f 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj @@ -35,16 +35,17 @@ prompt 4 ..\documentation\Microsoft.Spark.CSharp.Adapter.Doc.XML + true - - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + + ..\..\packages\log4net.2.0.8\lib\net45-full\log4net.dll - - ..\..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll + + ..\..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll ..\..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll @@ -98,6 +99,7 @@ + @@ -184,6 +186,7 @@ + diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/IRDDCollector.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/IRDDCollector.cs index b8b078c2..51250de6 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/IRDDCollector.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/IRDDCollector.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using Microsoft.Spark.CSharp.Network; namespace Microsoft.Spark.CSharp.Core { @@ -11,6 +12,6 @@ namespace Microsoft.Spark.CSharp.Core /// interface IRDDCollector { - IEnumerable Collect(int port, SerializedMode serializedMode, Type type); + IEnumerable Collect(SocketInfo info, SerializedMode serializedMode, Type type); } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs index bdfbd981..9dfd1198 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs @@ -4,6 +4,7 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.Spark.CSharp.Network; using Microsoft.Spark.CSharp.Proxy; using Microsoft.Spark.CSharp.Services; @@ -60,6 +61,7 @@ public SparkContext SparkContext { return sparkContext; } + set { sparkContext = value; } } /// @@ -592,13 +594,13 @@ public void ForeachPartition(Action> f) /// public T[] Collect() { - int port = RddProxy.CollectAndServe(); - return Collect(port).Cast().ToArray(); + var info = RddProxy.CollectAndServe(); + return Collect(info).Cast().ToArray(); } - internal IEnumerable Collect(int port) + internal IEnumerable Collect(SocketInfo info) { - return RddProxy.RDDCollector.Collect(port, serializedMode, typeof(T)); + return RddProxy.RDDCollector.Collect(info, serializedMode, typeof(T)); } /// @@ -830,9 +832,9 @@ public T[] Take(int num) var mappedRDD = MapPartitionsWithIndex(new TakeHelper(left).Execute); - int port = sparkContext.SparkContextProxy.RunJob(mappedRDD.RddProxy, partitions); + var info = sparkContext.SparkContextProxy.RunJob(mappedRDD.RddProxy, partitions); - IEnumerable res = Collect(port).Cast(); + IEnumerable res = Collect(info).Cast(); items.AddRange(res); partsScanned += numPartsToTry; @@ -925,7 +927,7 @@ public RDD Subtract(RDD other, int numPartitions = 0) /// public RDD Repartition(int numPartitions) { - return new RDD(RddProxy.Repartition(numPartitions), sparkContext); + return new RDD(RddProxy.Repartition(numPartitions), sparkContext, serializedMode); } /// @@ -942,8 +944,8 @@ public RDD Repartition(int numPartitions) /// public RDD Coalesce(int numPartitions, bool 
shuffle = false) { - return new RDD(RddProxy.Coalesce(numPartitions, shuffle), sparkContext); - } + return new RDD(RddProxy.Coalesce(numPartitions, shuffle), sparkContext, serializedMode); + } /// /// Zips this RDD with another one, returning key-value pairs with the @@ -1065,8 +1067,8 @@ public IEnumerable ToLocalIterator() foreach (int partition in Enumerable.Range(0, GetNumPartitions())) { var mappedRDD = MapPartitionsWithIndex((pid, iter) => iter); - int port = sparkContext.SparkContextProxy.RunJob(mappedRDD.RddProxy, Enumerable.Range(partition, 1)); - foreach (T row in Collect(port)) + var info = sparkContext.SparkContextProxy.RunJob(mappedRDD.RddProxy, Enumerable.Range(partition, 1)); + foreach (T row in Collect(info)) yield return row; } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDDCollector.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDDCollector.cs index 6d92ad29..05963958 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDDCollector.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDDCollector.cs @@ -11,6 +11,7 @@ using System.Text; using Microsoft.Spark.CSharp.Interop.Ipc; using Microsoft.Spark.CSharp.Network; +using Microsoft.Spark.CSharp.Services; using Microsoft.Spark.CSharp.Sql; namespace Microsoft.Spark.CSharp.Core @@ -20,14 +21,31 @@ namespace Microsoft.Spark.CSharp.Core /// class RDDCollector : IRDDCollector { - public IEnumerable Collect(int port, SerializedMode serializedMode, Type type) + private static ILoggerService logger; + private static ILoggerService Logger + { + get + { + if (logger != null) return logger; + logger = LoggerServiceFactory.GetLogger(typeof(RDDCollector)); + return logger; + } + } + + public IEnumerable Collect(SocketInfo info, SerializedMode serializedMode, Type type) { IFormatter formatter = new BinaryFormatter(); var sock = SocketFactory.CreateSocket(); - sock.Connect(IPAddress.Loopback, port); + sock.Connect(IPAddress.Loopback, info.Port, null); using (var s = sock.GetStream()) { + if (info.Secret != null) + { + SerDe.Write(s, info.Secret); + var reply = SerDe.ReadString(s); + Logger.LogDebug("Connect back to JVM: " + reply); + } byte[] buffer; while ((buffer = SerDe.ReadBytes(s)) != null && buffer.Length > 0) { diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmBridge.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmBridge.cs index a3e6cd96..366ed966 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmBridge.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmBridge.cs @@ -36,7 +36,7 @@ private ISocketWrapper GetConnection() if (!sockets.TryDequeue(out socket)) { socket = SocketFactory.CreateSocket(); - socket.Connect(IPAddress.Loopback, portNumber); + socket.Connect(IPAddress.Loopback, portNumber, null); } return socket; } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmObjectReference.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmObjectReference.cs index 75c27e22..12cdd934 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmObjectReference.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmObjectReference.cs @@ -12,12 +12,12 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc /// Reference to object created in JVM /// [Serializable] - internal class JvmObjectReference + public class JvmObjectReference { public string Id { get; private set; } private DateTime creationTime; - public JvmObjectReference(string jvmReferenceId) + internal JvmObjectReference(string jvmReferenceId) { Id = jvmReferenceId; creationTime = 
DateTime.UtcNow; @@ -48,6 +48,11 @@ public override int GetHashCode() return base.GetHashCode(); } + public string ObjectToString() + { + return SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(this, "toString").ToString(); + } + public string GetDebugInfo() { var javaObjectReferenceForClassObject = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(this, "getClass").ToString()); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Interop/SparkCLREnvironment.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Interop/SparkCLREnvironment.cs index bee4625f..befa7ee5 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Interop/SparkCLREnvironment.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Interop/SparkCLREnvironment.cs @@ -31,7 +31,9 @@ internal static ISparkCLRProxy SparkCLRProxy } } - internal static IConfigurationService configurationService; + internal static IJvmBridge JvmBridge => SparkCLRIpcProxy.JvmBridge; + + internal static IConfigurationService configurationService; internal static IConfigurationService ConfigurationService { diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Network/ByteBuf.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Network/ByteBuf.cs index 90a11796..57886d51 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Network/ByteBuf.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Network/ByteBuf.cs @@ -11,7 +11,7 @@ namespace Microsoft.Spark.CSharp.Network /// ByteBuf delimits a section of a ByteBufChunk. /// It is the smallest unit to be allocated. /// - internal class ByteBuf + public class ByteBuf { private int readerIndex; private int writerIndex; diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Network/DefaultSocketWrapper.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Network/DefaultSocketWrapper.cs index 3db32f5c..8c96fcca 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Network/DefaultSocketWrapper.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Network/DefaultSocketWrapper.cs @@ -2,182 +2,203 @@ // Licensed under the MIT license. See LICENSE file in the project root for full license information. using System; +using System.Collections.Generic; using System.IO; using System.Net; using System.Net.Sockets; +using System.Text; +using System.Threading; using Microsoft.Spark.CSharp.Configuration; using Microsoft.Spark.CSharp.Services; namespace Microsoft.Spark.CSharp.Network { - /// - /// A simple wrapper of System.Net.Sockets.Socket class. - /// - internal class DefaultSocketWrapper : ISocketWrapper - { - private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(DefaultSocketWrapper)); - private readonly Socket innerSocket; - - /// - /// Default constructor that creates a new instance of DefaultSocket class which represents - /// a traditional socket (System.Net.Socket.Socket). - /// - /// This socket is bound to Loopback with port 0. - /// - public DefaultSocketWrapper() - { - innerSocket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); - var localEndPoint = new IPEndPoint(IPAddress.Loopback, 0); - innerSocket.Bind(localEndPoint); - } - - /// - /// Initializes a instance of DefaultSocket class using the specified System.Net.Socket.Socket object. - /// - /// The existing socket - private DefaultSocketWrapper(Socket socket) - { - innerSocket = socket; - } - - /// - /// Accepts a incoming connection request. 
- /// - /// A DefaultSocket instance used to send and receive data - public ISocketWrapper Accept() - { - var socket = innerSocket.Accept(); - return new DefaultSocketWrapper(socket); - } - - /// - /// Close the socket connections and releases all associated resources. - /// - public void Close() - { - innerSocket.Close(); - } - - /// - /// Establishes a connection to a remote host that is specified by an IP address and a port number - /// - /// The IP address of the remote host - /// The port number of the remote host - public void Connect(IPAddress remoteaddr, int port) - { - var remoteEndPoint = new IPEndPoint(remoteaddr, port); - innerSocket.Connect(remoteEndPoint); - } - - /// - /// Returns the NetworkStream used to send and receive data. - /// - /// The underlying Stream instance that be used to send and receive data - /// - /// GetStream returns a NetworkStream that you can use to send and receive data. You must close/dispose - /// the NetworkStream by yourself. Closing DefaultSocketWrapper does not release the NetworkStream - /// - public Stream GetStream() - { - return new NetworkStream(innerSocket); - } - - /// - /// Returns a stream used to receive data only. - /// - /// The underlying Stream instance that be used to receive data - public Stream GetInputStream() - { - // The default buffer size is 64K, PythonRDD also use 64K as default buffer size. - var readBufferSize = int.Parse(Environment.GetEnvironmentVariable(ConfigurationService.CSharpWorkerReadBufferSizeEnvName) ?? "65536"); - logger.LogDebug("Input stream buffer size: [{0}]", readBufferSize); - return readBufferSize > 0 ? new BufferedStream(GetStream(), readBufferSize) : GetStream(); - } - - /// - /// Returns a stream used to send data only. - /// - /// The underlying Stream instance that be used to send data - public Stream GetOutputStream() - { - // The default buffer size is 64K, PythonRDD also use 64K as default buffer size. - var writeBufferSize = int.Parse(Environment.GetEnvironmentVariable(ConfigurationService.CSharpWorkerWriteBufferSizeEnvName) ?? "65536"); - logger.LogDebug("Output stream buffer size: [{0}]", writeBufferSize); - return writeBufferSize > 0 ? new BufferedStream(GetStream(), writeBufferSize) : GetStream(); - } - - /// - /// Starts listening for incoming connections requests - /// - /// The maximum length of the pending connections queue. - public void Listen(int backlog = 16) - { - innerSocket.Listen(backlog); - } - - /// - /// Receives network data from this socket, and returns a ByteBuf that contains the received data. - /// - /// The DefaultSocketWrapper does not support this function. - /// - /// A ByteBuf object that contains received data. - public ByteBuf Receive() - { - throw new NotImplementedException(); - } - - /// - /// Sends data to this socket with a ByteBuf object that contains data to be sent. - /// - /// The DefaultSocketWrapper does not support this function. - /// - /// A ByteBuf object that contains data to be sent - public void Send(ByteBuf data) - { - throw new NotImplementedException(); - } - - /// - /// Disposes the resources used by this instance of the DefaultSocket class. - /// - /// - protected virtual void Dispose(bool disposing) - { - if (disposing) - { - innerSocket.Dispose(); - } - } - - /// - /// Releases all resources used by the current instance of the DefaultSocket class. 
- /// - public void Dispose() - { - Dispose(true); - } - - /// - /// Frees resources used by DefaultSocket class - /// - ~DefaultSocketWrapper() - { - Dispose(false); - } - - /// - /// Indicates whether there are data that has been received from the network and is available to be read. - /// - public bool HasData { get { return innerSocket.Available > 0; } } - - /// - /// Returns the local endpoint. - /// - public EndPoint LocalEndPoint { get { return innerSocket.LocalEndPoint; } } - - /// - /// Returns the remote endpoint if it has one. - /// - public EndPoint RemoteEndPoint { get { return innerSocket.RemoteEndPoint; } } - } + /// + /// A simple wrapper of System.Net.Sockets.Socket class. + /// + internal class DefaultSocketWrapper : ISocketWrapper + { + private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(DefaultSocketWrapper)); + private readonly Socket innerSocket; + + /// + /// Default constructor that creates a new instance of DefaultSocket class which represents + /// a traditional socket (System.Net.Socket.Socket). + /// + /// This socket is bound to Loopback with port 0. + /// + public DefaultSocketWrapper() + { + innerSocket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + var localEndPoint = new IPEndPoint(IPAddress.Loopback, 0); + innerSocket.Bind(localEndPoint); + } + + /// + /// Initializes a instance of DefaultSocket class using the specified System.Net.Socket.Socket object. + /// + /// The existing socket + private DefaultSocketWrapper(Socket socket) + { + innerSocket = socket; + } + + /// + /// Accepts a incoming connection request. + /// + /// A DefaultSocket instance used to send and receive data + public ISocketWrapper Accept() + { + var socket = innerSocket.Accept(); + return new DefaultSocketWrapper(socket); + } + + /// + /// Close the socket connections and releases all associated resources. + /// + public void Close() + { + innerSocket.Close(); + } + + /// + /// Establishes a connection to a remote host that is specified by an IP address and a port number + /// + /// The IP address of the remote host + /// The port number of the remote host + public void Connect(IPAddress remoteaddr, int port, string secret) + { + var remoteEndPoint = new IPEndPoint(remoteaddr, port); + innerSocket.Connect(remoteEndPoint); + } + + private static byte[] ReceiveAll(Socket socket, int len) + { + var buffer = new List(); + + while (socket.Available > 0 && buffer.Count < len) + { + var currByte = new Byte[1]; + var byteCounter = socket.Receive(currByte, currByte.Length, SocketFlags.None); + + if (byteCounter.Equals(1)) + { + buffer.Add(currByte[0]); + } + } + + return buffer.ToArray(); + } + + /// + /// Returns the NetworkStream used to send and receive data. + /// + /// The underlying Stream instance that be used to send and receive data + /// + /// GetStream returns a NetworkStream that you can use to send and receive data. You must close/dispose + /// the NetworkStream by yourself. Closing DefaultSocketWrapper does not release the NetworkStream + /// + public Stream GetStream() + { + return new NetworkStream(innerSocket); + } + + /// + /// Returns a stream used to receive data only. + /// + /// The underlying Stream instance that be used to receive data + public Stream GetInputStream() + { + // The default buffer size is 64K, PythonRDD also use 64K as default buffer size. + var readBufferSize = int.Parse(Environment.GetEnvironmentVariable(ConfigurationService.CSharpWorkerReadBufferSizeEnvName) ?? 
"65536"); + logger.LogDebug("Input stream buffer size: [{0}]", readBufferSize); + return readBufferSize > 0 ? new BufferedStream(GetStream(), readBufferSize) : GetStream(); + } + + /// + /// Returns a stream used to send data only. + /// + /// The underlying Stream instance that be used to send data + public Stream GetOutputStream() + { + // The default buffer size is 64K, PythonRDD also use 64K as default buffer size. + var writeBufferSize = int.Parse(Environment.GetEnvironmentVariable(ConfigurationService.CSharpWorkerWriteBufferSizeEnvName) ?? "65536"); + logger.LogDebug("Output stream buffer size: [{0}]", writeBufferSize); + return writeBufferSize > 0 ? new BufferedStream(GetStream(), writeBufferSize) : GetStream(); + } + + /// + /// Starts listening for incoming connections requests + /// + /// The maximum length of the pending connections queue. + public void Listen(int backlog = 16) + { + innerSocket.Listen(backlog); + } + + /// + /// Receives network data from this socket, and returns a ByteBuf that contains the received data. + /// + /// The DefaultSocketWrapper does not support this function. + /// + /// A ByteBuf object that contains received data. + public ByteBuf Receive() + { + throw new NotImplementedException(); + } + + /// + /// Sends data to this socket with a ByteBuf object that contains data to be sent. + /// + /// The DefaultSocketWrapper does not support this function. + /// + /// A ByteBuf object that contains data to be sent + public void Send(ByteBuf data) + { + throw new NotImplementedException(); + } + + /// + /// Disposes the resources used by this instance of the DefaultSocket class. + /// + /// + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + innerSocket.Dispose(); + } + } + + /// + /// Releases all resources used by the current instance of the DefaultSocket class. + /// + public void Dispose() + { + Dispose(true); + } + + /// + /// Frees resources used by DefaultSocket class + /// + ~DefaultSocketWrapper() + { + Dispose(false); + } + + /// + /// Indicates whether there are data that has been received from the network and is available to be read. + /// + public bool HasData { get { return innerSocket.Available > 0; } } + + /// + /// Returns the local endpoint. + /// + public EndPoint LocalEndPoint { get { return innerSocket.LocalEndPoint; } } + + /// + /// Returns the remote endpoint if it has one. + /// + public EndPoint RemoteEndPoint { get { return innerSocket.RemoteEndPoint; } } + } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Network/ISocketWrapper.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Network/ISocketWrapper.cs index b08dcd6f..45b61d26 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Network/ISocketWrapper.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Network/ISocketWrapper.cs @@ -11,7 +11,7 @@ namespace Microsoft.Spark.CSharp.Network /// ISocketWrapper interface defines the common methods to operate a socket (traditional socket or /// Windows Registered IO socket) /// - internal interface ISocketWrapper : IDisposable + public interface ISocketWrapper : IDisposable { /// /// Accepts a incoming connection request. 
@@ -24,12 +24,13 @@ internal interface ISocketWrapper : IDisposable /// void Close(); - /// - /// Establishes a connection to a remote host that is specified by an IP address and a port number - /// - /// The IP address of the remote host - /// The port number of the remote host - void Connect(IPAddress remoteaddr, int port); + /// + /// Establishes a connection to a remote host that is specified by an IP address and a port number + /// + /// The IP address of the remote host + /// The port number of the remote host + /// The secret to connect, can be null + void Connect(IPAddress remoteaddr, int port, string secret); /// /// Returns a stream used to send and receive data. diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Network/RioSocketWrapper.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Network/RioSocketWrapper.cs index 740787f2..54e73ed0 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Network/RioSocketWrapper.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Network/RioSocketWrapper.cs @@ -151,7 +151,7 @@ public void Close() /// /// The IP address of the remote host /// The port number of the remote host - public void Connect(IPAddress remoteaddr, int port) + public void Connect(IPAddress remoteaddr, int port, string secret) { EnsureAccessible(); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Network/SaeaSocketWrapper.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Network/SaeaSocketWrapper.cs index cb8ed0fe..505bf96d 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Network/SaeaSocketWrapper.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Network/SaeaSocketWrapper.cs @@ -111,7 +111,7 @@ public void Close() /// /// The IP address of the remote host /// The port number of the remote host - public void Connect(IPAddress remoteaddr, int port) + public void Connect(IPAddress remoteaddr, int port, string secret) { var remoteEndPoint = new IPEndPoint(remoteaddr, port); innerSocket.Connect(remoteEndPoint); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Network/SocketInfo.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Network/SocketInfo.cs new file mode 100644 index 00000000..d14e5cc7 --- /dev/null +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Network/SocketInfo.cs @@ -0,0 +1,28 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.Spark.CSharp.Interop.Ipc; + +namespace Microsoft.Spark.CSharp.Network +{ + public class SocketInfo + { + public readonly int Port; + public readonly string Secret; + + public SocketInfo(int port, string secret) + { + Port = port; + Secret = secret; + } + + public static SocketInfo Parse(object o) + { + var oo = o as List; + if (oo == null) throw new Exception(o.ToString() + " is not socket info "+typeof(List)+" "+o.GetType()); + return new SocketInfo(int.Parse(oo[0].ObjectToString()), oo[1].ObjectToString()); + } + } +} diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IDataFrameProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IDataFrameProxy.cs index 99285237..87071d9c 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IDataFrameProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IDataFrameProxy.cs @@ -13,7 +13,7 @@ internal interface IDataFrameProxy IRDDProxy JavaToCSharp(); string GetQueryExecution(); string GetExecutedPlan(); - string GetShowString(int numberOfRows, bool truncate); + string GetShowString(int numberOfRows, int truncate, bool vertical); bool IsLocal(); IStructTypeProxy GetSchema(); IRDDProxy ToJSON(); @@ -59,7 +59,9 @@ internal interface 
IDataFrameProxy IDataFrameProxy Repartition(int numPartitions, IColumnProxy[] columns); IDataFrameProxy Repartition(IColumnProxy[] columns); IDataFrameProxy Sample(bool withReplacement, double fraction, long seed); - IDataFrameWriterProxy Write(); + IDataFrameProxy Broadcast(); + + IDataFrameWriterProxy Write(); } internal interface IUDFProxy diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IRDDProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IRDDProxy.cs index e323cf47..24788c07 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IRDDProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IRDDProxy.cs @@ -7,6 +7,7 @@ using System.Text; using System.Threading.Tasks; using Microsoft.Spark.CSharp.Core; +using Microsoft.Spark.CSharp.Network; namespace Microsoft.Spark.CSharp.Proxy { @@ -41,6 +42,6 @@ internal interface IRDDProxy void SaveAsSequenceFile(string path, string compressionCodecClass); void SaveAsTextFile(string path, string compressionCodecClass); long Count(); - int CollectAndServe(); + SocketInfo CollectAndServe(); } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkContextProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkContextProxy.cs index f1a00acb..a53fdab7 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkContextProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkContextProxy.cs @@ -8,6 +8,7 @@ using System.Threading.Tasks; using Microsoft.Spark.CSharp.Core; using Microsoft.Spark.CSharp.Interop; +using Microsoft.Spark.CSharp.Network; namespace Microsoft.Spark.CSharp.Proxy @@ -50,7 +51,7 @@ internal interface ISparkContextProxy void CancelJobGroup(string groupId); void CancelAllJobs(); IStatusTrackerProxy StatusTracker { get; } - int RunJob(IRDDProxy rdd, IEnumerable partitions); + SocketInfo RunJob(IRDDProxy rdd, IEnumerable partitions); IBroadcastProxy ReadBroadcastFromFile(string path, out long broadcastId); IRDDProxy CreateCSharpRdd(IRDDProxy prefvJavaRddReference, byte[] command, Dictionary environmentVariables, List pythonIncludes, bool preservePartitioning, List broadcastVariables, List accumulator); IRDDProxy CreatePairwiseRDD(IRDDProxy javaReferenceInByteArrayRdd, int numPartitions, long partitionFuncId); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/DataFrameIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/DataFrameIpcProxy.cs index 177d33c5..85c1210c 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/DataFrameIpcProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/DataFrameIpcProxy.cs @@ -79,12 +79,12 @@ public string GetExecutedPlan() return SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(executedPlanReference, "toString", new object[] { }).ToString(); } - public string GetShowString(int numberOfRows, bool truncate) + public string GetShowString(int numberOfRows, int truncate, bool vertical) { return SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod( jvmDataFrameReference, "showString", - new object[] { numberOfRows, truncate }).ToString(); + new object[] { numberOfRows, truncate, vertical}).ToString(); } public bool IsLocal() @@ -575,7 +575,16 @@ public IDataFrameProxy Sample(bool withReplacement, double fraction, long seed) new object[] { withReplacement, fraction, seed }).ToString()), sqlContextProxy); } - public IDataFrameWriterProxy Write() + public IDataFrameProxy Broadcast() + { + return + new DataFrameIpcProxy( + new JvmObjectReference( + SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.functions", "broadcast", 
+ new object[] { jvmDataFrameReference }).ToString()), sqlContextProxy); + } + + public IDataFrameWriterProxy Write() { return new DataFrameWriterIpcProxy(new JvmObjectReference( SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmDataFrameReference, "write").ToString())); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/RDDIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/RDDIpcProxy.cs index 9377c079..3ef65772 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/RDDIpcProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/RDDIpcProxy.cs @@ -12,6 +12,7 @@ using Microsoft.Spark.CSharp.Core; using Microsoft.Spark.CSharp.Interop; using Microsoft.Spark.CSharp.Interop.Ipc; +using Microsoft.Spark.CSharp.Network; namespace Microsoft.Spark.CSharp.Proxy.Ipc { @@ -66,10 +67,10 @@ public long Count() return long.Parse(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(rdd, "count").ToString()); } - public int CollectAndServe() + public SocketInfo CollectAndServe() { var rdd = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "rdd")); - return int.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "collectAndServe", new object[] { rdd }).ToString()); + return SocketInfo.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "collectAndServe", new object[] { rdd })); } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkContextIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkContextIpcProxy.cs index 01290fd2..f48aa52e 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkContextIpcProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkContextIpcProxy.cs @@ -11,6 +11,7 @@ using Microsoft.Spark.CSharp.Core; using Microsoft.Spark.CSharp.Interop; using Microsoft.Spark.CSharp.Interop.Ipc; +using Microsoft.Spark.CSharp.Network; using Microsoft.Spark.CSharp.Proxy.Ipc; namespace Microsoft.Spark.CSharp.Proxy.Ipc @@ -134,10 +135,8 @@ public IHadoopConfigurationProxy HadoopConfiguration public void Accumulator(int port) { - jvmAccumulatorReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaContextReference, "accumulator", - SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList"), - SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.python.PythonAccumulatorParam", IPAddress.Loopback.ToString(), port) - )); + jvmAccumulatorReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.python.PythonAccumulatorV2", IPAddress.Loopback.ToString(), port); + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSparkContextReference, "register", new object[] { jvmAccumulatorReference }); } public void Stop() @@ -241,7 +240,7 @@ public void SetCheckpointDir(string directory) public void SetJobGroup(string groupId, string description, bool interruptOnCancel) { - SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaContextReference, "setCheckpointDir", new object[] { groupId, description, interruptOnCancel }); + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaContextReference, "setJobGroup", new object[] { groupId, description, interruptOnCancel }); } public void SetLocalProperty(string key, string value) @@ -344,10 +343,10 @@ public IUDFProxy CreateUserDefinedCSharpFunction(string name, byte[] command, st } - public int RunJob(IRDDProxy rdd, IEnumerable partitions) + public SocketInfo 
RunJob(IRDDProxy rdd, IEnumerable partitions) { var jpartitions = JvmBridgeUtils.GetJavaList(partitions); - return int.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "runJob", new object[] { jvmSparkContextReference, (rdd as RDDIpcProxy).JvmRddReference, jpartitions }).ToString()); + return SocketInfo.Parse(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "runJob", new object[] { jvmSparkContextReference, (rdd as RDDIpcProxy).JvmRddReference, jpartitions })); } public IBroadcastProxy ReadBroadcastFromFile(string path, out long broadcastId) diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkSessionIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkSessionIpcProxy.cs index febfd3b5..bc6e5a19 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkSessionIpcProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkSessionIpcProxy.cs @@ -27,7 +27,9 @@ public IUdfRegistrationProxy Udf } } - public ISqlContextProxy SqlContextProxy + internal JvmObjectReference JvmReference => jvmSparkSessionReference; + + public ISqlContextProxy SqlContextProxy { get { return sqlContextProxy; } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SqlContextIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SqlContextIpcProxy.cs index 4bb930fe..d6f00984 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SqlContextIpcProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SqlContextIpcProxy.cs @@ -106,7 +106,7 @@ public void RegisterFunction(string name, byte[] command, string returnType) var udf = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.sql.execution.python.UserDefinedPythonFunction", new object[] { - name, function, dt + name, function, dt, 100 /*BatchUDF*/, true /*deterministic*/ }); SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(judf, "registerPython", new object[] { name, udf }); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrame.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrame.cs index 66601ca2..b288baa9 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrame.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrame.cs @@ -6,7 +6,9 @@ using System.Globalization; using System.Linq; using Microsoft.Spark.CSharp.Core; +using Microsoft.Spark.CSharp.Interop.Ipc; using Microsoft.Spark.CSharp.Proxy; +using Microsoft.Spark.CSharp.Proxy.Ipc; using Microsoft.Spark.CSharp.Services; namespace Microsoft.Spark.CSharp.Sql @@ -66,10 +68,12 @@ private IRDDProxy RddProxy } } - /// - /// Returns true if the collect and take methods can be run locally (without any Spark executors). - /// - public bool IsLocal + internal JvmObjectReference JvmReference => (dataFrameProxy as DataFrameIpcProxy)?.JvmDataFrameReference; + + /// + /// Returns true if the collect and take methods can be run locally (without any Spark executors). + /// + public bool IsLocal { get { @@ -145,10 +149,11 @@ public long Count() /// /// Number of rows to display - default 20 /// Indicates if strings more than 20 characters long will be truncated - public void Show(int numberOfRows = 20, bool truncate = true) + /// If set to True, print output rows vertically (one line per column value). 
+ public void Show(int numberOfRows = 20, int truncate = 20, bool vertical = false) { logger.LogInfo("Writing {0} rows in the DataFrame to Console output", numberOfRows); - Console.WriteLine(dataFrameProxy.GetShowString(numberOfRows, truncate)); + Console.WriteLine(dataFrameProxy.GetShowString(numberOfRows, truncate, vertical)); } /// @@ -166,8 +171,8 @@ public void ShowSchema() /// public IEnumerable Collect() { - int port = RddProxy.CollectAndServe(); - return Rdd.Collect(port).Cast(); + var info = RddProxy.CollectAndServe(); + return Rdd.Collect(info).Cast(); } //TODO - add this method if needed to convert Row to collection of T @@ -917,10 +922,11 @@ public DataFrame Coalesce(int numPartitions) /// /// Persist this DataFrame with the default storage level (`MEMORY_AND_DISK`) /// + /// Persist storage type // Python API: https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py persist(self, storageLevel) - public DataFrame Persist() + public DataFrame Persist(StorageLevelType type= StorageLevelType.MEMORY_AND_DISK) { - dataFrameProxy.Persist(StorageLevelType.MEMORY_AND_DISK); + dataFrameProxy.Persist(type); return this; } @@ -944,6 +950,11 @@ public DataFrame Cache() return Persist(); } + public DataFrame Broadcast() + { + return new DataFrame(dataFrameProxy.Broadcast(), sparkContext); + } + /// /// Returns a new DataFrame that has exactly `numPartitions` partitions. /// diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrameReader.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrameReader.cs index 04fcc90c..c27700e2 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrameReader.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrameReader.cs @@ -159,5 +159,18 @@ public DataFrame Parquet(params string[] path) logger.LogInfo("Constructing DataFrame using Parquet source {0}", string.Join(";", path)); return new DataFrame(dataFrameReaderProxy.Parquet(path), sparkContext); } - } + + /// + /// Loads a AVRO file (one object per line) and returns the result as a DataFrame. + /// + /// This function goes through the input once to determine the input schema. If you know the + /// schema in advance, use the version that specifies the schema to avoid the extra scan. + /// + /// input path + public DataFrame Avro(string path) + { + logger.LogInfo("Constructing DataFrame using AVRO source {0}", path); + return Format("com.databricks.spark.avro").Load(path); + } + } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrameWriter.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrameWriter.cs index a16478dd..9fa9fdb0 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrameWriter.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrameWriter.cs @@ -170,5 +170,16 @@ public void Parquet(string path) { Format("parquet").Save(path); } - } + + /// + /// Saves the content of the DataFrame in AVRO format at the specified path. 
+ /// This is equivalent to: + /// Format("com.databricks.spark.avro").Save(path) + /// + public void Avro(string path) + { + Format("com.databricks.spark.avro").Save(path); + } + + } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Dataset.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Dataset.cs index b3a81cf0..bc89168c 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Dataset.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Dataset.cs @@ -92,7 +92,8 @@ public IEnumerable Columns() /// /// Number of rows - default is 20 /// Indicates if rows with more than 20 characters to be truncated - public void Show(int numberOfRows = 20, bool truncate = true) + /// If set to true, prints output rows vertically (one line per column value). + public void Show(int numberOfRows = 20, int truncate = 20, bool vertical = false) { ToDF().Show(numberOfRows, truncate); } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Functions.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Functions.cs index c9166fe0..a23d91a0 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Functions.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Functions.cs @@ -4,6 +4,8 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Reflection; +using System.Runtime.Serialization; using System.Text; using System.Threading.Tasks; @@ -1119,5 +1121,42 @@ internal IEnumerable Execute(int pid, IEnumerable input) return input.Select(a => func((A1)(a[0]), (A2)(a[1]), (A3)(a[2]), (A4)(a[3]), (A5)(a[4]), (A6)(a[5]), (A7)(a[6]), (A8)(a[7]), (A9)(a[8]), (A10)(a[9]))).Cast(); } } - #endregion + + [Serializable] + internal class UdfReflectionHelper + { + private readonly MethodInfo func; + + [NonSerialized] + private object[] _cache; + + internal UdfReflectionHelper(MethodInfo f) + { + func = f; + _cache = new object[func.GetParameters().Length]; + } + + public Type ReturnType => func.ReturnType; + + [OnDeserialized()] + public void Init(StreamingContext context) + { + _cache = new object[func.GetParameters().Length]; + } + + internal IEnumerable Execute(int pid, IEnumerable input) + { + return input.Select(Run).Cast(); + } + + private dynamic Run(dynamic input) + { + for (int i = 0; i < _cache.Length; ++i) + { + _cache[i] = input[i]; + } + return func.Invoke(null, _cache); + } + } + #endregion } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Row.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Row.cs index 77614a71..a299d1a8 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Row.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Row.cs @@ -18,17 +18,24 @@ public abstract class Row [NonSerialized] private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(Row)); - /// - /// Number of elements in the Row. - /// - /// elements count in this row - public abstract int Size(); + public abstract dynamic[] Values { get; } + + /// + /// Number of elements in the Row. + /// + /// elements count in this row + public abstract int Size(); /// /// Schema for the row. /// public abstract StructType GetSchema(); + public virtual void ResetValues(dynamic[] values) + { + throw new NotImplementedException(); + } + /// /// Returns the value at position i. 
/// @@ -80,8 +87,22 @@ public T GetAs(string columnName) internal class RowImpl : Row { private readonly StructType schema; - public dynamic[] Values { get { return values; } } - private readonly dynamic[] values; + + public override dynamic[] Values + { + get + { + if (!valuesConverted) + { + schema.ConvertPickleObjects(rawValues,rawValues); + valuesConverted = true; + } + return rawValues; + } + } + + private dynamic[] rawValues; + private bool valuesConverted = false; private readonly int columnCount; @@ -96,11 +117,11 @@ internal RowImpl(dynamic data, StructType schema) { if (data is dynamic[]) { - values = data as dynamic[]; + rawValues = data as dynamic[]; } else if (data is List) { - values = (data as List).ToArray(); + rawValues = (data as List).ToArray(); } else { @@ -109,17 +130,25 @@ internal RowImpl(dynamic data, StructType schema) this.schema = schema; - columnCount = values.Count(); - int schemaColumnCount = this.schema.Fields.Count(); + columnCount = rawValues.Length; + int schemaColumnCount = this.schema.Fields.Count; if (columnCount != schemaColumnCount) { throw new Exception(string.Format("column count inferred from data ({0}) and schema ({1}) mismatch", columnCount, schemaColumnCount)); } - - Initialize(); } - public override int Size() + public override void ResetValues(dynamic[] values) + { + if (columnCount != values.Length) + { + throw new ArgumentException("column count inferred from data and schema mismatch"); + } + rawValues = values; + valuesConverted = false; + } + + public override int Size() { return columnCount; } @@ -131,16 +160,15 @@ public override StructType GetSchema() public override dynamic Get(int i) { + if (i >= 0 && i < columnCount) return Values[i]; if (i >= columnCount) { throw new Exception(string.Format("i ({0}) >= columnCount ({1})", i, columnCount)); } - else if(i < 0) + else { throw new Exception(string.Format("i ({0}) < 0", i)); } - - return values[i]; } public override dynamic Get(string columnName) @@ -152,7 +180,7 @@ public override dynamic Get(string columnName) public override string ToString() { List cols = new List(); - foreach (var item in values) + foreach (var item in Values) { if (item != null) { @@ -166,73 +194,7 @@ public override string ToString() return string.Format("[{0}]", string.Join(",", cols.ToArray())); } - - - private void Initialize() - { - - int index = 0; - foreach (var field in schema.Fields) - { - if (field.DataType is ArrayType) - { - Func convertArrayTypeToStructTypeFunc = (dataType, length) => - { - StructField[] fields = new StructField[length]; - for(int i = 0; i < length ; i++) - { - fields[i] = new StructField(string.Format("_array_{0}", i), dataType); - } - return new StructType(fields); - }; - var elementType = (field.DataType as ArrayType).ElementType; - - // Note: When creating object from json, PySpark converts Json array to Python List (https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/types.py, _create_cls(dataType)), - // then Pyrolite unpickler converts Python List to C# ArrayList (https://github.com/irmen/Pyrolite/blob/v4.10/README.txt). So values[index] should be of type ArrayList; - // In case Python changes its implementation, which means value is not of type ArrayList, try cast to object[] because Pyrolite unpickler convert Python Tuple to C# object[]. - object[] valueOfArray = values[index] is ArrayList ? 
(values[index] as ArrayList).ToArray() : values[index] as object[]; - if (valueOfArray == null) - { - throw new ArgumentException("Cannot parse data of ArrayType: " + field.Name); - } - - values[index] = new RowImpl(valueOfArray, elementType as StructType ?? convertArrayTypeToStructTypeFunc(elementType, valueOfArray.Length)).values; - } - else if (field.DataType is MapType) - { - //TODO - throw new NotImplementedException(); - } - else if (field.DataType is StructType) - { - dynamic value = values[index]; - if (value != null) - { - var subRow = new RowImpl(values[index], field.DataType as StructType); - values[index] = subRow; - } - } - else if (field.DataType is DecimalType) - { - //TODO - throw new NotImplementedException(); - } - else if (field.DataType is DateType) - { - //TODO - throw new NotImplementedException(); - } - else if (field.DataType is StringType) - { - if (values[index] != null) values[index] = values[index].ToString(); - } - else - { - values[index] = values[index]; - } - index++; - } - } + } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/RowConstructor.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/RowConstructor.cs index 96b50c29..25726ad3 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/RowConstructor.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/RowConstructor.cs @@ -78,7 +78,7 @@ public Row GetRow() currentSchema = null; return row; } - + //removes objects of type RowConstructor and replacing them with actual values private object[] GetValues(object[] arguments) { @@ -86,7 +86,7 @@ private object[] GetValues(object[] arguments) int i = 0; foreach (var argument in arguments) { - if (argument != null && argument.GetType() == typeof(RowConstructor)) + if (argument is RowConstructor) { values[i++] = (argument as RowConstructor).Values; } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SparkSession.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SparkSession.cs index c4f72885..aa702164 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SparkSession.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SparkSession.cs @@ -9,7 +9,9 @@ using System.Text; using System.Threading.Tasks; using Microsoft.Spark.CSharp.Core; +using Microsoft.Spark.CSharp.Interop.Ipc; using Microsoft.Spark.CSharp.Proxy; +using Microsoft.Spark.CSharp.Proxy.Ipc; using Microsoft.Spark.CSharp.Services; using Microsoft.Spark.CSharp.Sql.Catalog; @@ -42,10 +44,12 @@ public Catalog.Catalog Catalog get { return catalog ?? (catalog = new Catalog.Catalog(SparkSessionProxy.GetCatalog())); } } - /// - /// Interface through which the user may access the underlying SparkContext. - /// - public SparkContext SparkContext { get; private set; } + internal JvmObjectReference JvmReference => (sparkSessionProxy as SparkSessionIpcProxy)?.JvmReference; + + /// + /// Interface through which the user may access the underlying SparkContext. + /// + public SparkContext SparkContext { get; private set; } public UdfRegistration Udf { @@ -114,18 +118,30 @@ public DataFrame CreateDataFrame(RDD rdd, StructType schema) // The below sqlContextProxy.CreateDataFrame() will call byteArrayRDDToAnyArrayRDD() of SQLUtils.scala which only accept RDD of type RDD[Array[Byte]]. // In byteArrayRDDToAnyArrayRDD() of SQLUtils.scala, the SerDeUtil.pythonToJava() will be called which is a mapPartitions inside. // It will be executed until the CSharpWorker finishes Pickling to RDD[Array[Byte]]. 
- var rddRow = rdd.Map(r => r); + var rddRow = rdd.MapPartitions(r => r.Select(rr => rr)); rddRow.serializedMode = SerializedMode.Row; return new DataFrame(sparkSessionProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), SparkContext); } - /// - /// Returns the specified table as a - /// - /// - /// - public DataFrame Table(string tableName) + public DataFrame CreateDataFrame(RDD rdd, StructType schema) + { + // Note: This is for pickling RDD, convert to RDD which happens in CSharpWorker. + // The below sqlContextProxy.CreateDataFrame() will call byteArrayRDDToAnyArrayRDD() of SQLUtils.scala which only accept RDD of type RDD[Array[Byte]]. + // In byteArrayRDDToAnyArrayRDD() of SQLUtils.scala, the SerDeUtil.pythonToJava() will be called which is a mapPartitions inside. + // It will be executed until the CSharpWorker finishes Pickling to RDD[Array[Byte]]. + var rddRow = rdd.MapPartitions(rows => rows.Select(r => r.Values)); + rddRow.serializedMode = SerializedMode.Row; + + return new DataFrame(sparkSessionProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), SparkContext); + } + + /// + /// Returns the specified table as a + /// + /// + /// + public DataFrame Table(string tableName) { return new DataFrame(sparkSessionProxy.Table(tableName), SparkContext); } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SqlContext.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SqlContext.cs index 03e9fb26..c99e9010 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SqlContext.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SqlContext.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using System.Reflection; using Microsoft.Spark.CSharp.Core; using Microsoft.Spark.CSharp.Proxy; using Microsoft.Spark.CSharp.Services; @@ -150,13 +151,25 @@ public DataFrame CreateDataFrame(RDD rdd, StructType schema) return new DataFrame(sqlContextProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), sparkContext); } - /// - /// Registers the given as a temporary table in the catalog. - /// Temporary tables exist only during the lifetime of this instance of SqlContext. - /// - /// - /// - public void RegisterDataFrameAsTable(DataFrame dataFrame, string tableName) + public DataFrame CreateDataFrame(RDD rdd, StructType schema) + { + // Note: This is for pickling RDD, convert to RDD which happens in CSharpWorker. + // The below sqlContextProxy.CreateDataFrame() will call byteArrayRDDToAnyArrayRDD() of SQLUtils.scala which only accept RDD of type RDD[Array[Byte]]. + // In byteArrayRDDToAnyArrayRDD() of SQLUtils.scala, the SerDeUtil.pythonToJava() will be called which is a mapPartitions inside. + // It will be executed until the CSharpWorker finishes Pickling to RDD[Array[Byte]]. + var rddRow = rdd.Map(r => r); + rddRow.serializedMode = SerializedMode.Row; + + return new DataFrame(sqlContextProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), sparkContext); + } + + /// + /// Registers the given as a temporary table in the catalog. + /// Temporary tables exist only during the lifetime of this instance of SqlContext. 
+ /// + /// + /// + public void RegisterDataFrameAsTable(DataFrame dataFrame, string tableName) { sqlContextProxy.RegisterDataFrameAsTable(dataFrame.DataFrameProxy, tableName); } @@ -527,6 +540,14 @@ public void RegisterFunction(string name, Func f) Func, IEnumerable> udfHelper = new UdfHelper(f).Execute; sqlContextProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT))); } - #endregion - } + + public void RegisterFunction(string name, MethodInfo f) + { + logger.LogInfo("Name of the function to register {0}, method info", name, f.DeclaringType?.FullName + "." + f.Name); + var helper = new UdfReflectionHelper(f); + Func, IEnumerable> udfHelper = helper.Execute; + sqlContextProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(helper.ReturnType)); + } + #endregion + } } \ No newline at end of file diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Types.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Types.cs index 2efcf209..ef945c37 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Types.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Types.cs @@ -2,6 +2,7 @@ // Licensed under the MIT license. See LICENSE file in the project root for full license information. using System; +using System.Collections; using System.Collections.Generic; using System.Linq; using System.Reflection; @@ -14,512 +15,600 @@ namespace Microsoft.Spark.CSharp.Sql { - /// - /// The base type of all Spark SQL data types. - /// - [Serializable] - public abstract class DataType - { - /// - /// Trim "Type" in the end from class name, ToLower() to align with Scala. - /// - public string TypeName - { - get { return NormalizeTypeName(GetType().Name); } - } - - /// - /// return TypeName by default, subclass can override it - /// - public virtual string SimpleString - { - get { return TypeName; } - } - - /// - /// return only type: TypeName by default, subclass can override it - /// - internal virtual object JsonValue { get { return TypeName; } } - - /// - /// The compact JSON representation of this data type. - /// - public string Json - { - get - { - var jObject = JsonValue is JObject ? ((JObject)JsonValue).SortProperties() : JsonValue; - return JsonConvert.SerializeObject(jObject, Formatting.None); - } - } - - /// - /// Parses a Json string to construct a DataType. - /// - /// The Json string to be parsed - /// The new DataType instance from the Json string - public static DataType ParseDataTypeFromJson(string json) - { - return ParseDataTypeFromJson(JToken.Parse(json)); - } - - /// - /// Parse a JToken object to construct a DataType. 
- /// - /// The JToken object to be parsed - /// The new DataType instance from the Json string - /// Not implemented for "udt" type - /// - protected static DataType ParseDataTypeFromJson(JToken json) - { - if (json.Type == JTokenType.Object) // {name: address, type: {type: struct,...},...} - { - JToken type; - var typeJObject = (JObject)json; - if (typeJObject.TryGetValue("type", out type)) - { - Type complexType; - if ((complexType = ComplexTypes.FirstOrDefault(ct => NormalizeTypeName(ct.Name) == type.ToString())) != default(Type)) - { - return ((ComplexType)Activator.CreateInstance(complexType, BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance - , null, new object[] { typeJObject }, null)); // create new instance of ComplexType - } - if (type.ToString() == "udt") - { - // TODO - throw new NotImplementedException(); - } - } - throw new ArgumentException(string.Format("Could not parse data type: {0}", type)); - } - else // {name: age, type: bigint,...} // TODO: validate more JTokenType other than Object - { - return ParseAtomicType(json); - } - - } - - private static AtomicType ParseAtomicType(JToken type) - { - Type atomicType; - if ((atomicType = AtomicTypes.FirstOrDefault(at => NormalizeTypeName(at.Name) == type.ToString())) != default(Type)) - { - return (AtomicType)Activator.CreateInstance(atomicType); // create new instance of AtomicType - } - - Match fixedDecimal = DecimalType.FixedDecimal.Match(type.ToString()); - if (fixedDecimal.Success) - { - return new DecimalType(int.Parse(fixedDecimal.Groups[1].Value), int.Parse(fixedDecimal.Groups[2].Value)); - } - - throw new ArgumentException(string.Format("Could not parse data type: {0}", type)); - } - - [NonSerialized] - private static readonly Type[] AtomicTypes = typeof(AtomicType).Assembly.GetTypes().Where(type => - type.IsSubclassOf(typeof(AtomicType))).ToArray(); - - [NonSerialized] - private static readonly Type[] ComplexTypes = typeof(ComplexType).Assembly.GetTypes().Where(type => - type.IsSubclassOf(typeof(ComplexType))).ToArray(); - - [NonSerialized] - private static readonly Func NormalizeTypeName = s => s.Substring(0, s.Length - 4).ToLower(); // trim "Type" at the end of type name - - - } - - /// - /// An internal type used to represent a simple type. - /// - [Serializable] - public class AtomicType : DataType - { - } - - /// - /// An internal type used to represent a complex type (such as arrays, structs, and maps). - /// - [Serializable] - public abstract class ComplexType : DataType - { - /// - /// Abstract method that constructs a complex type from a Json object - /// - /// The Json object to construct a complex type - /// A new constructed complex type - public abstract DataType FromJson(JObject json); - /// - /// Constructs a complex type from a Json string - /// - /// The string that represents a Json. - /// A new constructed complex type - public DataType FromJson(string json) - { - return FromJson(JObject.Parse(json)); - } - } - - /// - /// The data type representing NULL values. - /// - [Serializable] - public class NullType : AtomicType { } - - /// - /// The data type representing String values. - /// - [Serializable] - public class StringType : AtomicType { } - - /// - /// The data type representing binary values. - /// - [Serializable] - public class BinaryType : AtomicType { } - - /// - /// The data type representing Boolean values. - /// - [Serializable] - public class BooleanType : AtomicType { } - - /// - /// The data type representing Date values. 
- /// - [Serializable] - public class DateType : AtomicType { } - - /// - /// The data type representing Timestamp values. - /// - [Serializable] - public class TimestampType : AtomicType { } - - /// - /// The data type representing Double values. - /// - [Serializable] - public class DoubleType : AtomicType { } - - /// - /// - /// - [Serializable] - public class FloatType : AtomicType { } - - /// - /// The data type representing Float values. - /// - [Serializable] - public class ByteType : AtomicType { } - - /// - /// - /// - [Serializable] - public class IntegerType : AtomicType { } - - /// - /// The data type representing Int values. - /// - [Serializable] - public class LongType : AtomicType { } - - /// - /// The data type representing Short values. - /// - [Serializable] - public class ShortType : AtomicType { } - - /// - /// The data type representing Decimal values. - /// - [Serializable] - public class DecimalType : AtomicType - { - /// - /// Gets the regular expression that represents a fixed decimal. - /// - public static Regex FixedDecimal = new Regex(@"decimal\((\d+),\s(\d+)\)"); - private int? precision, scale; - /// - /// Initializes a new instance of DecimalType from parameters specifying its precision and scale. - /// - /// The precision of the type - /// The scale of the type - public DecimalType(int? precision = null, int? scale = null) - { - this.precision = precision; - this.scale = scale; - } - - internal override object JsonValue - { - get { throw new NotImplementedException(); } - } - - /// - /// Constructs a DecimalType from a Json object - /// - /// The Json object used to construct a DecimalType - /// A new DecimalType instance - /// Not implemented yet. - public DataType FromJson(JObject json) - { - throw new NotImplementedException(); - } - } - - /// - /// The data type for collections of multiple values. - /// - [Serializable] - public class ArrayType : ComplexType - { - /// - /// Gets the DataType of each element in the array - /// - public DataType ElementType { get { return elementType; } } - /// - /// Returns whether the array can contain null (None) values - /// - public bool ContainsNull { get { return containsNull; } } - - /// - /// Initializes a ArrayType instance with a specific DataType and specifying if the array has null values. - /// - /// The data type of values - /// Indicates if values have null values - public ArrayType(DataType elementType, bool containsNull = true) - { - this.elementType = elementType; - this.containsNull = containsNull; - } - - internal ArrayType(JObject json) - { - FromJson(json); - } - - /// - /// Readable string representation for the type. - /// - public override string SimpleString - { - get { return string.Format("array<{0}>", elementType.SimpleString); } - } - - internal override object JsonValue - { - get - { - return new JObject( - new JProperty("type", TypeName), - new JProperty("elementType", elementType.JsonValue), - new JProperty("containsNull", containsNull)); - } - } - - /// - /// Constructs a ArrayType from a Json object - /// - /// The Json object used to construct a ArrayType - /// A new ArrayType instance - public override sealed DataType FromJson(JObject json) - { - elementType = ParseDataTypeFromJson(json["elementType"]); - containsNull = (bool)json["containsNull"]; - return this; - } - - private DataType elementType; - private bool containsNull; - } - - /// - /// The data type for Maps. Not implemented yet. 
- /// - [Serializable] - public class MapType : ComplexType - { - internal override object JsonValue - { - get { throw new NotImplementedException(); } - } - - /// - /// Constructs a StructField from a Json object. Not implemented yet. - /// - /// The Json object used to construct a MapType - /// A new MapType instance - /// - public override DataType FromJson(JObject json) - { - throw new NotImplementedException(); - } - } - - /// - /// A field inside a StructType. - /// - [Serializable] - public class StructField : ComplexType - { - /// - /// The name of this field. - /// - public string Name { get { return name; } } - /// - /// The data type of this field. - /// - public DataType DataType { get { return dataType; } } - /// - /// Indicates if values of this field can be null values. - /// - public bool IsNullable { get { return isNullable; } } - /// - /// The metadata of this field. The metadata should be preserved during transformation if the content of the column is not modified, e.g, in selection. - /// - public JObject Metadata { get { return metadata; } } - - /// - /// Initializes a StructField instance with a specific name, data type, nullable, and metadata - /// - /// The name of this field - /// The data type of this field - /// Indicates if values of this field can be null values - /// The metadata of this field - public StructField(string name, DataType dataType, bool isNullable = true, JObject metadata = null) - { - this.name = name; - this.dataType = dataType; - this.isNullable = isNullable; - this.metadata = metadata ?? new JObject(); - } - - internal StructField(JObject json) - { - FromJson(json); - } - - /// - /// Returns a readable string that represents the type. - /// - public override string SimpleString { get { return string.Format(@"{0}:{1}", name, dataType.SimpleString); } } - - internal override object JsonValue - { - get - { - return new JObject( - new JProperty("name", name), - new JProperty("type", dataType.JsonValue), - new JProperty("nullable", isNullable), - new JProperty("metadata", metadata)); - } - } - - /// - /// Constructs a StructField from a Json object - /// - /// The Json object used to construct a StructField - /// A new StructField instance - public override sealed DataType FromJson(JObject json) - { - name = json["name"].ToString(); - dataType = ParseDataTypeFromJson(json["type"]); - isNullable = (bool)json["nullable"]; - metadata = (JObject)json["metadata"]; - return this; - } - - private string name; - private DataType dataType; - private bool isNullable; - [NonSerialized] - private JObject metadata; - } - - /// - /// Struct type, consisting of a list of StructField - /// This is the data type representing a Row - /// - [Serializable] - public class StructType : ComplexType - { - /// - /// Gets a list of StructField. - /// - public List Fields { get { return fields; } } - - internal IStructTypeProxy StructTypeProxy - { - get - { - return structTypeProxy ?? - new StructTypeIpcProxy( - new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.SQLUtils", "createSchema", - new object[] { Json }).ToString())); - } - } - - /// - /// Initializes a StructType instance with a specific collection of SructField object. 
- /// - /// The collection that holds StructField objects - public StructType(IEnumerable fields) - { - this.fields = fields.ToList(); - } - - internal StructType(JObject json) - { - FromJson(json); - } - - internal StructType(IStructTypeProxy structTypeProxy) - { - this.structTypeProxy = structTypeProxy; - var jsonSchema = structTypeProxy.ToJson(); - FromJson(jsonSchema); - } - - /// - /// Returns a readable string that joins all s together. - /// - public override string SimpleString - { - get { return string.Format(@"struct<{0}>", string.Join(",", fields.Select(f => f.SimpleString))); } - } - - internal override object JsonValue - { - get - { - return new JObject( - new JProperty("type", TypeName), - new JProperty("fields", fields.Select(f => f.JsonValue).ToArray())); - } - } - - /// - /// Constructs a StructType from a Json object - /// - /// The Json object used to construct a StructType - /// A new StructType instance - public override sealed DataType FromJson(JObject json) - { - var fieldsJObjects = json["fields"].Select(f => (JObject)f); - fields = fieldsJObjects.Select(fieldJObject => (new StructField(fieldJObject))).ToList(); - return this; - } - - [NonSerialized] - private readonly IStructTypeProxy structTypeProxy; - - private List fields; - } + /// + /// The base type of all Spark SQL data types. + /// + [Serializable] + public abstract class DataType + { + /// + /// Trim "Type" in the end from class name, ToLower() to align with Scala. + /// + public string TypeName + { + get { return NormalizeTypeName(GetType().Name); } + } + + /// + /// return TypeName by default, subclass can override it + /// + public virtual string SimpleString + { + get { return TypeName; } + } + + /// + /// return only type: TypeName by default, subclass can override it + /// + internal virtual object JsonValue { get { return TypeName; } } + + /// + /// The compact JSON representation of this data type. + /// + public string Json + { + get + { + var jObject = JsonValue is JObject ? ((JObject)JsonValue).SortProperties() : JsonValue; + return JsonConvert.SerializeObject(jObject, Formatting.None); + } + } + + /// + /// Parses a Json string to construct a DataType. + /// + /// The Json string to be parsed + /// The new DataType instance from the Json string + public static DataType ParseDataTypeFromJson(string json) + { + return ParseDataTypeFromJson(JToken.Parse(json)); + } + + /// + /// Parse a JToken object to construct a DataType. 
+ /// + /// The JToken object to be parsed + /// The new DataType instance from the Json string + /// Not implemented for "udt" type + /// + protected static DataType ParseDataTypeFromJson(JToken json) + { + if (json.Type == JTokenType.Object) // {name: address, type: {type: struct,...},...} + { + JToken type; + var typeJObject = (JObject)json; + if (typeJObject.TryGetValue("type", out type)) + { + Type complexType; + if ((complexType = ComplexTypes.FirstOrDefault(ct => NormalizeTypeName(ct.Name) == type.ToString())) != default(Type)) + { + return ((ComplexType)Activator.CreateInstance(complexType, BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance + , null, new object[] { typeJObject }, null)); // create new instance of ComplexType + } + if (type.ToString() == "udt") + { + // TODO + throw new NotImplementedException(); + } + } + throw new ArgumentException(string.Format("Could not parse data type: {0}", type)); + } + else // {name: age, type: bigint,...} // TODO: validate more JTokenType other than Object + { + return ParseAtomicType(json); + } + + } + + private static AtomicType ParseAtomicType(JToken type) + { + Type atomicType; + if ((atomicType = AtomicTypes.FirstOrDefault(at => NormalizeTypeName(at.Name) == type.ToString())) != default(Type)) + { + return (AtomicType)Activator.CreateInstance(atomicType); // create new instance of AtomicType + } + + Match fixedDecimal = DecimalType.FixedDecimal.Match(type.ToString()); + if (fixedDecimal.Success) + { + return new DecimalType(int.Parse(fixedDecimal.Groups[1].Value), int.Parse(fixedDecimal.Groups[2].Value)); + } + + throw new ArgumentException(string.Format("Could not parse data type: {0}", type)); + } + + [NonSerialized] + private static readonly Type[] AtomicTypes = typeof(AtomicType).Assembly.GetTypes().Where(type => + type.IsSubclassOf(typeof(AtomicType))).ToArray(); + + [NonSerialized] + private static readonly Type[] ComplexTypes = typeof(ComplexType).Assembly.GetTypes().Where(type => + type.IsSubclassOf(typeof(ComplexType))).ToArray(); + + [NonSerialized] + private static readonly Func NormalizeTypeName = s => s.Substring(0, s.Length - 4).ToLower(); // trim "Type" at the end of type name + + + } + + /// + /// An internal type used to represent a simple type. + /// + [Serializable] + public class AtomicType : DataType + { + } + + /// + /// An internal type used to represent a complex type (such as arrays, structs, and maps). + /// + [Serializable] + public abstract class ComplexType : DataType + { + /// + /// Abstract method that constructs a complex type from a Json object + /// + /// The Json object to construct a complex type + /// A new constructed complex type + public abstract DataType FromJson(JObject json); + /// + /// Constructs a complex type from a Json string + /// + /// The string that represents a Json. + /// A new constructed complex type + public DataType FromJson(string json) + { + return FromJson(JObject.Parse(json)); + } + } + + /// + /// The data type representing NULL values. + /// + [Serializable] + public class NullType : AtomicType { } + + /// + /// The data type representing String values. + /// + [Serializable] + public class StringType : AtomicType { } + + /// + /// The data type representing binary values. + /// + [Serializable] + public class BinaryType : AtomicType { } + + /// + /// The data type representing Boolean values. + /// + [Serializable] + public class BooleanType : AtomicType { } + + /// + /// The data type representing Date values. 
+ /// + [Serializable] + public class DateType : AtomicType { } + + /// + /// The data type representing Timestamp values. + /// + [Serializable] + public class TimestampType : AtomicType { } + + /// + /// The data type representing Double values. + /// + [Serializable] + public class DoubleType : AtomicType { } + + /// + /// The data type representing Float values. + /// + [Serializable] + public class FloatType : AtomicType { } + + /// + /// The data type representing Byte values. + /// + [Serializable] + public class ByteType : AtomicType { } + + /// + /// The data type representing Int values. + /// + [Serializable] + public class IntegerType : AtomicType { } + + /// + /// The data type representing Long values. + /// + [Serializable] + public class LongType : AtomicType { } + + /// + /// The data type representing Short values. + /// + [Serializable] + public class ShortType : AtomicType { } + + /// + /// The data type representing Decimal values. + /// + [Serializable] + public class DecimalType : AtomicType + { + /// + /// Gets the regular expression that represents a fixed decimal. + /// + public static Regex FixedDecimal = new Regex(@"decimal\s*\((\d+),\s*(\d+)\)"); + private int? precision, scale; + /// + /// Initializes a new instance of DecimalType from parameters specifying its precision and scale. + /// + /// The precision of the type + /// The scale of the type + public DecimalType(int? precision = null, int? scale = null) + { + this.precision = precision; + this.scale = scale; + } + + internal override object JsonValue + { + get + { + if (precision == null && scale == null) return "decimal"; + return "decimal(" + precision + "," + scale + ")"; + } + } + + /// + /// Constructs a DecimalType from a Json object + /// + /// The Json object used to construct a DecimalType + /// A new DecimalType instance + public DataType FromJson(JObject json) + { + return ParseDataTypeFromJson(json); + } + } + + /// + /// The data type for collections of multiple values. + /// + [Serializable] + public class ArrayType : ComplexType + { + /// + /// Gets the DataType of each element in the array + /// + public DataType ElementType { get { return elementType; } } + /// + /// Returns whether the array can contain null (None) values + /// + public bool ContainsNull { get { return containsNull; } } + + /// + /// Initializes an ArrayType instance with a specific DataType and specifying if the array has null values. + /// + /// The data type of values + /// Indicates if values have null values + public ArrayType(DataType elementType, bool containsNull = true) + { + this.elementType = elementType; + this.containsNull = containsNull; + } + + internal ArrayType(JObject json) + { + FromJson(json); + } + + /// + /// Readable string representation for the type. + /// + public override string SimpleString + { + get { return string.Format("array<{0}>", elementType.SimpleString); } + } + + internal override object JsonValue + { + get + { + return new JObject( + new JProperty("type", TypeName), + new JProperty("elementType", elementType.JsonValue), + new JProperty("containsNull", containsNull)); + } + } + + /// + /// Constructs an ArrayType from a Json object + /// + /// The Json object used to construct an ArrayType + /// A new ArrayType instance + public override sealed DataType FromJson(JObject json) + { + elementType = ParseDataTypeFromJson(json["elementType"]); + containsNull = (bool)json["containsNull"]; + return this; + } + + private DataType elementType; + private bool containsNull; + } + + /// + /// The data type for Maps. Not implemented yet.
+ /// + [Serializable] + public class MapType : ComplexType + { + internal override object JsonValue + { + get { throw new NotImplementedException(); } + } + + /// + /// Constructs a MapType from a Json object. Not implemented yet. + /// + /// The Json object used to construct a MapType + /// A new MapType instance + /// + public override DataType FromJson(JObject json) + { + throw new NotImplementedException(); + } + } + + /// + /// A field inside a StructType. + /// + [Serializable] + public class StructField : ComplexType + { + /// + /// The name of this field. + /// + public string Name { get { return name; } } + /// + /// The data type of this field. + /// + public DataType DataType { get { return dataType; } } + /// + /// Indicates if values of this field can be null values. + /// + public bool IsNullable { get { return isNullable; } } + /// + /// The metadata of this field. The metadata should be preserved during transformation if the content of the column is not modified, e.g., in selection. + /// + public JObject Metadata { get { return metadata; } } + + /// + /// Initializes a StructField instance with a specific name, data type, nullable, and metadata + /// + /// The name of this field + /// The data type of this field + /// Indicates if values of this field can be null values + /// The metadata of this field + public StructField(string name, DataType dataType, bool isNullable = true, JObject metadata = null) + { + this.name = name; + this.dataType = dataType; + this.isNullable = isNullable; + this.metadata = metadata ?? new JObject(); + } + + internal StructField(JObject json) + { + FromJson(json); + } + + /// + /// Returns a readable string that represents the type. + /// + public override string SimpleString { get { return string.Format(@"{0}:{1}", name, dataType.SimpleString); } } + + internal override object JsonValue + { + get + { + return new JObject( + new JProperty("name", name), + new JProperty("type", dataType.JsonValue), + new JProperty("nullable", isNullable), + new JProperty("metadata", metadata)); + } + } + + /// + /// Constructs a StructField from a Json object + /// + /// The Json object used to construct a StructField + /// A new StructField instance + public override sealed DataType FromJson(JObject json) + { + name = json["name"].ToString(); + dataType = ParseDataTypeFromJson(json["type"]); + isNullable = (bool)json["nullable"]; + metadata = (JObject)json["metadata"]; + return this; + } + + private string name; + private DataType dataType; + private bool isNullable; + [NonSerialized] + private JObject metadata; + } + + /// + /// Struct type, consisting of a list of StructField + /// This is the data type representing a Row + /// + [Serializable] + public class StructType : ComplexType + { + /// + /// Gets a list of StructField.
+ /// + public List Fields { get { return fields; } } + + + private Lazy[]> pickleConverters; + + private Func[] ConstructPickleConverters() + { + var funcs = new Func[fields.Count]; + int index = 0; + foreach (var field in fields) + { + if (field.DataType is StringType) + { + funcs[index] = x => x?.ToString(); + } + /*else if (field.DataType is LongType) + { + funcs[index] = x => x==null?null:(dynamic)(long)x ; + }*/ + /*else if (field.DataType is DateType) + { + funcs[index] = x => x; + }*/ + else if (field.DataType is ArrayType) + { + Func convertArrayTypeToStructTypeFunc = (dataType, length) => + { + StructField[] f = new StructField[length]; + for (int i = 0; i < length; i++) + { + f[i] = new StructField(string.Format("_array_{0}", i), dataType); + } + return new StructType(f); + }; + var elementType = (field.DataType as ArrayType).ElementType; + funcs[index] = x => + { + + // Note: When creating object from json, PySpark converts Json array to Python List (https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/types.py, _create_cls(dataType)), + // then Pyrolite unpickler converts Python List to C# ArrayList (https://github.com/irmen/Pyrolite/blob/v4.10/README.txt). So values[index] should be of type ArrayList; + // In case Python changes its implementation, which means value is not of type ArrayList, try cast to object[] because Pyrolite unpickler convert Python Tuple to C# object[]. + object[] valueOfArray = (x as ArrayList)?.ToArray() ?? x as object[]; + if (valueOfArray == null) + { + throw new ArgumentException("Cannot parse data of ArrayType: " + field.Name); + } + + return new RowImpl(valueOfArray, + elementType as StructType ?? convertArrayTypeToStructTypeFunc(elementType, valueOfArray.Length)).Values; // TODO: this part may have some problems, not verified + }; + } + else if (field.DataType is MapType) + { + //TODO + throw new NotImplementedException(); + } + else if (field.DataType is StructType) + { + funcs[index] = x => x != null ? new RowImpl(x, field.DataType as StructType) : null; + } + else + { + funcs[index] = x => x; + } + index++; + } + return funcs; + } + + internal IStructTypeProxy StructTypeProxy + { + get + { + return structTypeProxy ?? + new StructTypeIpcProxy( + new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.SQLUtils", "createSchema", + new object[] { Json }).ToString())); + } + } + + /// + /// Initializes a StructType instance with a specific collection of SructField object. + /// + /// The collection that holds StructField objects + public StructType(IEnumerable fields) + { + this.fields = fields.ToList(); + Initialize(); + } + + internal StructType(JObject json) + { + FromJson(json); + Initialize(); + } + + internal StructType(IStructTypeProxy structTypeProxy) + { + this.structTypeProxy = structTypeProxy; + var jsonSchema = structTypeProxy.ToJson(); + FromJson(jsonSchema); + Initialize(); + } + + public void ConvertPickleObjects(dynamic[] input, dynamic[] output) + { + var c = pickleConverters.Value; + for (int i = 0; i < input.Length; ++i) + { + output[i] = c[i](input[i]); + } + } + + private void Initialize() + { + pickleConverters = new Lazy[]>(ConstructPickleConverters); + } + + /// + /// Returns a readable string that joins all s together. 
+ /// + public override string SimpleString + { + get { return string.Format(@"struct<{0}>", string.Join(",", fields.Select(f => f.SimpleString))); } + } + + internal override object JsonValue + { + get + { + return new JObject( + new JProperty("type", TypeName), + new JProperty("fields", fields.Select(f => f.JsonValue).ToArray())); + } + } + + /// + /// Constructs a StructType from a Json object + /// + /// The Json object used to construct a StructType + /// A new StructType instance + public override sealed DataType FromJson(JObject json) + { + var fieldsJObjects = json["fields"].Select(f => (JObject)f); + fields = fieldsJObjects.Select(fieldJObject => (new StructField(fieldJObject))).ToList(); + return this; + } + + [NonSerialized] + private readonly IStructTypeProxy structTypeProxy; + + private List fields; + } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/UdfRegistration.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/UdfRegistration.cs index b9c50083..eaa602ba 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/UdfRegistration.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/UdfRegistration.cs @@ -5,6 +5,7 @@ using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.Linq; +using System.Reflection; using System.Text; using System.Threading.Tasks; using Microsoft.Spark.CSharp.Core; @@ -249,6 +250,17 @@ public void RegisterFunction(string name, Func f) Func, IEnumerable> udfHelper = new UdfHelper(f).Execute; udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT))); } - #endregion - } + + public void RegisterFunction(string name, MethodInfo f) + { + if (!f.IsStatic) + throw new InvalidOperationException(f.DeclaringType?.FullName + "." + f.Name + + " is not a static method, can't be registered"); + logger.LogInfo("Name of the function to register {0}, method info", name, f.DeclaringType?.FullName + "." + f.Name); + var helper = new UdfReflectionHelper(f); + Func, IEnumerable> udfHelper = helper.Execute; + udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(helper.ReturnType)); + } + #endregion + } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/packages.config b/csharp/Adapter/Microsoft.Spark.CSharp/packages.config index 8f5143eb..d95f59da 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/packages.config +++ b/csharp/Adapter/Microsoft.Spark.CSharp/packages.config @@ -1,7 +1,7 @@  - - + + - + \ No newline at end of file diff --git a/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML b/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML index 0d192a55..f7d5b481 100644 --- a/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML +++ b/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML @@ -3513,7 +3513,7 @@ Close the socket connections and releases all associated resources. - + Establishes a connection to a remote host that is specified by an IP address and a port number @@ -3612,12 +3612,13 @@ Close the ISocket connections and releases all associated resources. 
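The `RegisterFunction(string name, MethodInfo f)` overload added above in SqlContext.cs and UdfRegistration.cs registers a UDF from a static method located by reflection, deriving the SQL return type from the method's return type and rejecting instance methods. A minimal usage sketch; the MyUdfs class, its method, the "people" table and the sqlContext variable are illustrative assumptions, not code from this change:

```csharp
using System.Reflection;
using Microsoft.Spark.CSharp.Sql;

public static class MyUdfs
{
    // Must be static: the UdfRegistration overload shown above throws for instance methods.
    public static int Twice(int x) { return x * 2; }
}

public static class UdfRegistrationExample
{
    // sqlContext is assumed to be an already-created Mobius SqlContext
    // with a table named "people" registered.
    public static void Register(SqlContext sqlContext)
    {
        MethodInfo method = typeof(MyUdfs).GetMethod("Twice");

        // MethodInfo-based registration added in this change; the SQL return type
        // is derived from the method's return type via Functions.GetReturnType.
        sqlContext.RegisterFunction("twice", method);

        // Once registered, the UDF is callable from Spark SQL like any built-in function.
        DataFrame result = sqlContext.Sql("SELECT twice(age) AS doubledAge FROM people");
        result.Show();
    }
}
```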
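The reworked DecimalType above also changes how decimal schemas round-trip through JSON: JsonValue now emits "decimal" or "decimal(p,s)" instead of throwing, FixedDecimal tolerates a missing space after the comma, and FromJson delegates to ParseDataTypeFromJson. A small sketch under those assumptions, with illustrative values:

```csharp
using System;
using Microsoft.Spark.CSharp.Sql;

public static class DecimalTypeJsonExample
{
    public static void RoundTrip()
    {
        // JsonValue now yields "decimal(10,2)" instead of throwing, so the compact
        // JSON form of the type can be produced via DataType.Json.
        var original = new DecimalType(10, 2);
        string json = original.Json;             // "\"decimal(10,2)\""
        Console.WriteLine(json);

        // The relaxed FixedDecimal regex accepts "decimal(10,2)" (no space after the
        // comma), so the JSON parses back into a DecimalType with the same precision/scale.
        DataType parsed = DataType.ParseDataTypeFromJson(json);
        Console.WriteLine(parsed.SimpleString);  // "decimal"

        // A DecimalType constructed without precision/scale serializes to plain "decimal".
        Console.WriteLine(new DecimalType().Json);
    }
}
```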
- + Establishes a connection to a remote host that is specified by an IP address and a port number The IP address of the remote host The port number of the remote host + The secret to connect, can be null @@ -3770,7 +3771,7 @@ Close the ISocket connections and releases all associated resources. - + Establishes a connection to a remote host that is specified by an IP address and a port number @@ -3912,7 +3913,7 @@ Close the ISocket connections and releases all associated resources. - + Establishes a connection to a remote host that is specified by an IP address and a port number @@ -5190,12 +5191,13 @@ row count - + Displays rows of the DataFrame in tabular form Number of rows to display - default 20 Indicates if strings more than 20 characters long will be truncated + If set to True, print output rows vertically (one line per column value). @@ -5627,10 +5629,11 @@ the 100 new partitions will claim 10 of the current partitions. - + Persist this DataFrame with the default storage level (`MEMORY_AND_DISK`) + Persist storage type @@ -6040,6 +6043,15 @@ DataFrame if no paths are passed in. + + + Loads a AVRO file (one object per line) and returns the result as a DataFrame. + + This function goes through the input once to determine the input schema. If you know the + schema in advance, use the version that specifies the schema to avoid the extra scan. + + input path + Interface used to write a DataFrame to external storage systems (e.g. file systems, @@ -6145,6 +6157,13 @@ Format("parquet").Save(path) + + + Saves the content of the DataFrame in AVRO format at the specified path. + This is equivalent to: + Format("com.databricks.spark.avro").Save(path) + + Dataset is a strongly typed collection of domain-specific objects that can be transformed @@ -6193,13 +6212,14 @@ Returns all column names as an array. - + Displays the top 20 rows of Dataset in a tabular form. Strings more than 20 characters will be truncated, and all cells will be aligned right. Number of rows - default is 20 Indicates if rows with more than 20 characters to be truncated + If set to true, prints output rows vertically (one line per column value). diff --git a/csharp/Adapter/documentation/Mobius_API_Documentation.md b/csharp/Adapter/documentation/Mobius_API_Documentation.md index c9e40654..7ee0e9ee 100644 --- a/csharp/Adapter/documentation/Mobius_API_Documentation.md +++ b/csharp/Adapter/documentation/Mobius_API_Documentation.md @@ -638,7 +638,7 @@ ####Methods -
NameDescription
RegisterTempTableRegisters this DataFrame as a temporary table using the given name. The lifetime of this temporary table is tied to the SqlContext that was used to create this DataFrame.
CountNumber of rows in the DataFrame
ShowDisplays rows of the DataFrame in tabular form
ShowSchemaPrints the schema information of the DataFrame
CollectReturns all of Rows in this DataFrame
ToRDDConverts the DataFrame to RDD of Row
ToJSONReturns the content of the DataFrame as RDD of JSON strings
ExplainPrints the plans (logical and physical) to the console for debugging purposes
SelectSelects a set of columns specified by column name or Column. df.Select("colA", df["colB"]) df.Select("*", df["colB"] + 10)
SelectSelects a set of columns. This is a variant of `select` that can only select existing columns using column names (i.e. cannot construct expressions). df.Select("colA", "colB")
SelectExprSelects a set of SQL expressions. This is a variant of `select` that accepts SQL expressions. df.SelectExpr("colA", "colB as newName", "abs(colC)")
WhereFilters rows using the given condition
FilterFilters rows using the given condition
GroupByGroups the DataFrame using the specified columns, so we can run aggregation on them.
RollupCreate a multi-dimensional rollup for the current DataFrame using the specified columns, so we can run aggregation on them.
CubeCreate a multi-dimensional cube for the current DataFrame using the specified columns, so we can run aggregation on them.
AggAggregates on the DataFrame for the given column-aggregate function mapping
JoinJoin with another DataFrame - Cartesian join
JoinJoin with another DataFrame - Inner equi-join using given column name
JoinJoin with another DataFrame - Inner equi-join using given column name
JoinJoin with another DataFrame, using the specified JoinType
IntersectIntersect with another DataFrame. This is equivalent to `INTERSECT` in SQL. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, intersect(self, other)
UnionAllUnion with another DataFrame WITHOUT removing duplicated rows. This is equivalent to `UNION ALL` in SQL. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, unionAll(self, other)
SubtractReturns a new DataFrame containing rows in this frame but not in another frame. This is equivalent to `EXCEPT` in SQL. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, subtract(self, other)
DropReturns a new DataFrame with a column dropped. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, drop(self, col)
DropNaReturns a new DataFrame omitting rows with null values. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, dropna(self, how='any', thresh=None, subset=None)
NaReturns a DataFrameNaFunctions for working with missing data.
FillNaReplace null values, alias for ``na.fill()`
DropDuplicatesReturns a new DataFrame with duplicate rows removed, considering only the subset of columns. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, dropDuplicates(self, subset=None)
Replace``1Returns a new DataFrame replacing a value with another value. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, replace(self, to_replace, value, subset=None)
ReplaceAll``1Returns a new DataFrame replacing values with other values. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, replace(self, to_replace, value, subset=None)
ReplaceAll``1Returns a new DataFrame replacing values with another value. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, replace(self, to_replace, value, subset=None)
RandomSplitRandomly splits this DataFrame with the provided weights. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, randomSplit(self, weights, seed=None)
ColumnsReturns all column names as a list. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, columns(self)
DTypesReturns all column names and their data types. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, dtypes(self)
SortReturns a new DataFrame sorted by the specified column(s). Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, sort(self, *cols, **kwargs)
SortReturns a new DataFrame sorted by the specified column(s). Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, sort(self, *cols, **kwargs)
SortWithinPartitionsReturns a new DataFrame sorted by the specified column(s). Reference to https://github.com/apache/spark/blob/branch-1.6/python/pyspark/sql/dataframe.py, sortWithinPartitions(self, *cols, **kwargs)
SortWithinPartitionReturns a new DataFrame sorted by the specified column(s). Reference to https://github.com/apache/spark/blob/branch-1.6/python/pyspark/sql/dataframe.py, sortWithinPartitions(self, *cols, **kwargs)
AliasReturns a new DataFrame with an alias set. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, alias(self, alias)
WithColumnReturns a new DataFrame by adding a column. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, withColumn(self, colName, col)
WithColumnRenamedReturns a new DataFrame by renaming an existing column. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, withColumnRenamed(self, existing, new)
CorrCalculates the correlation of two columns of a DataFrame as a double value. Currently only supports the Pearson Correlation Coefficient. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, corr(self, col1, col2, method=None)
CovCalculate the sample covariance of two columns as a double value. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, cov(self, col1, col2)
FreqItemsFinding frequent items for columns, possibly with false positives. Using the frequent element count algorithm described in "http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou". Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, freqItems(self, cols, support=None) Note: This function is meant for exploratory data analysis, as we make no guarantee about the backward compatibility of the schema of the resulting DataFrame.
CrosstabComputes a pair-wise frequency table of the given columns. Also known as a contingency table. The number of distinct values for each column should be less than 1e4. At most 1e6 non-zero pair frequencies will be returned. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, crosstab(self, col1, col2)
DescribeComputes statistics for numeric columns. This include count, mean, stddev, min, and max. If no columns are given, this function computes statistics for all numerical columns.
LimitReturns a new DataFrame by taking the first `n` rows. The difference between this function and `head` is that `head` returns an array while `limit` returns a new DataFrame.
HeadReturns the first `n` rows.
FirstReturns the first row.
TakeReturns the first `n` rows in the DataFrame.
DistinctReturns a new DataFrame that contains only the unique rows from this DataFrame.
CoalesceReturns a new DataFrame that has exactly `numPartitions` partitions. Similar to coalesce defined on an RDD, this operation results in a narrow dependency, e.g. if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of the 100 new partitions will claim 10 of the current partitions.
PersistPersist this DataFrame with the default storage level (`MEMORY_AND_DISK`)
UnpersistMark the DataFrame as non-persistent, and remove all blocks for it from memory and disk.
CachePersist this DataFrame with the default storage level (`MEMORY_AND_DISK`)
RepartitionReturns a new DataFrame that has exactly `numPartitions` partitions.
RepartitionReturns a new [[DataFrame]] partitioned by the given partitioning columns into . The resulting DataFrame is hash partitioned. optional. If not specified, keep current partitions.
RepartitionReturns a new [[DataFrame]] partitioned by the given partitioning columns into . The resulting DataFrame is hash partitioned. optional. If not specified, keep current partitions.
SampleReturns a new DataFrame by sampling a fraction of rows.
FlatMap``1Returns a new RDD by first applying a function to all rows of this DataFrame, and then flattening the results.
Map``1Returns a new RDD by applying a function to all rows of this DataFrame.
MapPartitions``1Returns a new RDD by applying a function to each partition of this DataFrame.
ForeachPartitionApplies a function f to each partition of this DataFrame.
ForeachApplies a function f to all rows.
WriteInterface for saving the content of the DataFrame out into external storage.
SaveAsParquetFileSaves the contents of this DataFrame as a parquet file, preserving the schema. Files that are written out using this method can be read back in as a DataFrame using the `parquetFile` function in SQLContext.
InsertIntoAdds the rows from this RDD to the specified table, optionally overwriting the existing data.
SaveAsTableCreates a table from the the contents of this DataFrame based on a given data source, SaveMode specified by mode, and a set of options. Note that this currently only works with DataFrames that are created from a HiveContext as there is no notion of a persisted catalog in a standard SQL context. Instead you can write an RDD out to a parquet file, and then register that file as a table. This "table" can then be the target of an `insertInto`. Also note that while this function can persist the table metadata into Hive's metastore, the table will NOT be accessible from Hive, until SPARK-7550 is resolved.
SaveSaves the contents of this DataFrame based on the given data source, SaveMode specified by mode, and a set of options.
Returns a new DataFrame that drops rows containing any null values.
Returns a new DataFrame that drops rows containing null values. If `how` is "any", then drop rows containing any null values. If `how` is "all", then drop rows only if every column is null for that row.
Returns a new [[DataFrame]] that drops rows containing null values in the specified columns. If `how` is "any", then drop rows containing any null values in the specified columns. If `how` is "all", then drop rows only if every specified column is null for that row.
Returns a new DataFrame that drops rows containing any null values in the specified columns.
Returns a new DataFrame that drops rows containing less than `minNonNulls` non-null values.
Returns a new DataFrame that drops rows containing less than `minNonNulls` non-null values values in the specified columns.
Returns a new DataFrame that replaces null values in numeric columns with `value`.
Returns a new DataFrame that replaces null values in string columns with `value`.
Returns a new DataFrame that replaces null values in specified numeric columns. If a specified column is not a numeric column, it is ignored.
Returns a new DataFrame that replaces null values in specified string columns. If a specified column is not a numeric column, it is ignored.
Replaces values matching keys in `replacement` map with the corresponding values. Key and value of `replacement` map must have the same type, and can only be doubles or strings. The value must be of the following type: `Integer`, `Long`, `Float`, `Double`, `String`. For example, the following replaces null values in column "A" with string "unknown", and null values in column "B" with numeric value 1.0. import com.google.common.collect.ImmutableMap; df.na.fill(ImmutableMap.of("A", "unknown", "B", 1.0));
Replaces values matching keys in `replacement` map with the corresponding values. Key and value of `replacement` map must have the same type, and can only be doubles or strings. If `col` is "*", then the replacement is applied on all string columns or numeric columns. Example: import com.google.common.collect.ImmutableMap; // Replaces all occurrences of 1.0 with 2.0 in column "height". df.replace("height", ImmutableMap.of(1.0, 2.0)); // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "name". df.replace("name", ImmutableMap.of("UNKNOWN", "unnamed")); // Replaces all occurrences of "UNKNOWN" with "unnamed" in all string columns. df.replace("*", ImmutableMap.of("UNKNOWN", "unnamed"));
Replaces values matching keys in `replacement` map with the corresponding values. Key and value of `replacement` map must have the same type, and can only be doubles or strings. If `col` is "*", then the replacement is applied on all string columns or numeric columns. Example: import com.google.common.collect.ImmutableMap; // Replaces all occurrences of 1.0 with 2.0 in column "height" and "weight". df.replace(new String[] {"height", "weight"}, ImmutableMap.of(1.0, 2.0)); // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "firstname" and "lastname". df.replace(new String[] {"firstname", "lastname"}, ImmutableMap.of("UNKNOWN", "unnamed"));
Specifies the input data source format.
Specifies the input schema. Some data sources (e.g. JSON) can infer the input schema automatically from data. By specifying the schema here, the underlying data source can skip the schema inference step, and thus speed up data loading.
Adds an input option for the underlying data source.
Adds input options for the underlying data source.
Loads input in as a [[DataFrame]], for data sources that require a path (e.g. data backed by a local or distributed file system).
Loads input in as a DataFrame, for data sources that don't require a path (e.g. external key-value stores).
Construct a [[DataFrame]] representing the database table accessible via JDBC URL, url named table and connection properties.
Construct a DataFrame representing the database table accessible via JDBC URL url named table. Partitions of the table will be retrieved in parallel based on the parameters passed to this function. Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems.
Construct a DataFrame representing the database table accessible via JDBC URL url named table using connection properties. The `predicates` parameter gives a list expressions suitable for inclusion in WHERE clauses; each one defines one partition of the DataFrame. Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems.
Loads a JSON file (one object per line) and returns the result as a DataFrame. This function goes through the input once to determine the input schema. If you know the schema in advance, use the version that specifies the schema to avoid the extra scan.
Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty DataFrame if no paths are passed in.
Specifies the behavior when data or table already exists. Options include: - `SaveMode.Overwrite`: overwrite the existing data. - `SaveMode.Append`: append the data. - `SaveMode.Ignore`: ignore the operation (i.e. no-op). - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime.
Specifies the behavior when data or table already exists. Options include: - `SaveMode.Overwrite`: overwrite the existing data. - `SaveMode.Append`: append the data. - `SaveMode.Ignore`: ignore the operation (i.e. no-op). - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime.
Specifies the underlying output data source. Built-in options include "parquet", "json", etc.
Adds an output option for the underlying data source.
Adds output options for the underlying data source.
Partitions the output by the given columns on the file system. If specified, the output is laid out on the file system similar to Hive's partitioning scheme. This is only applicable for Parquet at the moment.
Saves the content of the DataFrame at the specified path.
Saves the content of the DataFrame as the specified table.
Inserts the content of the DataFrame to the specified table. It requires that the schema of the DataFrame is the same as the schema of the table. Because it inserts data to an existing table, format or options will be ignored.
Saves the content of the DataFrame as the specified table. In the case the table already exists, behavior of this function depends on the save mode, specified by the `mode` function (default to throwing an exception). When `mode` is `Overwrite`, the schema of the DataFrame does not need to be the same as that of the existing table. When `mode` is `Append`, the schema of the DataFrame need to be the same as that of the existing table, and format or options will be ignored.
Saves the content of the DataFrame to a external database table via JDBC. In the case the table already exists in the external database, behavior of this function depends on the save mode, specified by the `mode` function (default to throwing an exception). Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems.
Saves the content of the DataFrame in JSON format at the specified path. This is equivalent to: Format("json").Save(path)
Saves the content of the DataFrame in JSON format at the specified path. This is equivalent to: Format("parquet").Save(path)
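The reader and writer surface listed above gains AVRO counterparts in this change; the Adapter documentation hunks earlier in the diff describe the new Avro load/save as equivalent to going through Format("com.databricks.spark.avro"). A hedged sketch of that usage; the paths and the sqlContext variable are assumptions, and only the Read/Write/Format/Mode/Load/Save calls already described in the documentation are relied on:

```csharp
using Microsoft.Spark.CSharp.Sql;

public static class AvroExample
{
    // sqlContext is assumed to be an existing Mobius SqlContext; paths are illustrative.
    public static void ReadAndWriteAvro(SqlContext sqlContext)
    {
        // Per the documentation added in this change, loading AVRO is equivalent to
        // going through the generic reader with the spark-avro data source.
        DataFrame people = sqlContext.Read()
            .Format("com.databricks.spark.avro")
            .Load("hdfs://path/to/people.avro");

        // Writing in AVRO format is documented as equivalent to
        // Format("com.databricks.spark.avro").Save(path).
        people.Write()
            .Format("com.databricks.spark.avro")
            .Mode(SaveMode.Overwrite)
            .Save("hdfs://path/to/output.avro");
    }
}
```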
+
NameDescription
RegisterTempTableRegisters this DataFrame as a temporary table using the given name. The lifetime of this temporary table is tied to the SqlContext that was used to create this DataFrame.
CountNumber of rows in the DataFrame
ShowDisplays rows of the DataFrame in tabular form
ShowSchemaPrints the schema information of the DataFrame
CollectReturns all of Rows in this DataFrame
ToRDDConverts the DataFrame to RDD of Row
ToJSONReturns the content of the DataFrame as RDD of JSON strings
ExplainPrints the plans (logical and physical) to the console for debugging purposes
SelectSelects a set of columns specified by column name or Column. df.Select("colA", df["colB"]) df.Select("*", df["colB"] + 10)
SelectSelects a set of columns. This is a variant of `select` that can only select existing columns using column names (i.e. cannot construct expressions). df.Select("colA", "colB")
SelectExprSelects a set of SQL expressions. This is a variant of `select` that accepts SQL expressions. df.SelectExpr("colA", "colB as newName", "abs(colC)")
WhereFilters rows using the given condition
FilterFilters rows using the given condition
GroupByGroups the DataFrame using the specified columns, so we can run aggregation on them.
RollupCreate a multi-dimensional rollup for the current DataFrame using the specified columns, so we can run aggregation on them.
CubeCreate a multi-dimensional cube for the current DataFrame using the specified columns, so we can run aggregation on them.
AggAggregates on the DataFrame for the given column-aggregate function mapping
JoinJoin with another DataFrame - Cartesian join
JoinJoin with another DataFrame - Inner equi-join using given column name
JoinJoin with another DataFrame - Inner equi-join using given column name
JoinJoin with another DataFrame, using the specified JoinType
IntersectIntersect with another DataFrame. This is equivalent to `INTERSECT` in SQL. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, intersect(self, other)
UnionAllUnion with another DataFrame WITHOUT removing duplicated rows. This is equivalent to `UNION ALL` in SQL. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, unionAll(self, other)
SubtractReturns a new DataFrame containing rows in this frame but not in another frame. This is equivalent to `EXCEPT` in SQL. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, subtract(self, other)
DropReturns a new DataFrame with a column dropped. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, drop(self, col)
DropNaReturns a new DataFrame omitting rows with null values. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, dropna(self, how='any', thresh=None, subset=None)
NaReturns a DataFrameNaFunctions for working with missing data.
FillNaReplace null values, alias for ``na.fill()`
DropDuplicatesReturns a new DataFrame with duplicate rows removed, considering only the subset of columns. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, dropDuplicates(self, subset=None)
Replace``1Returns a new DataFrame replacing a value with another value. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, replace(self, to_replace, value, subset=None)
ReplaceAll``1Returns a new DataFrame replacing values with other values. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, replace(self, to_replace, value, subset=None)
ReplaceAll``1Returns a new DataFrame replacing values with another value. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, replace(self, to_replace, value, subset=None)
RandomSplitRandomly splits this DataFrame with the provided weights. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, randomSplit(self, weights, seed=None)
ColumnsReturns all column names as a list. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, columns(self)
DTypesReturns all column names and their data types. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, dtypes(self)
SortReturns a new DataFrame sorted by the specified column(s). Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, sort(self, *cols, **kwargs)
SortReturns a new DataFrame sorted by the specified column(s). Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, sort(self, *cols, **kwargs)
SortWithinPartitionsReturns a new DataFrame sorted by the specified column(s). Reference to https://github.com/apache/spark/blob/branch-1.6/python/pyspark/sql/dataframe.py, sortWithinPartitions(self, *cols, **kwargs)
SortWithinPartitionReturns a new DataFrame sorted by the specified column(s). Reference to https://github.com/apache/spark/blob/branch-1.6/python/pyspark/sql/dataframe.py, sortWithinPartitions(self, *cols, **kwargs)
AliasReturns a new DataFrame with an alias set. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, alias(self, alias)
WithColumnReturns a new DataFrame by adding a column. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, withColumn(self, colName, col)
WithColumnRenamedReturns a new DataFrame by renaming an existing column. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, withColumnRenamed(self, existing, new)
CorrCalculates the correlation of two columns of a DataFrame as a double value. Currently only supports the Pearson Correlation Coefficient. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, corr(self, col1, col2, method=None)
CovCalculate the sample covariance of two columns as a double value. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, cov(self, col1, col2)
FreqItemsFinding frequent items for columns, possibly with false positives. Using the frequent element count algorithm described in "http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou". Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, freqItems(self, cols, support=None) Note: This function is meant for exploratory data analysis, as we make no guarantee about the backward compatibility of the schema of the resulting DataFrame.
CrosstabComputes a pair-wise frequency table of the given columns. Also known as a contingency table. The number of distinct values for each column should be less than 1e4. At most 1e6 non-zero pair frequencies will be returned. Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, crosstab(self, col1, col2)
DescribeComputes statistics for numeric columns. These include count, mean, stddev, min, and max. If no columns are given, this function computes statistics for all numerical columns.
LimitReturns a new DataFrame by taking the first `n` rows. The difference between this function and `head` is that `head` returns an array while `limit` returns a new DataFrame.
HeadReturns the first `n` rows.
FirstReturns the first row.
TakeReturns the first `n` rows in the DataFrame.
DistinctReturns a new DataFrame that contains only the unique rows from this DataFrame.
CoalesceReturns a new DataFrame that has exactly `numPartitions` partitions. Similar to coalesce defined on an RDD, this operation results in a narrow dependency, e.g. if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of the 100 new partitions will claim 10 of the current partitions.
PersistPersist this DataFrame with the default storage level (`MEMORY_AND_DISK`)
UnpersistMark the DataFrame as non-persistent, and remove all blocks for it from memory and disk.
CachePersist this DataFrame with the default storage level (`MEMORY_AND_DISK`)
RepartitionReturns a new DataFrame that has exactly `numPartitions` partitions.
RepartitionReturns a new [[DataFrame]] partitioned by the given partitioning columns into the given number of partitions. The resulting DataFrame is hash partitioned. The number of partitions is optional; if not specified, the current partitions are kept.
RepartitionReturns a new [[DataFrame]] partitioned by the given partitioning columns into the given number of partitions. The resulting DataFrame is hash partitioned. The number of partitions is optional; if not specified, the current partitions are kept.
SampleReturns a new DataFrame by sampling a fraction of rows.
FlatMap``1Returns a new RDD by first applying a function to all rows of this DataFrame, and then flattening the results.
Map``1Returns a new RDD by applying a function to all rows of this DataFrame.
MapPartitions``1Returns a new RDD by applying a function to each partition of this DataFrame.
ForeachPartitionApplies a function f to each partition of this DataFrame.
ForeachApplies a function f to all rows.
WriteInterface for saving the content of the DataFrame out into external storage.
SaveAsParquetFileSaves the contents of this DataFrame as a parquet file, preserving the schema. Files that are written out using this method can be read back in as a DataFrame using the `parquetFile` function in SQLContext.
InsertIntoAdds the rows from this RDD to the specified table, optionally overwriting the existing data.
SaveAsTableCreates a table from the contents of this DataFrame based on a given data source, SaveMode specified by mode, and a set of options. Note that this currently only works with DataFrames that are created from a HiveContext as there is no notion of a persisted catalog in a standard SQL context. Instead you can write an RDD out to a parquet file, and then register that file as a table. This "table" can then be the target of an `insertInto`. Also note that while this function can persist the table metadata into Hive's metastore, the table will NOT be accessible from Hive until SPARK-7550 is resolved.
SaveSaves the contents of this DataFrame based on the given data source, SaveMode specified by mode, and a set of options.
Returns a new DataFrame that drops rows containing any null values.
Returns a new DataFrame that drops rows containing null values. If `how` is "any", then drop rows containing any null values. If `how` is "all", then drop rows only if every column is null for that row.
Returns a new [[DataFrame]] that drops rows containing null values in the specified columns. If `how` is "any", then drop rows containing any null values in the specified columns. If `how` is "all", then drop rows only if every specified column is null for that row.
Returns a new DataFrame that drops rows containing any null values in the specified columns.
Returns a new DataFrame that drops rows containing less than `minNonNulls` non-null values.
Returns a new DataFrame that drops rows containing less than `minNonNulls` non-null values in the specified columns.
Returns a new DataFrame that replaces null values in numeric columns with `value`.
Returns a new DataFrame that replaces null values in string columns with `value`.
Returns a new DataFrame that replaces null values in specified numeric columns. If a specified column is not a numeric column, it is ignored.
Returns a new DataFrame that replaces null values in specified string columns. If a specified column is not a string column, it is ignored.
Replaces values matching keys in `replacement` map with the corresponding values. Key and value of `replacement` map must have the same type, and can only be doubles or strings. The value must be of the following type: `Integer`, `Long`, `Float`, `Double`, `String`. For example, the following replaces null values in column "A" with string "unknown", and null values in column "B" with numeric value 1.0. import com.google.common.collect.ImmutableMap; df.na.fill(ImmutableMap.of("A", "unknown", "B", 1.0));
Replaces values matching keys in `replacement` map with the corresponding values. Key and value of `replacement` map must have the same type, and can only be doubles or strings. If `col` is "*", then the replacement is applied on all string columns or numeric columns. Example: import com.google.common.collect.ImmutableMap; // Replaces all occurrences of 1.0 with 2.0 in column "height". df.replace("height", ImmutableMap.of(1.0, 2.0)); // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "name". df.replace("name", ImmutableMap.of("UNKNOWN", "unnamed")); // Replaces all occurrences of "UNKNOWN" with "unnamed" in all string columns. df.replace("*", ImmutableMap.of("UNKNOWN", "unnamed"));
Replaces values matching keys in `replacement` map with the corresponding values. Key and value of `replacement` map must have the same type, and can only be doubles or strings. If `col` is "*", then the replacement is applied on all string columns or numeric columns. Example: import com.google.common.collect.ImmutableMap; // Replaces all occurrences of 1.0 with 2.0 in column "height" and "weight". df.replace(new String[] {"height", "weight"}, ImmutableMap.of(1.0, 2.0)); // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "firstname" and "lastname". df.replace(new String[] {"firstname", "lastname"}, ImmutableMap.of("UNKNOWN", "unnamed"));
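A minimal sketch of how the drop/fill/replace behavior described above might look from C#; the `Na()` accessor and the exact `Drop`/`Fill`/`Replace` overloads are assumptions modeled on these descriptions (and on the corresponding Spark API), not verified signatures from this diff.

```csharp
using System.Collections.Generic;
using Microsoft.Spark.CSharp.Sql;

static class NullHandlingSketch
{
    // sqlContext is assumed to be an already-created Mobius SqlContext.
    public static void Run(SqlContext sqlContext)
    {
        var df = sqlContext.Read().Json("people.json"); // placeholder path

        // Drop any row that contains at least one null value (hypothetical accessor/overload).
        var noNulls = df.Na().Drop();

        // Keep rows only if they contain at least 2 non-null values in the listed columns.
        var atLeastTwo = df.Na().Drop(2, new[] { "name", "age", "city" });

        // Fill nulls per column: the string column gets "unknown", the numeric column gets 0.0.
        var filled = df.Na().Fill(new Dictionary<string, object>
        {
            { "name", "unknown" },
            { "age", 0.0 }
        });

        // Replace matching values: every 1.0 in "height" becomes 2.0.
        var replaced = df.Na().Replace("height", new Dictionary<double, double> { { 1.0, 2.0 } });

        replaced.Show();
    }
}
```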
Specifies the input data source format.
Specifies the input schema. Some data sources (e.g. JSON) can infer the input schema automatically from data. By specifying the schema here, the underlying data source can skip the schema inference step, and thus speed up data loading.
Adds an input option for the underlying data source.
Adds input options for the underlying data source.
Loads input in as a [[DataFrame]], for data sources that require a path (e.g. data backed by a local or distributed file system).
Loads input in as a DataFrame, for data sources that don't require a path (e.g. external key-value stores).
Construct a [[DataFrame]] representing the database table `table`, accessible via JDBC URL `url` and connection properties.
Construct a DataFrame representing the database table `table`, accessible via JDBC URL `url`. Partitions of the table will be retrieved in parallel based on the parameters passed to this function. Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems.
Construct a DataFrame representing the database table `table`, accessible via JDBC URL `url` using connection properties. The `predicates` parameter gives a list of expressions suitable for inclusion in WHERE clauses; each one defines one partition of the DataFrame. Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems.
Loads a JSON file (one object per line) and returns the result as a DataFrame. This function goes through the input once to determine the input schema. If you know the schema in advance, use the version that specifies the schema to avoid the extra scan.
Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty DataFrame if no paths are passed in.
Loads an Avro file and returns the result as a DataFrame. This function goes through the input once to determine the input schema. If you know the schema in advance, use the version that specifies the schema to avoid the extra scan.
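Read together, the loader descriptions above describe a builder-style reader. Here is a hedged sketch of its use; the paths, option keys, and JDBC connection details are placeholders, and the exact `Jdbc` parameter types are assumed rather than taken from this diff.

```csharp
using System.Collections.Generic;
using Microsoft.Spark.CSharp.Sql;

static class DataFrameReaderSketch
{
    public static void Run(SqlContext sqlContext)
    {
        // Generic form: pick a format, add options, then load from a path.
        var jsonViaLoad = sqlContext.Read()
                                    .Format("json")
                                    .Option("samplingRatio", "1.0") // illustrative option key
                                    .Load("hdfs:///data/people.json");

        // Format-specific helpers.
        var fromJson = sqlContext.Read().Json("hdfs:///data/people.json");
        var fromParquet = sqlContext.Read().Parquet("hdfs:///data/people.parquet");

        // JDBC: url, table name, and connection properties (all values are placeholders).
        var fromJdbc = sqlContext.Read().Jdbc(
            "jdbc:sqlserver://localhost;databaseName=test",
            "dbo.People",
            new Dictionary<string, string> { { "user", "test" }, { "password", "test" } });

        fromParquet.Show();
    }
}
```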
Specifies the behavior when data or table already exists. Options include: - `SaveMode.Overwrite`: overwrite the existing data. - `SaveMode.Append`: append the data. - `SaveMode.Ignore`: ignore the operation (i.e. no-op). - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime.
Specifies the behavior when data or table already exists. Options include: - `SaveMode.Overwrite`: overwrite the existing data. - `SaveMode.Append`: append the data. - `SaveMode.Ignore`: ignore the operation (i.e. no-op). - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime.
Specifies the underlying output data source. Built-in options include "parquet", "json", etc.
Adds an output option for the underlying data source.
Adds output options for the underlying data source.
Partitions the output by the given columns on the file system. If specified, the output is laid out on the file system similar to Hive's partitioning scheme. This is only applicable for Parquet at the moment.
Saves the content of the DataFrame at the specified path.
Saves the content of the DataFrame as the specified table.
Inserts the content of the DataFrame to the specified table. It requires that the schema of the DataFrame is the same as the schema of the table. Because it inserts data to an existing table, format or options will be ignored.
Saves the content of the DataFrame as the specified table. In the case the table already exists, behavior of this function depends on the save mode, specified by the `mode` function (defaults to throwing an exception). When `mode` is `Overwrite`, the schema of the DataFrame does not need to be the same as that of the existing table. When `mode` is `Append`, the schema of the DataFrame needs to be the same as that of the existing table, and format or options will be ignored.
Saves the content of the DataFrame to an external database table via JDBC. In the case the table already exists in the external database, behavior of this function depends on the save mode, specified by the `mode` function (defaults to throwing an exception). Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems.
Saves the content of the DataFrame in JSON format at the specified path. This is equivalent to: Format("json").Save(path)
Saves the content of the DataFrame in Parquet format at the specified path. This is equivalent to: Format("parquet").Save(path)
Saves the content of the DataFrame in Avro format at the specified path. This is equivalent to: Format("com.databricks.spark.avro").Save(path)
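The writer side mirrors the reader. A hedged sketch follows; the `SaveMode` enum usage and the `PartitionBy` parameter form are assumed from the descriptions above, and paths and table names are placeholders.

```csharp
using Microsoft.Spark.CSharp.Sql;

static class DataFrameWriterSketch
{
    public static void Run(DataFrame df)
    {
        // Generic form: behavior on existing data, output format, partition layout, target path.
        df.Write()
          .Mode(SaveMode.Overwrite)
          .Format("parquet")
          .PartitionBy("year", "month") // Hive-style layout; Parquet only, per the note above
          .Save("hdfs:///out/people_parquet");

        // Shorthand helpers, equivalent to Format(...).Save(path).
        df.Write().Mode(SaveMode.Append).Json("hdfs:///out/people_json");
        df.Write().Parquet("hdfs:///out/people_parquet_again");

        // Avro goes through the spark-avro data source package, as described above.
        df.Write().Format("com.databricks.spark.avro").Save("hdfs:///out/people_avro");

        // Persist as a catalog table (requires a Hive-backed context, per the SaveAsTable note).
        df.Write().Mode(SaveMode.ErrorIfExists).SaveAsTable("people_table");
    }
}
```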
--- @@ -688,7 +688,7 @@ ####Methods -
| Name | Description |
| --- | --- |
| Format | Specifies the input data source format. |
| Schema | Specifies the input schema. Some data sources (e.g. JSON) can infer the input schema automatically from data. By specifying the schema here, the underlying data source can skip the schema inference step, and thus speed up data loading. |
| Option | Adds an input option for the underlying data source. |
| Options | Adds input options for the underlying data source. |
| Load | Loads input in as a [[DataFrame]], for data sources that require a path (e.g. data backed by a local or distributed file system). |
| Load | Loads input in as a DataFrame, for data sources that don't require a path (e.g. external key-value stores). |
| Jdbc | Construct a [[DataFrame]] representing the database table `table`, accessible via JDBC URL `url` and connection properties. |
| Jdbc | Construct a DataFrame representing the database table `table`, accessible via JDBC URL `url`. Partitions of the table will be retrieved in parallel based on the parameters passed to this function. Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems. |
| Jdbc | Construct a DataFrame representing the database table `table`, accessible via JDBC URL `url` using connection properties. The `predicates` parameter gives a list of expressions suitable for inclusion in WHERE clauses; each one defines one partition of the DataFrame. Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems. |
| Json | Loads a JSON file (one object per line) and returns the result as a DataFrame. This function goes through the input once to determine the input schema. If you know the schema in advance, use the version that specifies the schema to avoid the extra scan. |
| Parquet | Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty DataFrame if no paths are passed in. |
+
| Name | Description |
| --- | --- |
| Format | Specifies the input data source format. |
| Schema | Specifies the input schema. Some data sources (e.g. JSON) can infer the input schema automatically from data. By specifying the schema here, the underlying data source can skip the schema inference step, and thus speed up data loading. |
| Option | Adds an input option for the underlying data source. |
| Options | Adds input options for the underlying data source. |
| Load | Loads input in as a [[DataFrame]], for data sources that require a path (e.g. data backed by a local or distributed file system). |
| Load | Loads input in as a DataFrame, for data sources that don't require a path (e.g. external key-value stores). |
| Jdbc | Construct a [[DataFrame]] representing the database table `table`, accessible via JDBC URL `url` and connection properties. |
| Jdbc | Construct a DataFrame representing the database table `table`, accessible via JDBC URL `url`. Partitions of the table will be retrieved in parallel based on the parameters passed to this function. Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems. |
| Jdbc | Construct a DataFrame representing the database table `table`, accessible via JDBC URL `url` using connection properties. The `predicates` parameter gives a list of expressions suitable for inclusion in WHERE clauses; each one defines one partition of the DataFrame. Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems. |
| Json | Loads a JSON file (one object per line) and returns the result as a DataFrame. This function goes through the input once to determine the input schema. If you know the schema in advance, use the version that specifies the schema to avoid the extra scan. |
| Parquet | Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty DataFrame if no paths are passed in. |
| Avro | Loads an Avro file and returns the result as a DataFrame. This function goes through the input once to determine the input schema. If you know the schema in advance, use the version that specifies the schema to avoid the extra scan. |
--- @@ -705,7 +705,7 @@ ####Methods -
| Name | Description |
| --- | --- |
| Mode | Specifies the behavior when data or table already exists. Options include: - `SaveMode.Overwrite`: overwrite the existing data. - `SaveMode.Append`: append the data. - `SaveMode.Ignore`: ignore the operation (i.e. no-op). - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime. |
| Mode | Specifies the behavior when data or table already exists. Options include: - `SaveMode.Overwrite`: overwrite the existing data. - `SaveMode.Append`: append the data. - `SaveMode.Ignore`: ignore the operation (i.e. no-op). - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime. |
| Format | Specifies the underlying output data source. Built-in options include "parquet", "json", etc. |
| Option | Adds an output option for the underlying data source. |
| Options | Adds output options for the underlying data source. |
| PartitionBy | Partitions the output by the given columns on the file system. If specified, the output is laid out on the file system similar to Hive's partitioning scheme. This is only applicable for Parquet at the moment. |
| Save | Saves the content of the DataFrame at the specified path. |
| Save | Saves the content of the DataFrame as the specified table. |
| InsertInto | Inserts the content of the DataFrame to the specified table. It requires that the schema of the DataFrame is the same as the schema of the table. Because it inserts data to an existing table, format or options will be ignored. |
| SaveAsTable | Saves the content of the DataFrame as the specified table. In the case the table already exists, behavior of this function depends on the save mode, specified by the `mode` function (defaults to throwing an exception). When `mode` is `Overwrite`, the schema of the DataFrame does not need to be the same as that of the existing table. When `mode` is `Append`, the schema of the DataFrame needs to be the same as that of the existing table, and format or options will be ignored. |
| Jdbc | Saves the content of the DataFrame to an external database table via JDBC. In the case the table already exists in the external database, behavior of this function depends on the save mode, specified by the `mode` function (defaults to throwing an exception). Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems. |
| Json | Saves the content of the DataFrame in JSON format at the specified path. This is equivalent to: Format("json").Save(path) |
| Parquet | Saves the content of the DataFrame in Parquet format at the specified path. This is equivalent to: Format("parquet").Save(path) |
+
| Name | Description |
| --- | --- |
| Mode | Specifies the behavior when data or table already exists. Options include: - `SaveMode.Overwrite`: overwrite the existing data. - `SaveMode.Append`: append the data. - `SaveMode.Ignore`: ignore the operation (i.e. no-op). - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime. |
| Mode | Specifies the behavior when data or table already exists. Options include: - `SaveMode.Overwrite`: overwrite the existing data. - `SaveMode.Append`: append the data. - `SaveMode.Ignore`: ignore the operation (i.e. no-op). - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime. |
| Format | Specifies the underlying output data source. Built-in options include "parquet", "json", etc. |
| Option | Adds an output option for the underlying data source. |
| Options | Adds output options for the underlying data source. |
| PartitionBy | Partitions the output by the given columns on the file system. If specified, the output is laid out on the file system similar to Hive's partitioning scheme. This is only applicable for Parquet at the moment. |
| Save | Saves the content of the DataFrame at the specified path. |
| Save | Saves the content of the DataFrame as the specified table. |
| InsertInto | Inserts the content of the DataFrame to the specified table. It requires that the schema of the DataFrame is the same as the schema of the table. Because it inserts data to an existing table, format or options will be ignored. |
| SaveAsTable | Saves the content of the DataFrame as the specified table. In the case the table already exists, behavior of this function depends on the save mode, specified by the `mode` function (defaults to throwing an exception). When `mode` is `Overwrite`, the schema of the DataFrame does not need to be the same as that of the existing table. When `mode` is `Append`, the schema of the DataFrame needs to be the same as that of the existing table, and format or options will be ignored. |
| Jdbc | Saves the content of the DataFrame to an external database table via JDBC. In the case the table already exists in the external database, behavior of this function depends on the save mode, specified by the `mode` function (defaults to throwing an exception). Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash your external database systems. |
| Json | Saves the content of the DataFrame in JSON format at the specified path. This is equivalent to: Format("json").Save(path) |
| Parquet | Saves the content of the DataFrame in Parquet format at the specified path. This is equivalent to: Format("parquet").Save(path) |
| Avro | Saves the content of the DataFrame in Avro format at the specified path. This is equivalent to: Format("com.databricks.spark.avro").Save(path) |
--- diff --git a/csharp/AdapterTest/AccumulatorTest.cs b/csharp/AdapterTest/AccumulatorTest.cs index 24ccfb57..75fb938e 100644 --- a/csharp/AdapterTest/AccumulatorTest.cs +++ b/csharp/AdapterTest/AccumulatorTest.cs @@ -33,7 +33,7 @@ public void TestInitialize() // get accumulator server port and connect to accumuator server int serverPort = (sc.SparkContextProxy as MockSparkContextProxy).AccumulatorServerPort; sock = SocketFactory.CreateSocket(); - sock.Connect(IPAddress.Loopback, serverPort); + sock.Connect(IPAddress.Loopback, serverPort, null); } [TearDown] diff --git a/csharp/AdapterTest/AdapterTest.csproj b/csharp/AdapterTest/AdapterTest.csproj index c32ed7aa..cbea5478 100644 --- a/csharp/AdapterTest/AdapterTest.csproj +++ b/csharp/AdapterTest/AdapterTest.csproj @@ -35,22 +35,25 @@ 4 + + ..\packages\log4net.2.0.8\lib\net45-full\log4net.dll + ..\packages\Moq.4.2.1510.2205\lib\net40\Moq.dll True - - ..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll + + ..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll ..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll True - + ..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll - + ..\packages\Razorvine.Serpent.1.12.0.0\lib\net40\Razorvine.Serpent.dll diff --git a/csharp/AdapterTest/DataFrameTest.cs b/csharp/AdapterTest/DataFrameTest.cs index d54a9c31..34a6dfbc 100644 --- a/csharp/AdapterTest/DataFrameTest.cs +++ b/csharp/AdapterTest/DataFrameTest.cs @@ -12,6 +12,7 @@ using Microsoft.Spark.CSharp.Proxy; using NUnit.Framework; using Moq; +using Microsoft.Spark.CSharp.Network; namespace AdapterTest { @@ -65,10 +66,10 @@ public void TestDataFrameCount() [Test] public void TestShow() { - mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny(), It.IsAny())).Returns("Show"); + mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny(), It.IsAny(), It.IsAny())).Returns("Show"); var dataFrame = new DataFrame(mockDataFrameProxy.Object, null); dataFrame.Show(); - mockDataFrameProxy.Verify(m => m.GetShowString(20, true), Times.Once); + mockDataFrameProxy.Verify(m => m.GetShowString(20, 20, false), Times.Once); } [Test] @@ -135,9 +136,9 @@ public void TestDataFrameCollect() var expectedRows = new Row[] {new MockRow(), new MockRow()}; var mockRddProxy = new Mock(); var mockRddCollector = new Mock(); - mockRddCollector.Setup(m => m.Collect(It.IsAny(), It.IsAny(), It.IsAny())) + mockRddCollector.Setup(m => m.Collect(It.IsAny(), It.IsAny(), It.IsAny())) .Returns(expectedRows); - mockRddProxy.Setup(m => m.CollectAndServe()).Returns(123); + mockRddProxy.Setup(m => m.CollectAndServe()).Returns(new SocketInfo(123,null)); mockRddProxy.Setup(m => m.RDDCollector).Returns(mockRddCollector.Object); mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(mockRddProxy.Object); var dataFrame = new DataFrame(mockDataFrameProxy.Object, null); @@ -838,9 +839,9 @@ public void TestHead() var expectedRows = new Row[] {new MockRow(), new MockRow(), new MockRow(), new MockRow(), new MockRow()}; var mockRddProxy = new Mock(); var mockRddCollector = new Mock(); - mockRddCollector.Setup(m => m.Collect(It.IsAny(), It.IsAny(), It.IsAny())) + mockRddCollector.Setup(m => m.Collect(It.IsAny(), It.IsAny(), It.IsAny())) .Returns(expectedRows); - mockRddProxy.Setup(m => m.CollectAndServe()).Returns(123); + mockRddProxy.Setup(m => m.CollectAndServe()).Returns(new SocketInfo(123, null)); mockRddProxy.Setup(m => m.RDDCollector).Returns(mockRddCollector.Object); mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(mockRddProxy.Object); 
mockDataFrameProxy.Setup(m => m.Limit(It.IsAny())).Returns(mockDataFrameProxy.Object); @@ -868,9 +869,9 @@ public void TestFirst() var expectedRows = new Row[] { new MockRow(), new MockRow(), new MockRow(), new MockRow(), new MockRow() }; var mockRddProxy = new Mock(); var mockRddCollector = new Mock(); - mockRddCollector.Setup(m => m.Collect(It.IsAny(), It.IsAny(), It.IsAny())) + mockRddCollector.Setup(m => m.Collect(It.IsAny(), It.IsAny(), It.IsAny())) .Returns(expectedRows); - mockRddProxy.Setup(m => m.CollectAndServe()).Returns(123); + mockRddProxy.Setup(m => m.CollectAndServe()).Returns(new SocketInfo(123, null)); mockRddProxy.Setup(m => m.RDDCollector).Returns(mockRddCollector.Object); mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(mockRddProxy.Object); mockDataFrameProxy.Setup(m => m.Limit(It.IsAny())).Returns(mockDataFrameProxy.Object); @@ -892,9 +893,9 @@ public void TestTake() var expectedRows = new Row[] { new MockRow(), new MockRow(), new MockRow(), new MockRow(), new MockRow() }; var mockRddProxy = new Mock(); var mockRddCollector = new Mock(); - mockRddCollector.Setup(m => m.Collect(It.IsAny(), It.IsAny(), It.IsAny())) + mockRddCollector.Setup(m => m.Collect(It.IsAny(), It.IsAny(), It.IsAny())) .Returns(expectedRows); - mockRddProxy.Setup(m => m.CollectAndServe()).Returns(123); + mockRddProxy.Setup(m => m.CollectAndServe()).Returns(new SocketInfo(123, null)); mockRddProxy.Setup(m => m.RDDCollector).Returns(mockRddCollector.Object); mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(mockRddProxy.Object); mockDataFrameProxy.Setup(m => m.Limit(It.IsAny())).Returns(mockDataFrameProxy.Object); diff --git a/csharp/AdapterTest/DatasetTest.cs b/csharp/AdapterTest/DatasetTest.cs index 7ee59db9..b9000410 100644 --- a/csharp/AdapterTest/DatasetTest.cs +++ b/csharp/AdapterTest/DatasetTest.cs @@ -38,12 +38,12 @@ public void TestCleanUp() public void TestShow() { Mock mockDataFrameProxy = new Mock(); - mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny(), It.IsAny())).Returns("Show"); + mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny(), It.IsAny(), It.IsAny())).Returns("Show"); mockDatasetProxy.Setup(m => m.ToDF()).Returns(mockDataFrameProxy.Object); var dataset = new Dataset(mockDatasetProxy.Object); dataset.Show(); - mockDataFrameProxy.Verify(m => m.GetShowString(20, true), Times.Once); + mockDataFrameProxy.Verify(m => m.GetShowString(20, 20, false), Times.Once); } [Test] diff --git a/csharp/AdapterTest/Mocks/MockDataFrameProxy.cs b/csharp/AdapterTest/Mocks/MockDataFrameProxy.cs index a68d4082..60e84fbc 100644 --- a/csharp/AdapterTest/Mocks/MockDataFrameProxy.cs +++ b/csharp/AdapterTest/Mocks/MockDataFrameProxy.cs @@ -9,6 +9,7 @@ using System.Net; using System.Net.Sockets; using System.IO; +using Microsoft.Spark.CSharp.Core; using Microsoft.Spark.CSharp.Sql; using Razorvine.Pickle; using Microsoft.Spark.CSharp.Proxy; @@ -64,7 +65,7 @@ public string GetExecutedPlan() throw new NotImplementedException(); } - public string GetShowString(int numberOfRows, bool truncate) + public string GetShowString(int numberOfRows, int truncate, bool vertical) { throw new NotImplementedException(); } @@ -240,7 +241,12 @@ public IDataFrameProxy Sample(bool withReplacement, double fraction, long seed) throw new NotImplementedException(); } - public IDataFrameWriterProxy Write() + public IDataFrameProxy Broadcast() + { + throw new NotImplementedException(); + } + + public IDataFrameWriterProxy Write() { throw new NotImplementedException(); } diff --git 
a/csharp/AdapterTest/Mocks/MockRDDCollector.cs b/csharp/AdapterTest/Mocks/MockRDDCollector.cs index 2ec5c627..e9c8c5c7 100644 --- a/csharp/AdapterTest/Mocks/MockRDDCollector.cs +++ b/csharp/AdapterTest/Mocks/MockRDDCollector.cs @@ -4,12 +4,13 @@ using System.Text; using System.Threading.Tasks; using Microsoft.Spark.CSharp.Core; +using Microsoft.Spark.CSharp.Network; namespace AdapterTest.Mocks { class MockRDDCollector : IRDDCollector { - public IEnumerable Collect(int port, SerializedMode serializedMode, Type type) + public IEnumerable Collect(SocketInfo port, SerializedMode serializedMode, Type type) { throw new NotImplementedException(); } diff --git a/csharp/AdapterTest/Mocks/MockRddProxy.cs b/csharp/AdapterTest/Mocks/MockRddProxy.cs index 03b01427..9188ea40 100644 --- a/csharp/AdapterTest/Mocks/MockRddProxy.cs +++ b/csharp/AdapterTest/Mocks/MockRddProxy.cs @@ -15,6 +15,7 @@ using Microsoft.Spark.CSharp.Proxy; using Microsoft.Spark.CSharp.Interop.Ipc; using NUnit.Framework; +using Microsoft.Spark.CSharp.Network; namespace AdapterTest.Mocks { @@ -60,7 +61,7 @@ public IRDDProxy Union(IRDDProxy javaRddReferenceOther) return union; } - public int CollectAndServe() + public SocketInfo CollectAndServe() { return MockSparkContextProxy.RunJob(this); } diff --git a/csharp/AdapterTest/Mocks/MockRow.cs b/csharp/AdapterTest/Mocks/MockRow.cs index bfa5b73b..a6a9a86e 100644 --- a/csharp/AdapterTest/Mocks/MockRow.cs +++ b/csharp/AdapterTest/Mocks/MockRow.cs @@ -8,6 +8,13 @@ namespace AdapterTest.Mocks { public class MockRow : Row { + public override dynamic[] Values + { + get + { + throw new NotImplementedException(); + } + } public override int Size() { diff --git a/csharp/AdapterTest/Mocks/MockSparkContextProxy.cs b/csharp/AdapterTest/Mocks/MockSparkContextProxy.cs index 609e591c..da8b853c 100644 --- a/csharp/AdapterTest/Mocks/MockSparkContextProxy.cs +++ b/csharp/AdapterTest/Mocks/MockSparkContextProxy.cs @@ -195,7 +195,7 @@ public IUDFProxy CreateUserDefinedCSharpFunction(string name, byte[] command, st throw new NotImplementedException(); } - internal static int RunJob(IRDDProxy rdd) + internal static SocketInfo RunJob(IRDDProxy rdd) { var mockRdd = (rdd as MockRddProxy); IEnumerable result = mockRdd.pickle ? 
mockRdd.result.Cast() : @@ -222,10 +222,12 @@ internal static int RunJob(IRDDProxy rdd) ns.Flush(); } }); - return (listener.LocalEndPoint as IPEndPoint).Port; + + SocketInfo socketInfo = new SocketInfo((listener.LocalEndPoint as IPEndPoint).Port, null); + return socketInfo; } - public int RunJob(IRDDProxy rdd, IEnumerable partitions) + public SocketInfo RunJob(IRDDProxy rdd, IEnumerable partitions) { return RunJob(rdd); } diff --git a/csharp/AdapterTest/SocketWrapperTest.cs b/csharp/AdapterTest/SocketWrapperTest.cs index 3c7fac3d..63c2ef8d 100644 --- a/csharp/AdapterTest/SocketWrapperTest.cs +++ b/csharp/AdapterTest/SocketWrapperTest.cs @@ -86,9 +86,9 @@ private void SocketTest(ISocketWrapper serverSocket) Assert.Throws(() => clientSock.GetStream()); Assert.Throws(() => clientSock.Receive()); Assert.Throws(() => clientSock.Send(null)); - Assert.Throws(() => clientSock.Connect(IPAddress.Any, 1024)); + Assert.Throws(() => clientSock.Connect(IPAddress.Any, 1024, null)); - clientSock.Connect(IPAddress.Loopback, port); + clientSock.Connect(IPAddress.Loopback, port, null); // Valid invalid operation var byteBuf = ByteBufPool.Default.Allocate(); diff --git a/csharp/AdapterTest/TestWithMoqDemo.cs b/csharp/AdapterTest/TestWithMoqDemo.cs index 337794b5..706413c3 100644 --- a/csharp/AdapterTest/TestWithMoqDemo.cs +++ b/csharp/AdapterTest/TestWithMoqDemo.cs @@ -80,7 +80,7 @@ public void TestInitialize() ns.Flush(); } }); - return (listener.LocalEndPoint as IPEndPoint).Port; + return new SocketInfo((listener.LocalEndPoint as IPEndPoint).Port, null); }); _mockRddProxy.Setup(m => m.RDDCollector).Returns(new RDDCollector()); diff --git a/csharp/AdapterTest/packages.config b/csharp/AdapterTest/packages.config index c3a926b6..c7cc11eb 100644 --- a/csharp/AdapterTest/packages.config +++ b/csharp/AdapterTest/packages.config @@ -1,10 +1,11 @@  + - + + - \ No newline at end of file diff --git a/csharp/Repl/Repl.csproj b/csharp/Repl/Repl.csproj index 35d8bd68..faf98f4f 100644 --- a/csharp/Repl/Repl.csproj +++ b/csharp/Repl/Repl.csproj @@ -34,6 +34,9 @@ false + + ..\packages\log4net.2.0.8\lib\net45-full\log4net.dll + False ..\packages\Microsoft.Net.Compilers.1.1.1\tools\Microsoft.CodeAnalysis.dll @@ -50,11 +53,13 @@ False ..\packages\Microsoft.Net.Compilers.1.1.1\tools\Microsoft.CodeAnalysis.Scripting.dll + + ..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll + - False ..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll - + ..\packages\Razorvine.Serpent.1.12.0.0\lib\net40\Razorvine.Serpent.dll diff --git a/csharp/Repl/packages.config b/csharp/Repl/packages.config index 76ea838a..7c1ac611 100644 --- a/csharp/Repl/packages.config +++ b/csharp/Repl/packages.config @@ -1,8 +1,8 @@  - + - - - + + + \ No newline at end of file diff --git a/csharp/Samples/Microsoft.Spark.CSharp/DataFrameSamples.cs b/csharp/Samples/Microsoft.Spark.CSharp/DataFrameSamples.cs index 5f4e5b49..cb6bac8c 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/DataFrameSamples.cs +++ b/csharp/Samples/Microsoft.Spark.CSharp/DataFrameSamples.cs @@ -1867,5 +1867,72 @@ internal static void DFSaveSample() SparkCLRSamples.FileSystemHelper.DeleteDirectory(path, true); Console.WriteLine("Remove directory: {0}", path); } + + /// + /// Single UDF Sample + /// + [Sample] + internal static void SingleUDFSample() + { + var sqlContext = GetSqlContext(); + var peopleDataFrame = sqlContext.Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(PeopleJson)); + peopleDataFrame.RegisterTempTable("peopleDataFrame"); + + 
sqlContext.RegisterFunction("UDF", (int x, int y) => { return x + y; }); + + var rowSet = sqlContext.Sql("SELECT * FROM peopleDataFrame where UDF(age, 20) > 60"); + + rowSet.Show(); + + if (SparkCLRSamples.Configuration.IsValidationEnabled) + { + Assert.AreEqual(rowSet.Count() ,2); + } + } + + /// + /// Single UDF Sample with duplicate values + /// + [Sample] + internal static void SingleUDFWithDupSample() + { + var sqlContext = GetSqlContext(); + var peopleDataFrame = sqlContext.Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(PeopleJson)); + peopleDataFrame.RegisterTempTable("peopleDataFrame"); + + sqlContext.RegisterFunction("UDF", (int x, int y) => { return x + y; }); + + var rowSet = sqlContext.Sql("SELECT * FROM peopleDataFrame where UDF(age, age) < 50"); + + rowSet.Show(); + + if (SparkCLRSamples.Configuration.IsValidationEnabled) + { + Assert.AreEqual(rowSet.Count(), 1); + } + } + + /// + /// Multiple UDFs sample + /// + [Sample] + internal static void MultipleUDFSample() + { + var sqlContext = GetSqlContext(); + var peopleDataFrame = sqlContext.Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(PeopleJson)); + peopleDataFrame.RegisterTempTable("peopleDataFrame"); + + sqlContext.RegisterFunction("UDF1", (int x, int y) => { return x + y; }); + sqlContext.RegisterFunction("UDF2", (string name, string id) => { return name + ":" + id; }); + + var rowSet = sqlContext.Sql("SELECT id, name, UDF1(age, 20) AS UDF1, UDF2(name, id) AS UDF2 FROM peopleDataFrame where UDF1(age, 20) > 60"); + + rowSet.Show(); + + if (SparkCLRSamples.Configuration.IsValidationEnabled) + { + Assert.AreEqual(rowSet.Count(), 2); + } + } } } diff --git a/csharp/Samples/Microsoft.Spark.CSharp/Program.cs b/csharp/Samples/Microsoft.Spark.CSharp/Program.cs index 1f25fa26..f9b5af55 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/Program.cs +++ b/csharp/Samples/Microsoft.Spark.CSharp/Program.cs @@ -66,8 +66,10 @@ static void Main(string[] args) if (Configuration.IsValidationEnabled && !status) { - Environment.Exit(1); + Environment.Exit(2); } + + Environment.Exit(1); } // Creates and returns a context diff --git a/csharp/Samples/Microsoft.Spark.CSharp/Samples.csproj b/csharp/Samples/Microsoft.Spark.CSharp/Samples.csproj index 880feb27..d28e1d69 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/Samples.csproj +++ b/csharp/Samples/Microsoft.Spark.CSharp/Samples.csproj @@ -33,9 +33,11 @@ 4 - - ..\..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll - True + + ..\..\packages\log4net.2.0.8\lib\net45-full\log4net.dll + + + ..\..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll ..\..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll diff --git a/csharp/Samples/Microsoft.Spark.CSharp/packages.config b/csharp/Samples/Microsoft.Spark.CSharp/packages.config index 4abe7e92..fc5be339 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/packages.config +++ b/csharp/Samples/Microsoft.Spark.CSharp/packages.config @@ -1,5 +1,6 @@  - + + \ No newline at end of file diff --git a/csharp/Tests.Common/Tests.Common.csproj b/csharp/Tests.Common/Tests.Common.csproj index 361031e7..a2ca2c97 100644 --- a/csharp/Tests.Common/Tests.Common.csproj +++ b/csharp/Tests.Common/Tests.Common.csproj @@ -36,11 +36,10 @@ 4 - - False - ..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll - + + ..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll + ..\packages\Razorvine.Pyrolite.4.10.0.0\lib\net40\Razorvine.Pyrolite.dll diff --git a/csharp/Utils/Microsoft.Spark.CSharp/FileSystem/HdfsFileStatus.cs 
b/csharp/Utils/Microsoft.Spark.CSharp/FileSystem/HdfsFileStatus.cs new file mode 100644 index 00000000..02228494 --- /dev/null +++ b/csharp/Utils/Microsoft.Spark.CSharp/FileSystem/HdfsFileStatus.cs @@ -0,0 +1,57 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.Spark.CSharp.Interop.Ipc; +using Microsoft.Spark.CSharp.Proxy.Ipc; + +namespace Microsoft.Spark.CSharp.Utils.FileSystem +{ + /// + /// See https://hadoop.apache.org/docs/r2.6.1/api/org/apache/hadoop/fs/FileStatus.html + /// + public class HdfsFileStatus + { + public long Length => _status.Value.Length; + public long ModificationTime => _status.Value.Time; + public string Owner => _status.Value.Owner; + public string Path => _status.Value.Path; + public bool IsFile => _status.Value.IsFile; + public bool IsDirectory => _status.Value.IsDirectory; + public bool IsSymlink => _status.Value.IsSymlink; + + private Lazy _status; + + internal HdfsFileStatus(JvmObjectReference obj) + { + _status = new Lazy(()=>new Status(obj)); + } + + private class Status + { + public long Length; + public long Time; + public string Owner; + public string Path; + public bool IsFile; + public bool IsDirectory; + public bool IsSymlink; + + public Status(JvmObjectReference obj) + { + Length = (long) SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "getLen"); + Time = (long)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "getModificationTime"); + Owner = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "getOwner"); + IsFile = (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "isFile"); + IsDirectory = (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "isDirectory"); + IsSymlink = (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "isSymlink"); + var pr = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(obj, "getPath")); + Path = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(pr, "getName"); + } + } + } +} diff --git a/csharp/Utils/Microsoft.Spark.CSharp/FileSystem/HdfsFileSystemHelper.cs b/csharp/Utils/Microsoft.Spark.CSharp/FileSystem/HdfsFileSystemHelper.cs index 52d20c3b..c88c93b1 100644 --- a/csharp/Utils/Microsoft.Spark.CSharp/FileSystem/HdfsFileSystemHelper.cs +++ b/csharp/Utils/Microsoft.Spark.CSharp/FileSystem/HdfsFileSystemHelper.cs @@ -4,8 +4,11 @@ using System; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; +using System.Linq; +using Microsoft.Spark.CSharp.Interop; using Microsoft.Spark.CSharp.Interop.Ipc; using Microsoft.Spark.CSharp.Proxy.Ipc; +using Microsoft.Spark.CSharp.Utils.FileSystem; namespace Microsoft.Spark.CSharp.Utils { @@ -18,7 +21,7 @@ public class HdfsFileSystemHelper : IFileSystemHelper public HdfsFileSystemHelper() { - var jvmConfReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.conf.Configuration"); + var jvmConfReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.conf.Configuration"); jvmHdfsReference = new JvmObjectReference((string) SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.hadoop.fs.FileSystem", "get", jvmConfReference)); } @@ -39,16 +42,25 @@ public IEnumerable EnumerateFiles(string path) for (var i = 0; i < statusList.Count; i++) { var subPathJvmReference = new 
JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(statusList[i], "getPath")); - files[i] = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(subPathJvmReference, "getName"); + files[i] = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(subPathJvmReference, "getName"); } return files; } - /// - /// Build a temp file path under '/tmp' path on HDFS. - /// - public string GetTempFileName() + /// + /// List the names of all the files under the given path. + /// + public IEnumerable ListStatus(string path) + { + var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path); + return ((List)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "listStatus", pathJvmReference)).Select(r=>new HdfsFileStatus(r)); + } + + /// + /// Build a temp file path under '/tmp' path on HDFS. + /// + public string GetTempFileName() { return "/tmp/" + Guid.NewGuid().ToString("N"); } @@ -91,5 +103,37 @@ internal bool Delete(string path, bool recursive) var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path); return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "delete", pathJvmReference, recursive); } - } + + public bool IsFile(string path) + { + var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path); + return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "isFile", pathJvmReference); + } + + public bool IsDirectory(string path) + { + var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path); + return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "isDirectory", pathJvmReference); + } + + public bool Touch(string path) + { + var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path); + return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "createNewFile", pathJvmReference); + } + + public void CopyFromLocalFile(string src, string dest) + { + var from = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", new Uri(src).AbsoluteUri); + var to = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", dest); + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "copyFromLocalFile", from, to); + } + + public void CopyToLocalFile(string src, string dest) + { + var to = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", new Uri(dest).AbsoluteUri); + var from = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", src); + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "copyToLocalFile", from, to); + } + } } diff --git a/csharp/Utils/Microsoft.Spark.CSharp/Utils.csproj b/csharp/Utils/Microsoft.Spark.CSharp/Utils.csproj index 60657c71..d089d7db 100644 --- a/csharp/Utils/Microsoft.Spark.CSharp/Utils.csproj +++ b/csharp/Utils/Microsoft.Spark.CSharp/Utils.csproj @@ -40,6 +40,7 @@ + diff --git a/csharp/Worker/Microsoft.Spark.CSharp/MultiThreadWorker.cs b/csharp/Worker/Microsoft.Spark.CSharp/MultiThreadWorker.cs index f73e90b8..6fb9e1a0 100644 --- a/csharp/Worker/Microsoft.Spark.CSharp/MultiThreadWorker.cs +++ b/csharp/Worker/Microsoft.Spark.CSharp/MultiThreadWorker.cs @@ -111,7 +111,8 @@ private void StartDaemonServer(ISocketWrapper listener) bool sparkReuseWorker = false; string envVar = 
Environment.GetEnvironmentVariable("SPARK_REUSE_WORKER"); // this envVar is set in JVM side - if ((envVar != null) && envVar.Equals("1")) + var secret = Environment.GetEnvironmentVariable("PYTHON_WORKER_FACTORY_SECRET"); + if ((envVar != null) && envVar.Equals("1")) { sparkReuseWorker = true; } @@ -130,7 +131,7 @@ private void StartDaemonServer(ISocketWrapper listener) SerDe.Write(s, trId); // write taskRunnerId to JVM side s.Flush(); } - TaskRunner taskRunner = new TaskRunner(trId, socket, sparkReuseWorker); + TaskRunner taskRunner = new TaskRunner(trId, socket, sparkReuseWorker, secret); waitingTaskRunners.Add(taskRunner); taskRunnerRegistry[trId] = taskRunner; trId++; diff --git a/csharp/Worker/Microsoft.Spark.CSharp/TaskRunner.cs b/csharp/Worker/Microsoft.Spark.CSharp/TaskRunner.cs index fb88e431..fb398567 100644 --- a/csharp/Worker/Microsoft.Spark.CSharp/TaskRunner.cs +++ b/csharp/Worker/Microsoft.Spark.CSharp/TaskRunner.cs @@ -3,7 +3,9 @@ using System; using System.IO; +using System.Net; using System.Runtime.CompilerServices; +using System.Text; using System.Threading; using Microsoft.Spark.CSharp.Configuration; using Microsoft.Spark.CSharp.Interop.Ipc; @@ -13,106 +15,116 @@ [assembly: InternalsVisibleTo("WorkerTest")] namespace Microsoft.Spark.CSharp { - /// - /// TaskRunner is used to run Spark task assigned by JVM side. It uses a TCP socket to - /// communicate with JVM side. This socket may be reused to run multiple Spark tasks. - /// - internal class TaskRunner - { - private static ILoggerService logger; - private static ILoggerService Logger - { - get - { - if (logger != null) return logger; - logger = LoggerServiceFactory.GetLogger(typeof(TaskRunner)); - return logger; - } - } + /// + /// TaskRunner is used to run Spark task assigned by JVM side. It uses a TCP socket to + /// communicate with JVM side. This socket may be reused to run multiple Spark tasks. 
+ /// + internal class TaskRunner + { + private static ILoggerService logger; + private static ILoggerService Logger + { + get + { + if (logger != null) return logger; + logger = LoggerServiceFactory.GetLogger(typeof(TaskRunner)); + return logger; + } + } - private readonly ISocketWrapper socket; // Socket to communicate with JVM - private volatile bool stop; - private readonly bool socketReuse; // whether the socket can be reused to run multiple Spark tasks + private readonly ISocketWrapper socket; // Socket to communicate with JVM + private volatile bool stop; + private readonly bool socketReuse; // whether the socket can be reused to run multiple Spark tasks + private string secret; - /// - /// Task runner Id - /// - public int TaskId { get; private set; } + /// + /// Task runner Id + /// + public int TaskId { get; private set; } - public TaskRunner(int trId, ISocketWrapper socket, bool socketReuse) - { - TaskId = trId; - this.socket = socket; - this.socketReuse = socketReuse; - } + public TaskRunner(int trId, ISocketWrapper socket, bool socketReuse, string secret) + { + TaskId = trId; + this.socket = socket; + this.socketReuse = socketReuse; + this.secret = secret; + } - public void Run() - { - Logger.LogInfo("TaskRunner [{0}] is running ...", TaskId); + public void Run() + { + Logger.LogInfo("TaskRunner [{0}] is running ...", TaskId); - try - { - while (!stop) - { - using (var inputStream = socket.GetInputStream()) - using (var outputStream = socket.GetOutputStream()) - { - byte[] bytes = SerDe.ReadBytes(inputStream, sizeof(int)); - if (bytes != null) - { - int splitIndex = SerDe.ToInt(bytes); - bool readComplete = Worker.ProcessStream(inputStream, outputStream, splitIndex); - outputStream.Flush(); - if (!readComplete) // if the socket is not read through completely, then it can't be reused - { - stop = true; - // wait for server to complete, otherwise server may get 'connection reset' exception - Logger.LogInfo("Sleep 500 millisecond to close socket ..."); - Thread.Sleep(500); - } - else if (!socketReuse) - { - stop = true; - // wait for server to complete, otherwise server gets 'connection reset' exception - // Use SerDe.ReadBytes() to detect java side has closed socket properly - // ReadBytes() will block until the socket is closed - Logger.LogInfo("waiting JVM side to close socket..."); - SerDe.ReadBytes(inputStream); - Logger.LogInfo("JVM side has closed socket"); - } - } - else - { - stop = true; - Logger.LogWarn("read null splitIndex, socket is closed by JVM"); - } - } - } - } - catch (Exception e) - { - stop = true; - Logger.LogError("TaskRunner [{0}] exeption, will dispose this TaskRunner", TaskId); - Logger.LogException(e); - } - finally - { - try - { - socket.Close(); - } - catch (Exception ex) - { - Logger.LogWarn("close socket exception: {0}", ex); - } - Logger.LogInfo("TaskRunner [{0}] finished", TaskId); - } - } + try + { + while (!stop) + { + using (var inputStream = socket.GetInputStream()) + using (var outputStream = socket.GetOutputStream()) + { + if (!string.IsNullOrEmpty(secret)) + { + SerDe.Write(outputStream, secret); + outputStream.Flush(); + var reply = SerDe.ReadString(inputStream); + Logger.LogDebug("Connect back to JVM: " + reply); + secret = null; + } + byte[] bytes = SerDe.ReadBytes(inputStream, sizeof(int)); + if (bytes != null) + { + int splitIndex = SerDe.ToInt(bytes); + bool readComplete = Worker.ProcessStream(inputStream, outputStream, splitIndex); + outputStream.Flush(); + if (!readComplete) // if the socket is not read through completely, then it 
can't be reused + { + stop = true; + // wait for server to complete, otherwise server may get 'connection reset' exception + Logger.LogInfo("Sleep 500 millisecond to close socket ..."); + Thread.Sleep(500); + } + else if (!socketReuse) + { + stop = true; + // wait for server to complete, otherwise server gets 'connection reset' exception + // Use SerDe.ReadBytes() to detect java side has closed socket properly + // ReadBytes() will block until the socket is closed + Logger.LogInfo("waiting JVM side to close socket..."); + SerDe.ReadBytes(inputStream); + Logger.LogInfo("JVM side has closed socket"); + } + } + else + { + stop = true; + Logger.LogWarn("read null splitIndex, socket is closed by JVM"); + } + } + } + } + catch (Exception e) + { + stop = true; + Logger.LogError("TaskRunner [{0}] exeption, will dispose this TaskRunner", TaskId); + Logger.LogException(e); + } + finally + { + try + { + socket.Close(); + } + catch (Exception ex) + { + Logger.LogWarn("close socket exception: {0}", ex); + } + Logger.LogInfo("TaskRunner [{0}] finished", TaskId); + } + } - public void Stop() - { - Logger.LogInfo("try to stop TaskRunner [{0}]", TaskId); - stop = true; - } - } + public void Stop() + { + Logger.LogInfo("try to stop TaskRunner [{0}]", TaskId); + stop = true; + } + } } diff --git a/csharp/Worker/Microsoft.Spark.CSharp/UDFCommand.cs b/csharp/Worker/Microsoft.Spark.CSharp/UDFCommand.cs new file mode 100644 index 00000000..43cf6b5c --- /dev/null +++ b/csharp/Worker/Microsoft.Spark.CSharp/UDFCommand.cs @@ -0,0 +1,391 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using Microsoft.Spark.CSharp.Core; +using Microsoft.Spark.CSharp.Interop.Ipc; +using Microsoft.Spark.CSharp.Services; +using Microsoft.Spark.CSharp.Sql; +using Razorvine.Pickle; +using System; +using System.Collections; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Linq; +using System.Runtime.Serialization; +using System.Runtime.Serialization.Formatters.Binary; + +namespace Microsoft.Spark.CSharp +{ + /// + /// This class execute user defined methods. 
+ /// + + internal class UDFCommand + { + private readonly DateTime UnixTimeEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc); + private ILoggerService logger; + private Stream inputStream; + private Stream outputStream; + private int splitIndex; + private DateTime bootTime; + private string deserializerMode; + private string serializerMode; + private IFormatter formatter; + private Stopwatch commandProcessWatch; + private int isSqlUdf; + private List workerFuncList; + private int stageId; + + public UDFCommand(Stream inputStream, Stream outputStream, int splitIndex, DateTime bootTime, + string deserializerMode, string serializerMode, IFormatter formatter, + Stopwatch commandProcessWatch, int isSqlUdf, List workerFuncList, int stageId) + { + this.inputStream = inputStream; + this.outputStream = outputStream; + this.splitIndex = splitIndex; + this.bootTime = bootTime; + this.deserializerMode = deserializerMode; + this.serializerMode = serializerMode; + this.formatter = formatter; + this.commandProcessWatch = commandProcessWatch; + this.isSqlUdf = isSqlUdf; + this.workerFuncList = workerFuncList; + this.stageId = stageId; + + InitializeLogger(); + } + + private void InitializeLogger() + { + try + { + // if there exists exe.config file, then use log4net + if (File.Exists(AppDomain.CurrentDomain.SetupInformation.ConfigurationFile)) + { + LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); + } + + logger = LoggerServiceFactory.GetLogger(typeof(UDFCommand)); + } + catch (Exception e) + { + Console.WriteLine("InitializeLogger exception {0}, will exit", e); + Environment.Exit(-1); + } + } + + internal void Execute() + { + if (isSqlUdf == 0) + { + ExecuteNonSqlUDF(); + } + else + { + ExecuteSqlUDF(); + } + } + + private void ExecuteNonSqlUDF() + { + int count = 0; + int nullMessageCount = 0; + logger.LogDebug("Beginning to execute non sql func"); + WorkerFunc workerFunc = workerFuncList[0]; + var func = workerFunc.CharpWorkerFunc.Func; + + var funcProcessWatch = Stopwatch.StartNew(); + DateTime initTime = DateTime.UtcNow; + foreach (var message in func(splitIndex, GetIterator(inputStream, deserializerMode, isSqlUdf))) + { + funcProcessWatch.Stop(); + + if (object.ReferenceEquals(null, message)) + { + nullMessageCount++; + continue; + } + + try + { + WriteOutput(outputStream, serializerMode, message, formatter); + } + catch (Exception ex) + { + logger.LogError("WriteOutput() failed at iteration {0}, execption {1}", count, ex); + throw; + } + + count++; + funcProcessWatch.Start(); + } + + logger.LogInfo("Output entries count: " + count); + logger.LogDebug("Null messages count: " + nullMessageCount); + + WriteDiagnosticsInfo(outputStream, bootTime, initTime); + + commandProcessWatch.Stop(); + + // log statistics + logger.LogInfo("func process time: {0}", funcProcessWatch.ElapsedMilliseconds); + logger.LogInfo("stage {0}, command process time: {1}", stageId, commandProcessWatch.ElapsedMilliseconds); + } + + private void ExecuteSqlUDF() + { + int count = 0; + int nullMessageCount = 0; + logger.LogDebug("Beginning to execute sql func"); + + var funcProcessWatch = Stopwatch.StartNew(); + DateTime initTime = DateTime.UtcNow; + + foreach (var row in GetIterator(inputStream, deserializerMode, isSqlUdf)) + { + List messages = new List(); + + foreach (WorkerFunc workerFunc in workerFuncList) + { + List args = new List(); + foreach (int offset in workerFunc.ArgOffsets) + { + args.Add(row[offset]); + } + + foreach (var message in workerFunc.CharpWorkerFunc.Func(splitIndex, new[] { 
args.ToArray()})) + { + funcProcessWatch.Stop(); + + if (object.ReferenceEquals(null, message)) + { + nullMessageCount++; + continue; + } + + messages.Add(message); + } + } + + try + { + dynamic res = messages.ToArray(); + if (messages.Count == 1) + { + res = messages[0]; + } + + WriteOutput(outputStream, serializerMode, res, formatter); + } + catch (Exception ex) + { + logger.LogError("WriteOutput() failed at iteration {0}, exception error {1}", count, ex.Message); + throw; + } + + count++; + funcProcessWatch.Start(); + } + + logger.LogInfo("Output entries count: " + count); + logger.LogDebug("Null messages count: " + nullMessageCount); + + WriteDiagnosticsInfo(outputStream, bootTime, initTime); + + commandProcessWatch.Stop(); + + // log statistics + logger.LogInfo("func process time: {0}", funcProcessWatch.ElapsedMilliseconds); + logger.LogInfo("stage {0}, command process time: {0}", stageId, commandProcessWatch.ElapsedMilliseconds); + } + + private void WriteOutput(Stream networkStream, string serializerMode, dynamic message, IFormatter formatter) + { + var buffer = GetSerializedMessage(serializerMode, message, formatter); + if (buffer == null) + { + logger.LogError("Buffer is null"); + } + + if (buffer.Length <= 0) + { + logger.LogError("Buffer length {0} cannot be <= 0", buffer.Length); + } + + SerDe.Write(networkStream, buffer.Length); + SerDe.Write(networkStream, buffer); + } + + private byte[] GetSerializedMessage(string serializerMode, dynamic message, IFormatter formatter) + { + byte[] buffer; + + switch ((SerializedMode)Enum.Parse(typeof(SerializedMode), serializerMode)) + { + case SerializedMode.None: + buffer = message as byte[]; + break; + + case SerializedMode.String: + buffer = SerDe.ToBytes(message as string); + break; + + case SerializedMode.Row: + var pickler = new Pickler(); + buffer = pickler.dumps(new ArrayList { message }); + break; + + default: + try + { + var ms = new MemoryStream(); + formatter.Serialize(ms, message); + buffer = ms.ToArray(); + } + catch (Exception ex) + { + logger.LogError("Exception serializing output: " + ex); + logger.LogError("{0} : {1}", message.GetType().Name, message.GetType().FullName); + throw; + } + break; + } + + return buffer; + } + + private void WriteDiagnosticsInfo(Stream networkStream, DateTime bootTime, DateTime initTime) + { + DateTime finishTime = DateTime.UtcNow; + const string format = "MM/dd/yyyy hh:mm:ss.fff tt"; + + logger.LogDebug("bootTime: {0}, initTime: {1}, finish_time: {2}", + bootTime.ToString(format), initTime.ToString(format), finishTime.ToString(format)); + + SerDe.Write(networkStream, (int)SpecialLengths.TIMING_DATA); + SerDe.Write(networkStream, ToUnixTime(bootTime)); + SerDe.Write(networkStream, ToUnixTime(initTime)); + SerDe.Write(networkStream, ToUnixTime(finishTime)); + + SerDe.Write(networkStream, 0L); //shuffle.MemoryBytesSpilled + SerDe.Write(networkStream, 0L); //shuffle.DiskBytesSpilled + } + + private long ToUnixTime(DateTime dt) + { + return (long)(dt - UnixTimeEpoch).TotalMilliseconds; + } + + private IEnumerable GetIterator(Stream inputStream, string serializedMode, int isFuncSqlUdf) + { + logger.LogInfo("Serialized mode in GetIterator: " + serializedMode); + IFormatter formatter = new BinaryFormatter(); + var mode = (SerializedMode)Enum.Parse(typeof(SerializedMode), serializedMode); + int messageLength; + Stopwatch watch = Stopwatch.StartNew(); + Row tempRow = null; + + while ((messageLength = SerDe.ReadInt(inputStream)) != (int)SpecialLengths.END_OF_DATA_SECTION) + { + watch.Stop(); + if 
(messageLength > 0 || messageLength == (int)SpecialLengths.NULL) + { + watch.Start(); + byte[] buffer = messageLength > 0 ? SerDe.ReadBytes(inputStream, messageLength) : null; + watch.Stop(); + switch (mode) + { + case SerializedMode.String: + { + if (messageLength > 0) + { + if (buffer == null) + { + logger.LogDebug("Buffer is null. Message length is {0}", messageLength); + } + yield return SerDe.ToString(buffer); + } + else + { + yield return null; + } + break; + } + + case SerializedMode.Row: + { + Debug.Assert(messageLength > 0); + var unpickledObjects = PythonSerDe.GetUnpickledObjects(buffer); + + if (isFuncSqlUdf == 0) + { + foreach (var row in unpickledObjects.Select(item => (item as RowConstructor).GetRow())) + { + yield return row; + } + } + else + { + foreach (var row in unpickledObjects) + { + yield return row; + } + } + + break; + } + + case SerializedMode.Pair: + { + byte[] pairKey = buffer; + byte[] pairValue; + + watch.Start(); + int valueLength = SerDe.ReadInt(inputStream); + if (valueLength > 0) + { + pairValue = SerDe.ReadBytes(inputStream, valueLength); + } + else if (valueLength == (int)SpecialLengths.NULL) + { + pairValue = null; + } + else + { + throw new Exception(string.Format("unexpected valueLength: {0}", valueLength)); + } + watch.Stop(); + + yield return new Tuple(pairKey, pairValue); + break; + } + + case SerializedMode.None: //just return raw bytes + { + yield return buffer; + break; + } + + default: + { + if (buffer != null) + { + var ms = new MemoryStream(buffer); + yield return formatter.Deserialize(ms); + } + else + { + yield return null; + } + break; + } + } + } + watch.Start(); + } + + logger.LogInfo("total receive time: {0}", watch.ElapsedMilliseconds); + } + } +} diff --git a/csharp/Worker/Microsoft.Spark.CSharp/Worker.cs b/csharp/Worker/Microsoft.Spark.CSharp/Worker.cs index 486a1bc7..c034ca6c 100644 --- a/csharp/Worker/Microsoft.Spark.CSharp/Worker.cs +++ b/csharp/Worker/Microsoft.Spark.CSharp/Worker.cs @@ -2,7 +2,6 @@ // Licensed under the MIT license. See LICENSE file in the project root for full license information. 
using System; -using System.Collections; using System.Collections.Concurrent; using System.Collections.Generic; using System.IO; @@ -17,8 +16,6 @@ using Microsoft.Spark.CSharp.Interop.Ipc; using Microsoft.Spark.CSharp.Network; using Microsoft.Spark.CSharp.Services; -using Microsoft.Spark.CSharp.Sql; -using Razorvine.Pickle; namespace Microsoft.Spark.CSharp { @@ -31,7 +28,6 @@ namespace Microsoft.Spark.CSharp /// public class Worker { - private static readonly DateTime UnixTimeEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc); private static ILoggerService logger; private static SparkCLRAssemblyHandler assemblyHandler; @@ -81,11 +77,13 @@ private static void RunSimpleWorker() InitializeLogger(); logger.LogInfo("RunSimpleWorker ..."); PrintFiles(); - - int javaPort = int.Parse(Console.ReadLine()); //reading port number written from JVM - logger.LogDebug("Port number used to pipe in/out data between JVM and CLR {0}", javaPort); + //int javaPort = int.Parse(Console.ReadLine()); //reading port number written from JVM + var javaPort = int.Parse(Environment.GetEnvironmentVariable("PYTHON_WORKER_FACTORY_PORT")); + var secret = Environment.GetEnvironmentVariable("PYTHON_WORKER_FACTORY_SECRET"); + logger.LogDebug("Port and secret number used to pipe in/out data between JVM and CLR {0} {1}", javaPort, secret); var socket = InitializeSocket(javaPort); - TaskRunner taskRunner = new TaskRunner(0, socket, false); + //Microsoft.Spark.CSharp.Network.Utils.DoServerAuth(socket, secret); + TaskRunner taskRunner = new TaskRunner(0, socket, false, secret); taskRunner.Run(); } catch (Exception e) @@ -119,7 +117,7 @@ public static void InitializeLogger() private static ISocketWrapper InitializeSocket(int javaPort) { var socket = SocketFactory.CreateSocket(); - socket.Connect(IPAddress.Loopback, javaPort); + socket.Connect(IPAddress.Loopback, javaPort, null); return socket; } @@ -138,9 +136,13 @@ public static bool ProcessStream(Stream inputStream, Stream outputStream, int sp //// initialize global state //shuffle.MemoryBytesSpilled = 0 //shuffle.DiskBytesSpilled = 0 + SerDe.ReadInt(inputStream); + SerDe.ReadInt(inputStream); + SerDe.ReadInt(inputStream); + SerDe.ReadLong(inputStream); - // fetch name of workdir - string sparkFilesDir = SerDe.ReadString(inputStream); + // fetch name of workdir + string sparkFilesDir = SerDe.ReadString(inputStream); logger.LogDebug("spark_files_dir: " + sparkFilesDir); //SparkFiles._root_directory = sparkFilesDir //SparkFiles._is_running_on_worker = True @@ -149,7 +151,7 @@ public static bool ProcessStream(Stream inputStream, Stream outputStream, int sp ProcessBroadcastVariables(inputStream); - Accumulator.threadLocalAccumulatorRegistry = new Dictionary(); + Accumulator.threadLocalAccumulatorRegistry = new Dictionary(); var formatter = ProcessCommand(inputStream, outputStream, splitIndex, bootTime); @@ -255,94 +257,117 @@ private static IFormatter ProcessCommand(Stream inputStream, Stream outputStream logger.LogDebug("Is func Sql UDF = {0}", isSqlUdf); IFormatter formatter = new BinaryFormatter(); + UDFCommand command = null; if (isSqlUdf == 0) { - logger.LogDebug("Processing non-UDF command"); - int lengthOfCommandByteArray = SerDe.ReadInt(inputStream); - logger.LogDebug("Command length: " + lengthOfCommandByteArray); + command = ProcessNonUdfCommand(inputStream, outputStream, splitIndex, bootTime, formatter, isSqlUdf); + } + else + { + command = ProcessUdfCommand(inputStream, outputStream, splitIndex, bootTime, formatter, isSqlUdf); + } + + if (command != null) + { + 
command.Execute(); + } + + return formatter; + } + + private static UDFCommand ProcessNonUdfCommand(Stream inputStream, Stream outputStream, int splitIndex, + DateTime bootTime, IFormatter formatter, int isSqlUdf) + { + logger.LogDebug("Processing non-UDF command"); + int lengthOfCommandByteArray = SerDe.ReadInt(inputStream); + logger.LogDebug("Command length: " + lengthOfCommandByteArray); + + UDFCommand command = null; + if (lengthOfCommandByteArray > 0) + { + var commandProcessWatch = new Stopwatch(); + commandProcessWatch.Start(); + + int stageId; + string deserializerMode; + string serializerMode; + CSharpWorkerFunc cSharpWorkerFunc; + ReadCommand(inputStream, formatter, out stageId, out deserializerMode, out serializerMode, + out cSharpWorkerFunc); + + command = new UDFCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode, + serializerMode, formatter, commandProcessWatch, isSqlUdf, + new List() { new WorkerFunc(cSharpWorkerFunc, 0, null) }, stageId); - if (lengthOfCommandByteArray > 0) - { - var commandProcessWatch = new Stopwatch(); - commandProcessWatch.Start(); - - int stageId; - string deserializerMode; - string serializerMode; - CSharpWorkerFunc workerFunc; - ReadCommand(inputStream, formatter, out stageId, out deserializerMode, out serializerMode, - out workerFunc); - - ExecuteCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode, workerFunc, serializerMode, - formatter, commandProcessWatch, stageId, isSqlUdf); - } - else - { - logger.LogWarn("lengthOfCommandByteArray = 0. Nothing to execute :-("); - } } else { - logger.LogDebug("Processing UDF command"); - var udfCount = SerDe.ReadInt(inputStream); - logger.LogDebug("Count of UDFs = {0}", udfCount); + logger.LogWarn("lengthOfCommandByteArray = 0. 
Nothing to execute :-("); + } - if (udfCount == 1) + return command; + } + + private static UDFCommand ProcessUdfCommand(Stream inputStream, Stream outputStream, int splitIndex, + DateTime bootTime, IFormatter formatter, int isSqlUdf) + { + logger.LogDebug("Processing UDF command"); + var udfCount = SerDe.ReadInt(inputStream); + logger.LogDebug("Count of UDFs = {0}", udfCount); + + int stageId = -1; + string deserializerMode = null; + string serializerMode = null; + var commandProcessWatch = new Stopwatch(); + List workerFuncList = new List(); + + for(int udfIter = 0; udfIter < udfCount; udfIter++) + { + CSharpWorkerFunc func = null; + var argCount = SerDe.ReadInt(inputStream); + logger.LogDebug("Count of args = {0}", argCount); + + List argOffsets = new List(); + for (int argIndex = 0; argIndex < argCount; argIndex++) { - CSharpWorkerFunc func = null; - var argCount = SerDe.ReadInt(inputStream); - logger.LogDebug("Count of args = {0}", argCount); + var offset = SerDe.ReadInt(inputStream); + logger.LogDebug("UDF argIndex = {0}, Offset = {1}", argIndex, offset); + argOffsets.Add(offset); + } + + var chainedFuncCount = SerDe.ReadInt(inputStream); + logger.LogDebug("Count of chained func = {0}", chainedFuncCount); - var argOffsets = new List(); + for (int funcIndex = 0; funcIndex < chainedFuncCount; funcIndex++) + { + int lengthOfCommandByteArray = SerDe.ReadInt(inputStream); + logger.LogDebug("UDF command length: " + lengthOfCommandByteArray); - for (int argIndex = 0; argIndex < argCount; argIndex++) + if (lengthOfCommandByteArray > 0) { - var offset = SerDe.ReadInt(inputStream); - logger.LogDebug("UDF argIndex = {0}, Offset = {1}", argIndex, offset); - argOffsets.Add(offset); + CSharpWorkerFunc workerFunc; + ReadCommand(inputStream, formatter, out stageId, out deserializerMode, out serializerMode, + out workerFunc); + + func = func == null ? workerFunc : CSharpWorkerFunc.Chain(func, workerFunc); } - var chainedFuncCount = SerDe.ReadInt(inputStream); - logger.LogDebug("Count of chained func = {0}", chainedFuncCount); - - var commandProcessWatch = new Stopwatch(); - int stageId = -1; - string deserializerMode = null; - string serializerMode = null; - for (int funcIndex = 0; funcIndex < chainedFuncCount; funcIndex++) + else { - int lengthOfCommandByteArray = SerDe.ReadInt(inputStream); - logger.LogDebug("UDF command length: " + lengthOfCommandByteArray) - ; - - if (lengthOfCommandByteArray > 0) - { - CSharpWorkerFunc workerFunc; - ReadCommand(inputStream, formatter, out stageId, out deserializerMode, out serializerMode, - out workerFunc); - - func = func == null ? workerFunc : CSharpWorkerFunc.Chain(func, workerFunc); - } - else - { - logger.LogWarn("UDF lengthOfCommandByteArray = 0. Nothing to execute :-("); - } + logger.LogWarn("UDF lengthOfCommandByteArray = 0. 
Nothing to execute :-("); } - - Debug.Assert(stageId != -1); - Debug.Assert(deserializerMode != null); - Debug.Assert(serializerMode != null); - Debug.Assert(func != null); - ExecuteCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode, func, serializerMode, formatter, - commandProcessWatch, stageId, isSqlUdf); - } - else - { - throw new NotSupportedException(); //TODO - add support for multiple UDFs } + + Debug.Assert(stageId != -1); + Debug.Assert(deserializerMode != null); + Debug.Assert(serializerMode != null); + Debug.Assert(func != null); + + workerFuncList.Add(new WorkerFunc(func, argCount, argOffsets)); } - return formatter; + return new UDFCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode, + serializerMode, formatter, commandProcessWatch, isSqlUdf, workerFuncList, stageId); } private static void ReadCommand(Stream networkStream, IFormatter formatter, out int stageId, @@ -388,116 +413,7 @@ private static IFormatter ProcessCommand(Stream inputStream, Stream outputStream "--------------------------------------------------------------------------------------------------------------"); logger.LogDebug(sb.ToString()); } - - private static void ExecuteCommand(Stream inputStream, Stream outputStream, int splitIndex, DateTime bootTime, - string deserializerMode, CSharpWorkerFunc workerFunc, string serializerMode, - IFormatter formatter, Stopwatch commandProcessWatch, int stageId, int isSqlUdf) - { - int count = 0; - int nullMessageCount = 0; - logger.LogDebug("Beginning to execute func"); - var func = workerFunc.Func; - - var funcProcessWatch = Stopwatch.StartNew(); - DateTime initTime = DateTime.UtcNow; - foreach (var message in func(splitIndex, GetIterator(inputStream, deserializerMode, isSqlUdf))) - { - funcProcessWatch.Stop(); - - if (object.ReferenceEquals(null, message)) - { - nullMessageCount++; - continue; - } - - try - { - WriteOutput(outputStream, serializerMode, message, formatter); - } - catch (Exception) - { - logger.LogError("WriteOutput() failed at iteration {0}", count); - throw; - } - - count++; - funcProcessWatch.Start(); - } - - logger.LogInfo("Output entries count: " + count); - logger.LogDebug("Null messages count: " + nullMessageCount); - - //if profiler: - // profiler.profile(process) - //else: - // process() - - WriteDiagnosticsInfo(outputStream, bootTime, initTime); - - commandProcessWatch.Stop(); - - // log statistics - logger.LogInfo("func process time: {0}", funcProcessWatch.ElapsedMilliseconds); - logger.LogInfo("stage {0}, command process time: {1}", stageId, commandProcessWatch.ElapsedMilliseconds); - } - - private static void WriteOutput(Stream networkStream, string serializerMode, dynamic message, IFormatter formatter) - { - var buffer = GetSerializedMessage(serializerMode, message, formatter); - if (buffer == null) - { - logger.LogError("Buffer is null"); - } - - if (buffer.Length <= 0) - { - logger.LogError("Buffer length {0} cannot be <= 0", buffer.Length); - } - - //Debug.Assert(buffer != null); - //Debug.Assert(buffer.Length > 0); - SerDe.Write(networkStream, buffer.Length); - SerDe.Write(networkStream, buffer); - } - - private static byte[] GetSerializedMessage(string serializerMode, dynamic message, IFormatter formatter) - { - byte[] buffer; - - switch ((SerializedMode)Enum.Parse(typeof(SerializedMode), serializerMode)) - { - case SerializedMode.None: - buffer = message as byte[]; - break; - - case SerializedMode.String: - buffer = SerDe.ToBytes(message as string); - break; - - case SerializedMode.Row: - 
var pickler = new Pickler(); - buffer = pickler.dumps(new ArrayList { message }); - break; - - default: - try - { - var ms = new MemoryStream(); - formatter.Serialize(ms, message); - buffer = ms.ToArray(); - } - catch (Exception) - { - logger.LogError("Exception serializing output"); - logger.LogError("{0} : {1}", message.GetType().Name, message.GetType().FullName); - throw; - } - break; - } - - return buffer; - } - + private static int ReadDiagnosticsInfo(Stream networkStream) { int rddId = SerDe.ReadInt(networkStream); @@ -505,22 +421,7 @@ private static int ReadDiagnosticsInfo(Stream networkStream) int partitionId = SerDe.ReadInt(networkStream); logger.LogInfo("rddInfo: rddId {0}, stageId {1}, partitionId {2}", rddId, stageId, partitionId); return stageId; - } - - private static void WriteDiagnosticsInfo(Stream networkStream, DateTime bootTime, DateTime initTime) - { - DateTime finishTime = DateTime.UtcNow; - const string format = "MM/dd/yyyy hh:mm:ss.fff tt"; - logger.LogDebug("bootTime: {0}, initTime: {1}, finish_time: {2}", - bootTime.ToString(format), initTime.ToString(format), finishTime.ToString(format)); - SerDe.Write(networkStream, (int)SpecialLengths.TIMING_DATA); - SerDe.Write(networkStream, ToUnixTime(bootTime)); - SerDe.Write(networkStream, ToUnixTime(initTime)); - SerDe.Write(networkStream, ToUnixTime(finishTime)); - - SerDe.Write(networkStream, 0L); //shuffle.MemoryBytesSpilled - SerDe.Write(networkStream, 0L); //shuffle.DiskBytesSpilled - } + } private static void WriteAccumulatorValues(Stream networkStream, IFormatter formatter) { @@ -564,121 +465,7 @@ public static void PrintFiles() logger.LogDebug("Files available in executor"); logger.LogDebug("Location: {0}{1}{2}", folder, Environment.NewLine, outfiles.ToString()); - } - - private static long ToUnixTime(DateTime dt) - { - return (long)(dt - UnixTimeEpoch).TotalMilliseconds; - } - - private static IEnumerable GetIterator(Stream inputStream, string serializedMode, int isFuncSqlUdf) - { - logger.LogInfo("Serialized mode in GetIterator: " + serializedMode); - IFormatter formatter = new BinaryFormatter(); - var mode = (SerializedMode)Enum.Parse(typeof(SerializedMode), serializedMode); - int messageLength; - Stopwatch watch = Stopwatch.StartNew(); - while ((messageLength = SerDe.ReadInt(inputStream)) != (int)SpecialLengths.END_OF_DATA_SECTION) - { - watch.Stop(); - if (messageLength > 0 || messageLength == (int)SpecialLengths.NULL) - { - watch.Start(); - byte[] buffer = messageLength > 0 ? SerDe.ReadBytes(inputStream, messageLength) : null; - watch.Stop(); - switch (mode) - { - case SerializedMode.String: - { - if (messageLength > 0) - { - if (buffer == null) - { - logger.LogDebug("Buffer is null. 
Message length is {0}", messageLength); - } - yield return SerDe.ToString(buffer); - } - else - { - yield return null; - } - break; - } - - case SerializedMode.Row: - { - Debug.Assert(messageLength > 0); - var unpickledObjects = PythonSerDe.GetUnpickledObjects(buffer); - - if (isFuncSqlUdf == 0) - { - foreach (var row in unpickledObjects.Select(item => (item as RowConstructor).GetRow())) - { - yield return row; - } - } - else - { - foreach (var row in unpickledObjects) - { - yield return row; - } - } - - break; - } - - case SerializedMode.Pair: - { - byte[] pairKey = buffer; - byte[] pairValue; - - watch.Start(); - int valueLength = SerDe.ReadInt(inputStream); - if (valueLength > 0) - { - pairValue = SerDe.ReadBytes(inputStream, valueLength); - } - else if (valueLength == (int)SpecialLengths.NULL) - { - pairValue = null; - } - else - { - throw new Exception(string.Format("unexpected valueLength: {0}", valueLength)); - } - watch.Stop(); - - yield return new Tuple(pairKey, pairValue); - break; - } - - case SerializedMode.None: //just return raw bytes - { - yield return buffer; - break; - } - - default: - { - if (buffer != null) - { - var ms = new MemoryStream(buffer); - yield return formatter.Deserialize(ms); - } - else - { - yield return null; - } - break; - } - } - } - watch.Start(); - } - - logger.LogInfo("total receive time: {0}", watch.ElapsedMilliseconds); - } + } internal class SparkCLRAssemblyHandler { diff --git a/csharp/Worker/Microsoft.Spark.CSharp/Worker.csproj b/csharp/Worker/Microsoft.Spark.CSharp/Worker.csproj index 36c9c1f2..2ba45523 100644 --- a/csharp/Worker/Microsoft.Spark.CSharp/Worker.csproj +++ b/csharp/Worker/Microsoft.Spark.CSharp/Worker.csproj @@ -46,6 +46,8 @@ + + diff --git a/csharp/Worker/Microsoft.Spark.CSharp/WorkerFunc.cs b/csharp/Worker/Microsoft.Spark.CSharp/WorkerFunc.cs new file mode 100644 index 00000000..0c6a6389 --- /dev/null +++ b/csharp/Worker/Microsoft.Spark.CSharp/WorkerFunc.cs @@ -0,0 +1,25 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. 
+ +using System.Runtime.Serialization; +using Microsoft.Spark.CSharp.Core; +using System.Collections.Generic; + +namespace Microsoft.Spark.CSharp +{ + internal class WorkerFunc + { + internal CSharpWorkerFunc CharpWorkerFunc { get; } + + internal int ArgsCount { get; } + + internal List ArgOffsets { get; } + + internal WorkerFunc(CSharpWorkerFunc func, int argsCount, List argOffsets) + { + CharpWorkerFunc = func; + ArgsCount = argsCount; + ArgOffsets = argOffsets; + } + } +} diff --git a/csharp/WorkerTest/MultiThreadWorkerTest.cs b/csharp/WorkerTest/MultiThreadWorkerTest.cs index 0f0b307c..6488adeb 100644 --- a/csharp/WorkerTest/MultiThreadWorkerTest.cs +++ b/csharp/WorkerTest/MultiThreadWorkerTest.cs @@ -81,6 +81,7 @@ private int CreateServer(out Process worker, bool sparkReuseWorker) worker.Start(); int serverPort = 0; serverPort = SerDe.ReadInt(worker.StandardOutput.BaseStream); + Environment.SetEnvironmentVariable("PYTHON_WORKER_FACTORY_PORT", serverPort.ToString()); StreamReader stdoutReader = worker.StandardOutput; Task.Run(() => { @@ -119,7 +120,7 @@ private int CreateServer(out Process worker, bool sparkReuseWorker) private ISocketWrapper CreateSocket(int serverPort) { var socket =SocketFactory.CreateSocket(); - socket.Connect(IPAddress.Loopback, serverPort); + socket.Connect(IPAddress.Loopback, serverPort, null); return socket; } @@ -131,6 +132,10 @@ private void WritePayloadHeaderToWorker(Stream s) { SerDe.Write(s, splitIndex); SerDe.Write(s, ver); + SerDe.Write(s, 0); + SerDe.Write(s, 0); + SerDe.Write(s, 0); + SerDe.Write(s, 0L); SerDe.Write(s, sparkFilesDir); SerDe.Write(s, numberOfIncludesItems); SerDe.Write(s, numBroadcastVariables); diff --git a/csharp/WorkerTest/WorkerTest.cs b/csharp/WorkerTest/WorkerTest.cs index 18264375..1c0f6ea8 100644 --- a/csharp/WorkerTest/WorkerTest.cs +++ b/csharp/WorkerTest/WorkerTest.cs @@ -93,6 +93,7 @@ private ISocketWrapper CreateServer(out Process worker) } }; + Environment.SetEnvironmentVariable("PYTHON_WORKER_FACTORY_PORT", port.ToString()); lock (syncLock) { output.Clear(); @@ -125,6 +126,10 @@ private void WritePayloadHeaderToWorker(Stream s, int isSqlUdf = 0) { SerDe.Write(s, splitIndex); SerDe.Write(s, ver); + SerDe.Write(s, 0); + SerDe.Write(s, 0); + SerDe.Write(s, 0); + SerDe.Write(s, 0L); SerDe.Write(s, sparkFilesDir); SerDe.Write(s, numberOfIncludesItems); SerDe.Write(s, numBroadcastVariables); @@ -631,6 +636,10 @@ public void TestBroadcastVariablesInWorker() { SerDe.Write(s, splitIndex); SerDe.Write(s, ver); + SerDe.Write(s, 0); + SerDe.Write(s, 0); + SerDe.Write(s, 0); + SerDe.Write(s, 0L); SerDe.Write(s, sparkFilesDir); SerDe.Write(s, numberOfIncludesItems); @@ -802,6 +811,10 @@ public void TestUdfSerialization() using (var inputStream = new MemoryStream(500)) { SerDe.Write(inputStream, "1.0"); //version + SerDe.Write(inputStream, 0); + SerDe.Write(inputStream, 0); + SerDe.Write(inputStream, 0); + SerDe.Write(inputStream, 0L); SerDe.Write(inputStream, ""); //includes directory SerDe.Write(inputStream, 0); //number of included items SerDe.Write(inputStream, 0); //number of broadcast variables diff --git a/csharp/WorkerTest/WorkerTest.csproj b/csharp/WorkerTest/WorkerTest.csproj index 76c9ba87..8fa76dee 100644 --- a/csharp/WorkerTest/WorkerTest.csproj +++ b/csharp/WorkerTest/WorkerTest.csproj @@ -35,9 +35,8 @@ 4 - - False - ..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll + + ..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll False diff --git a/examples/Batch/WordCount/WordCount.csproj 
b/examples/Batch/WordCount/WordCount.csproj index b655eb8f..1961a0bc 100644 --- a/examples/Batch/WordCount/WordCount.csproj +++ b/examples/Batch/WordCount/WordCount.csproj @@ -32,17 +32,17 @@ 4 - + False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe False - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - + False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll False @@ -84,4 +84,4 @@ --> - + \ No newline at end of file diff --git a/examples/Batch/pi/Pi.csproj b/examples/Batch/pi/Pi.csproj index df0916b5..464f4b5d 100644 --- a/examples/Batch/pi/Pi.csproj +++ b/examples/Batch/pi/Pi.csproj @@ -35,17 +35,17 @@ 4 - + False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe False - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - + False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll False @@ -84,4 +84,4 @@ --> - + \ No newline at end of file diff --git a/examples/Examples.sln b/examples/Examples.sln index 5ba0d238..3eaad7a5 100644 --- a/examples/Examples.sln +++ b/examples/Examples.sln @@ -1,6 +1,6 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 14 -VisualStudioVersion = 14.0.25123.0 +VisualStudioVersion = 14.0.25420.1 MinimumVisualStudioVersion = 10.0.40219.1 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HdfsWordCount", "Streaming\HdfsWordCount\HdfsWordCount.csproj", "{6A2C7CF9-D64E-490D-9841-269EE14F7932}" EndProject diff --git a/examples/Sql/CassandraDataFrame/CassandraDataFrame.csproj b/examples/Sql/CassandraDataFrame/CassandraDataFrame.csproj index 30fd07f3..2f38f466 100644 --- a/examples/Sql/CassandraDataFrame/CassandraDataFrame.csproj +++ b/examples/Sql/CassandraDataFrame/CassandraDataFrame.csproj @@ -34,14 +34,17 @@ 4 - - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe - - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll False @@ -80,4 +83,4 @@ --> - + \ No newline at end of file diff --git a/examples/Sql/HiveDataFrame/HiveDataFrame.csproj b/examples/Sql/HiveDataFrame/HiveDataFrame.csproj index 0040a3eb..c826a80f 100644 --- a/examples/Sql/HiveDataFrame/HiveDataFrame.csproj +++ b/examples/Sql/HiveDataFrame/HiveDataFrame.csproj @@ -33,17 +33,17 @@ 4 - + False - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe - - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe - True + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll - True + + False + 
..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll @@ -75,4 +75,4 @@ --> - + \ No newline at end of file diff --git a/examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj b/examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj index 24ecf84f..fb4fc633 100644 --- a/examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj +++ b/examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj @@ -34,17 +34,17 @@ 4 - + False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe False - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - + False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll False @@ -82,4 +82,4 @@ --> - + \ No newline at end of file diff --git a/examples/Sql/SparkXml/SparkXml.csproj b/examples/Sql/SparkXml/SparkXml.csproj index d7701258..622b6a24 100644 --- a/examples/Sql/SparkXml/SparkXml.csproj +++ b/examples/Sql/SparkXml/SparkXml.csproj @@ -34,17 +34,17 @@ 4 - + False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe False - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - + False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll False @@ -82,4 +82,4 @@ --> - + \ No newline at end of file diff --git a/examples/Streaming/EventHub/EventHub.csproj b/examples/Streaming/EventHub/EventHub.csproj index cc6d4e27..934eae56 100644 --- a/examples/Streaming/EventHub/EventHub.csproj +++ b/examples/Streaming/EventHub/EventHub.csproj @@ -34,16 +34,18 @@ 4 - - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe False - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll False @@ -85,4 +87,4 @@ --> - + \ No newline at end of file diff --git a/examples/Streaming/HdfsWordCount/HdfsWordCount.csproj b/examples/Streaming/HdfsWordCount/HdfsWordCount.csproj index 34facbb8..c58ceaee 100644 --- a/examples/Streaming/HdfsWordCount/HdfsWordCount.csproj +++ b/examples/Streaming/HdfsWordCount/HdfsWordCount.csproj @@ -32,21 +32,21 @@ 4 - + False - ..\..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe - + False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - + False - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll - + False - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Newtonsoft.Json.7.0.1\lib\net45\Newtonsoft.Json.dll False @@ -80,4 +80,4 @@ --> - + \ No newline at end of file diff --git 
a/examples/Streaming/Kafka/Kafka.csproj b/examples/Streaming/Kafka/Kafka.csproj index 2bdaa816..68b15a7e 100644 --- a/examples/Streaming/Kafka/Kafka.csproj +++ b/examples/Streaming/Kafka/Kafka.csproj @@ -32,15 +32,17 @@ 4 - - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe - + False - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + + False + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll False @@ -79,4 +81,4 @@ --> - + \ No newline at end of file diff --git a/examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj b/examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj index e91905a4..81f5a19d 100644 --- a/examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj +++ b/examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj @@ -66,13 +66,13 @@ - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll @@ -98,4 +98,4 @@ --> - + \ No newline at end of file diff --git a/examples/fsharp/WordCount/WordCountFSharp.fsproj b/examples/fsharp/WordCount/WordCountFSharp.fsproj index af96e494..86c3bdaa 100644 --- a/examples/fsharp/WordCount/WordCountFSharp.fsproj +++ b/examples/fsharp/WordCount/WordCountFSharp.fsproj @@ -71,20 +71,17 @@ - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\CSharpWorker.exe - True + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\CSharpWorker.exe ..\..\packages\FSharp.Core.4.0.0.1\lib\net40\FSharp.Core.dll True - ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll - True + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\log4net.dll - ..\..\packages\Microsoft.SparkCLR.2.0.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll - True + ..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\Debug\Microsoft.Spark.CSharp.Adapter.dll @@ -110,4 +107,4 @@ --> - + \ No newline at end of file diff --git a/notes/running-mobius-app.md b/notes/running-mobius-app.md index b430a0d7..ea776a39 100644 --- a/notes/running-mobius-app.md +++ b/notes/running-mobius-app.md @@ -145,7 +145,7 @@ The following sample commands show how to run Mobius examples in local mode. Usi Computes the _approximate_ value of Pi using two appropaches and displays the value. 
### WordCount Example (Batch) -* Run `sparkclr-submit.cmd --exe SparkClrWordCount.exe C:\Git\Mobius\examples\Batch\WordCount\bin\Debug ` +* Run `sparkclr-submit.cmd --exe SparkClrWordCount.exe C:\Git\Mobius\examples\Batch\WordCount\bin\Debug ` `InputFilePath` should be in one of the following formats: * `hdfs://path/to/inputfile` diff --git a/scala/pom.xml b/scala/pom.xml index cb9ce900..ec526cda 100644 --- a/scala/pom.xml +++ b/scala/pom.xml @@ -2,7 +2,7 @@ 4.0.0 com.microsoft.sparkclr spark-clr_2.11 - 2.0.200-SNAPSHOT + 2.3.1-SNAPSHOT Mobius Project C# language binding and extensions to Apache Spark https://github.com/Microsoft/Mobius @@ -35,7 +35,7 @@ 1.5 UTF-8 2.11.8 - 2.0.2 + 2.3.1 2.11 @@ -106,14 +106,19 @@ org.apache.spark spark-hive_2.11 - 2.0.0 + ${spark.version} com.databricks - spark-csv_2.10 - 1.4.0 + spark-csv_2.11 + 1.5.0 + + + com.databricks + spark-avro_2.11 + 4.0.0 diff --git a/scala/src/main/org/apache/spark/api/csharp/CSharpRDD.scala b/scala/src/main/org/apache/spark/api/csharp/CSharpRDD.scala index d48e9f3b..57ca3616 100644 --- a/scala/src/main/org/apache/spark/api/csharp/CSharpRDD.scala +++ b/scala/src/main/org/apache/spark/api/csharp/CSharpRDD.scala @@ -12,6 +12,7 @@ import java.util.{List => JList, Map => JMap} import org.apache.hadoop.io.compress.CompressionCodec import org.apache.spark.api.python._ +import org.apache.spark.api.python.PythonAccumulatorV2 import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD import org.apache.spark._ @@ -34,7 +35,7 @@ class CSharpRDD( cSharpWorkerExecutable: String, unUsedVersionIdentifier: String, broadcastVars: JList[Broadcast[PythonBroadcast]], - accumulator: Accumulator[JList[Array[Byte]]]) + accumulator: PythonAccumulatorV2) extends PythonRDD ( parent, SQLUtils.createCSharpFunction(command, envVars, cSharpIncludes, cSharpWorkerExecutable, @@ -95,7 +96,7 @@ class CSharpRDD( logInfo("Env vars: " + envVars.asScala.mkString(", ")) val runner = new PythonRunner( - Seq(ChainedPythonFunctions(Seq(func))), bufferSize, reuse_worker, false, Array(Array(0))) + Seq(ChainedPythonFunctions(Seq(func))), bufferSize, reuseWorker) runner.compute(firstParent.iterator(split, context), split.index, context) } diff --git a/scala/src/main/org/apache/spark/sql/api/csharp/SQLUtils.scala b/scala/src/main/org/apache/spark/sql/api/csharp/SQLUtils.scala index c01d76a7..79af72c3 100644 --- a/scala/src/main/org/apache/spark/sql/api/csharp/SQLUtils.scala +++ b/scala/src/main/org/apache/spark/sql/api/csharp/SQLUtils.scala @@ -8,6 +8,7 @@ package org.apache.spark.sql.api.csharp import java.io.{ByteArrayOutputStream, DataOutputStream} import org.apache.spark.{Accumulator, SparkContext} +import org.apache.spark.api.python.PythonAccumulatorV2 import org.apache.spark.api.csharp.SerDe import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} import org.apache.spark.api.python.{PythonBroadcast, PythonFunction, SerDeUtil} @@ -51,7 +52,7 @@ object SQLUtils { cSharpWorkerExecutable: String, unUsedVersionIdentifier: String, broadcastVars: JList[Broadcast[PythonBroadcast]], - accumulator: Accumulator[JList[Array[Byte]]]) : PythonFunction = { + accumulator: PythonAccumulatorV2) : PythonFunction = { PythonFunction(command, envVars, cSharpIncludes, cSharpWorkerExecutable, unUsedVersionIdentifier, broadcastVars, accumulator) } diff --git a/scala/src/main/org/apache/spark/util/csharp/Utils.scala b/scala/src/main/org/apache/spark/util/csharp/Utils.scala index 7bb74190..7294cae6 100644 --- a/scala/src/main/org/apache/spark/util/csharp/Utils.scala +++ 
b/scala/src/main/org/apache/spark/util/csharp/Utils.scala @@ -127,17 +127,17 @@ object Utils extends Logging { timer.schedule(new TimerTask() { @Override def run() { - Runtime.getRuntime.halt(status) + if (status!=0) { Runtime.getRuntime.halt(status); } } }, maxDelayMillis) // try to exit nicely - System.exit(status); + if (status!=0) { System.exit(status); } } catch { // exit nastily if we have a problem case ex: Throwable => Runtime.getRuntime.halt(status) } finally { // should never get here - Runtime.getRuntime.halt(status) + if (status!=0) { Runtime.getRuntime.halt(status); } } } @@ -147,7 +147,7 @@ object Utils extends Logging { * @param status the exit status, zero for OK, non-zero for error */ def exit(status: Int): Unit = { - exit(status, 1000) + exit(status, 1000); } private[spark] def listZipFileEntries(file: File): Array[String] = { diff --git a/scripts/sparkclr-submit.cmd b/scripts/sparkclr-submit.cmd index c6e1d501..5f119c87 100644 --- a/scripts/sparkclr-submit.cmd +++ b/scripts/sparkclr-submit.cmd @@ -42,7 +42,7 @@ if not exist "%SPARK_JARS_DIR%" ( set SPARK_JARS_CLASSPATH=%SPARK_JARS_DIR%\* -if not defined SPARKCLR_JAR (set SPARKCLR_JAR=spark-clr_2.11-2.0.200-SNAPSHOT.jar) +if not defined SPARKCLR_JAR (set SPARKCLR_JAR=spark-clr_2.11-2.3.1-SNAPSHOT.jar) echo [sparkclr-submit.cmd] SPARKCLR_JAR=%SPARKCLR_JAR% set SPARKCLR_CLASSPATH=%SPARKCLR_HOME%\lib\%SPARKCLR_JAR% REM SPARKCLR_DEBUGMODE_EXT_JARS environment variable is used to specify external dependencies to use in debug mode @@ -105,4 +105,4 @@ goto :eof @echo Example 2: @echo sparkclr-submit.cmd [--verbose] [--master local] [--deploy-mode client] [--name testapp] --exe csdriver.exe c:\sparkclrapp\driver.zip arg1 arg2 arg3 @echo Example 3: - @echo sparkclr-submit.cmd [--verbose] --master spark://host:port --deploy-mode cluster [--name testapp] --exe csdriver.exe --remote-sparkclr-jar hdfs://path/to/spark-clr-1.6.1-SNAPSHOT.jar hdfs://path/to/driver.zip arg1 arg2 arg3 + @echo sparkclr-submit.cmd [--verbose] --master spark://host:port --deploy-mode cluster [--name testapp] --exe csdriver.exe --remote-sparkclr-jar hdfs://path/to/spark-clr_2.11-2.3.1-SNAPSHOT.jar hdfs://path/to/driver.zip arg1 arg2 arg3 diff --git a/scripts/sparkclr-submit.sh b/scripts/sparkclr-submit.sh index 5d94efa3..e4ca34f4 100755 --- a/scripts/sparkclr-submit.sh +++ b/scripts/sparkclr-submit.sh @@ -32,7 +32,7 @@ function usage() { echo "Example 2:" echo "sparkclr-submit.sh [--verbose] [--master local] [--deploy-mode client] [--name testapp] --exe csdriver.exe sparkclrapp/driver.zip arg1 arg2 arg3" echo "Example 3:" - echo "sparkclr-submit.sh [--verbose] --master spark://host:port --deploy-mode cluster [--name testapp] --exe csdriver.exe --remote-sparkclr-jar --remote-sparkclr-jar hdfs://path/to/spark-clr_2.10-1.6.1-SNAPSHOT.jar hdfs://path/to/driver.zip arg1 arg2 arg3" + echo "sparkclr-submit.sh [--verbose] --master spark://host:port --deploy-mode cluster [--name testapp] --exe csdriver.exe --remote-sparkclr-jar --remote-sparkclr-jar hdfs://path/to/spark-clr_2.11-2.3.1-SNAPSHOT.jar hdfs://path/to/driver.zip arg1 arg2 arg3" } [ "$SPARK_HOME" = "" ] && spark_home_error @@ -57,7 +57,7 @@ fi export SPARK_JARS_CLASSPATH="$SPARK_JARS_DIR/*" -export SPARKCLR_JAR=spark-clr_2.11-2.0.200-SNAPSHOT.jar +export SPARKCLR_JAR=spark-clr_2.11-2.3.1-SNAPSHOT.jar export SPARKCLR_CLASSPATH="$SPARKCLR_HOME/lib/$SPARKCLR_JAR" # SPARKCLR_DEBUGMODE_EXT_JARS environment variable is used to specify external dependencies to use in debug mode [ ! 
"$SPARKCLR_DEBUGMODE_EXT_JARS" = "" ] && export SPARKCLR_CLASSPATH="$SPARKCLR_CLASSPATH:$SPARKCLR_DEBUGMODE_EXT_JARS"