# Perf Improvements 

## Cyrillic - 2 Bytes per char

### Setup database and Tables

In [1]:
USE master;
DROP DATABASE IF EXISTS UnicodeDatabase_Cyrillic;
CREATE DATABASE UnicodeDatabase_Cyrillic COLLATE LATIN1_GENERAL_100_CI_AS_SC_UTF8
GO

USE UnicodeDatabase_Cyrillic
GO
DROP TABLE IF EXISTS dbo.Inserts_UTF16
CREATE TABLE dbo.Inserts_UTF16(
    ID int IDENTITY(1,1) NOT NULL PRIMARY KEY
    , col1 NVARCHAR(50) NOT NULL)
GO
DROP TABLE IF EXISTS dbo.Inserts_UTF8
CREATE TABLE dbo.Inserts_UTF8(
    ID int IDENTITY(1,1) NOT NULL PRIMARY KEY
    , col1 VARCHAR(50) NOT NULL)
GO
DROP TABLE IF EXISTS dbo.Inserts_UTF16_Compressed
CREATE TABLE dbo.Inserts_UTF16_Compressed(
    ID int IDENTITY(1,1) NOT NULL PRIMARY KEY
    , col1 NVARCHAR(50) NOT NULL)
WITH (DATA_COMPRESSION = PAGE)
GO
DROP TABLE IF EXISTS dbo.Inserts_UTF8_Compressed
CREATE TABLE dbo.Inserts_UTF8_Compressed(
    ID int IDENTITY(1,1) NOT NULL PRIMARY KEY
    , col1 VARCHAR(50) NOT NULL)
WITH (DATA_COMPRESSION = PAGE)
GO

## INSERT perf UTF16

In [2]:
USE UnicodeDatabase_Cyrillic
GO
SET NOCOUNT ON;
BEGIN TRAN
DECLARE @i int = 1
WHILE @i < 1000000
BEGIN
    INSERT INTO dbo.Inserts_UTF16 (col1) 
	SELECT REPLICATE(CONCAT(
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45)))
	  ), 2);
    SET @i += 1
END;
COMMIT
GO

## INSERT perf UTF8

In [3]:
USE UnicodeDatabase_Cyrillic
GO
SET NOCOUNT ON;
BEGIN TRAN
DECLARE @i int = 1
WHILE @i < 1000000
BEGIN
    INSERT INTO dbo.Inserts_UTF8 (col1) 
	SELECT REPLICATE(CONCAT(
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45)))
	  ), 2);
    SET @i += 1
END;
COMMIT
GO

## INSERT perf UTF16 Compressed

In [4]:
USE UnicodeDatabase_Cyrillic
GO
SET NOCOUNT ON;
BEGIN TRAN
DECLARE @i int = 1
WHILE @i < 1000000
BEGIN
    INSERT INTO dbo.Inserts_UTF16_Compressed (col1) 
	SELECT REPLICATE(CONCAT(
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45)))
	  ), 2);
    SET @i += 1
END;
COMMIT
GO

## INSERT perf UTF8 Compressed

In [5]:
USE UnicodeDatabase_Cyrillic
GO
SET NOCOUNT ON;
BEGIN TRAN
DECLARE @i int = 1
WHILE @i < 1000000
BEGIN
    INSERT INTO dbo.Inserts_UTF8_Compressed (col1) 
	SELECT REPLICATE(CONCAT(
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45)))
	  ), 2);
    SET @i += 1
END;
COMMIT
GO

## SELECTs

Recreate tables

In [2]:
USE [UnicodeDatabase_Cyrillic];
GO

DROP TABLE IF EXISTS dbo.Inserts_UTF16
CREATE TABLE dbo.Inserts_UTF16(
    ID int IDENTITY(1,1) NOT NULL PRIMARY KEY
    , col1 NVARCHAR(50) NOT NULL)
GO
DROP TABLE IF EXISTS dbo.Inserts_UTF8
CREATE TABLE dbo.Inserts_UTF8(
    ID int IDENTITY(1,1) NOT NULL PRIMARY KEY
    , col1 VARCHAR(50) NOT NULL)
GO
DROP TABLE IF EXISTS dbo.Inserts_UTF16_Compressed
CREATE TABLE dbo.Inserts_UTF16_Compressed(
    ID int IDENTITY(1,1) NOT NULL PRIMARY KEY
    , col1 NVARCHAR(50) NOT NULL)
WITH (DATA_COMPRESSION = PAGE)
GO
DROP TABLE IF EXISTS dbo.Inserts_UTF8_Compressed
CREATE TABLE dbo.Inserts_UTF8_Compressed(
    ID int IDENTITY(1,1) NOT NULL PRIMARY KEY
    , col1 VARCHAR(50) NOT NULL)
WITH (DATA_COMPRESSION = PAGE)
GO

### Insert same data set to all tables

In [3]:
USE [UnicodeDatabase_Cyrillic];
GO

-- UTF16
SET NOCOUNT ON;
BEGIN TRAN
DECLARE @i int = 1, @start datetime
SELECT @start = GETDATE()
WHILE @i < 1000000
BEGIN
    INSERT INTO dbo.Inserts_UTF16 (col1) 
	SELECT REPLICATE(CONCAT(
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45))),
	  NCHAR(FLOOR(1070 + (RAND() * 45)))
	  ), 2);
    SET @i += 1
END;
COMMIT
GO
-- UTF8
SET NOCOUNT ON;
BEGIN TRAN
INSERT INTO dbo.Inserts_UTF8 (col1) 
SELECT col1 FROM dbo.Inserts_UTF16
COMMIT
GO
-- UTF16 Compressed
SET NOCOUNT ON;
BEGIN TRAN
INSERT INTO dbo.Inserts_UTF16_Compressed (col1) 
SELECT col1 FROM dbo.Inserts_UTF16
COMMIT
GO
-- UTF8 Compressed
SET NOCOUNT ON;
BEGIN TRAN
INSERT INTO dbo.Inserts_UTF8_Compressed (col1) 
SELECT col1 FROM dbo.Inserts_UTF16
COMMIT
GO

### Check data record sizes

Note data lenght sizes are the same whether compressed or not

In [4]:
USE [UnicodeDatabase_Cyrillic];
GO
SELECT TOP 1 DATALENGTH(col1) AS [DataLength_UTF16]
FROM Inserts_UTF16
GO
SELECT TOP 1 DATALENGTH(col1) AS [DataLength_UTF8]
FROM Inserts_UTF8
GO

DataLength_UTF16
40


DataLength_UTF8
40


### Check table sizes

In [7]:
USE [UnicodeDatabase_Cyrillic];
GO

SELECT OBJECT_NAME(p.OBJECT_ID) AS TableName,
	p.ROWS AS NumRows, a.used_pages, a.total_pages,
	CONVERT(DECIMAL(19,2),ISNULL(a.used_pages,0))*8/1024 AS DataSizeMB
FROM sys.allocation_units a
INNER JOIN sys.partitions p ON p.hobt_id = a.container_id
	AND OBJECT_NAME(p.OBJECT_ID) LIKE 'Inserts%'
ORDER BY TableName
GO

TableName,NumRows,used_pages,total_pages,DataSizeMB
Inserts_UTF16,999999,7070,7073,55.234375
Inserts_UTF16_Compressed,999999,4144,4161,32.375
Inserts_UTF8,999999,7070,7097,55.234375
Inserts_UTF8_Compressed,999999,6602,6625,51.578125


## Simple Read performance

In [9]:
USE [UnicodeDatabase_Cyrillic];
GO

SET STATISTICS IO, TIME ON
GO

DBCC DROPCLEANBUFFERS
GO

SELECT * FROM Inserts_UTF16
WHERE col1 LIKE 'ч%'
GO

ID,col1
33,чъєжетљѐЮбчъєжетљѐЮб
51,чеењесЮэыкчеењесЮэык
60,чјљытоцљйѕчјљытоцљйѕ
111,чхбчЯрьѐрсчхбчЯрьѐрс
123,чиілфъбѐшшчиілфъбѐшш
246,чмѓкхрбѓавчмѓкхрбѓав
356,чётжрюужйјчётжрюужйј
411,чаЮњјтбулэчаЮњјтбулэ
494,чшэяжзияяъчшэяжзияяъ
539,чњмлѐэщйиъчњмлѐэщйиъ


In [10]:
USE [UnicodeDatabase_Cyrillic];
GO

DBCC DROPCLEANBUFFERS
GO

SELECT * FROM Inserts_UTF8
WHERE col1 LIKE 'ч%'
GO

ID,col1
33,чъєжетљѐЮбчъєжетљѐЮб
51,чеењесЮэыкчеењесЮэык
60,чјљытоцљйѕчјљытоцљйѕ
111,чхбчЯрьѐрсчхбчЯрьѐрс
123,чиілфъбѐшшчиілфъбѐшш
246,чмѓкхрбѓавчмѓкхрбѓав
356,чётжрюужйјчётжрюужйј
411,чаЮњјтбулэчаЮњјтбулэ
494,чшэяжзияяъчшэяжзияяъ
539,чњмлѐэщйиъчњмлѐэщйиъ


In [11]:
USE [UnicodeDatabase_Cyrillic];
GO

DBCC DROPCLEANBUFFERS
GO

SELECT * FROM Inserts_UTF16_Compressed
WHERE col1 LIKE 'ч%'
GO


ID,col1
33,чъєжетљѐЮбчъєжетљѐЮб
51,чеењесЮэыкчеењесЮэык
60,чјљытоцљйѕчјљытоцљйѕ
111,чхбчЯрьѐрсчхбчЯрьѐрс
123,чиілфъбѐшшчиілфъбѐшш
246,чмѓкхрбѓавчмѓкхрбѓав
356,чётжрюужйјчётжрюужйј
411,чаЮњјтбулэчаЮњјтбулэ
494,чшэяжзияяъчшэяжзияяъ
539,чњмлѐэщйиъчњмлѐэщйиъ


In [12]:
USE [UnicodeDatabase_Cyrillic];
GO

DBCC DROPCLEANBUFFERS
GO

SELECT * FROM Inserts_UTF8_Compressed
WHERE col1 LIKE 'ч%'
GO

ID,col1
10018,чЮішіѐђннычЮішіѐђнны
10167,чдыђэчсівлчдыђэчсівл
10195,чямјбфкзѐйчямјбфкзѐй
10212,чѕѕурљбђшїчѕѕурљбђшї
10314,ччрѐајсіёвччрѐајсіёв
10325,чьщёбнѕюъечьщёбнѕюъе
10359,чјнугѕктьччјнугѕктьч
10381,чљжѓедьрыдчљжѓедьрыд
10509,члрањфшЯцмчлрањфшЯцм
10523,чЮюьѕбюѓтјчЮюьѕбюѓтј


In [13]:
SET STATISTICS IO, TIME OFF
GO