In [1]:
# Uncomment to install the Apache Ignite thin client into the kernel's environment:
# %pip install pyignite

In [2]:
from pyignite import Client

# Address of the Ignite node's thin-client endpoint; edit here to target another cluster.
IGNITE_HOST = '127.0.0.1'
IGNITE_PORT = 10800

client = Client()
client.connect(IGNITE_HOST, IGNITE_PORT)

# Open the demo cache. get_or_create_cache (rather than create_cache) keeps this
# cell re-runnable: create_cache fails once 'my cache' already exists on the cluster.
my_cache = client.get_or_create_cache('my cache')

# Put a value in the cache
my_cache.put(1, 'Hello World')

# Read it back to confirm the round trip; this produces the cell's recorded output.
result = my_cache.get(1)
print(result)

Hello World


In [4]:
# Store a second entry, then fetch it by the same key to show the round trip.
second_key = 2
my_cache.put(second_key, 'Hello World2')

result = my_cache.get(second_key)
print(result)

Hello World2


In [6]:
from pprint import pprint

from helpers.converters import obj_to_dict
from helpers.sql_helper import TableNames, Query, TestData
from pyignite import Client
from pyignite.datatypes.prop_codes import PROP_NAME, PROP_QUERY_ENTITIES

In [7]:
# Create the three demo tables, in this order: Country, City, Language.
table_ddl_statements = (
    Query.COUNTRY_CREATE_TABLE,
    Query.CITY_CREATE_TABLE,
    Query.LANGUAGE_CREATE_TABLE,
)
for ddl in table_ddl_statements:
    client.sql(ddl)

In [8]:
# Build the secondary indices first.
index_statements = (Query.CITY_CREATE_INDEX, Query.LANGUAGE_CREATE_INDEX)
for index_ddl in index_statements:
    client.sql(index_ddl)

# Then load the sample data: each INSERT statement is paired with the rows it
# takes, and every row is sent as the positional arguments of one statement.
load_plan = (
    (Query.COUNTRY_INSERT, TestData.COUNTRY),
    (Query.CITY_INSERT, TestData.CITY),
    (Query.LANGUAGE_INSERT, TestData.LANGUAGE),
)
for insert_statement, dataset in load_plan:
    for record in dataset:
        client.sql(insert_statement, query_args=record)

In [9]:
# Look at what the SQL statements created on the cluster.
# The list contains the SQL-backed caches ('SQL_PUBLIC_CITY', 'SQL_PUBLIC_COUNTRY',
# 'SQL_PUBLIC_COUNTRYLANGUAGE') plus any cache created earlier, such as 'my cache'.
result = client.get_cache_names()
pprint(result)

# Open the cache behind the City table and print two of its settings:
# its name ('SQL_PUBLIC_CITY') and then its query-entity description.
city_cache = client.get_or_create_cache('SQL_PUBLIC_CITY')
for setting_code in (PROP_NAME, PROP_QUERY_ENTITIES):
    pprint(city_cache.settings[setting_code])

['SQL_PUBLIC_CITY',
 'my cache',
 'SQL_PUBLIC_COUNTRY',
 'SQL_PUBLIC_COUNTRYLANGUAGE']
'SQL_PUBLIC_CITY'
[{'field_name_aliases': [{'alias': 'DISTRICT', 'field_name': 'DISTRICT'},
                         {'alias': 'POPULATION', 'field_name': 'POPULATION'},
                         {'alias': 'COUNTRYCODE', 'field_name': 'COUNTRYCODE'},
                         {'alias': 'ID', 'field_name': 'ID'},
                         {'alias': 'NAME', 'field_name': 'NAME'}],
  'key_field_name': None,
  'key_type_name': 'SQL_PUBLIC_CITY_c0c762fe_2f6c_4a64_ae5e_3ca7552dca28_KEY',
  'query_fields': [{'default_value': None,
                    'is_key_field': True,
                    'is_notnull_constraint_field': False,
                    'name': 'ID',
                    'precision': -1,
                    'scale': -1,
                    'type_name': 'java.lang.Integer'},
                   {'default_value': None,
                    'is_key_field': False,
                    'is_notnull_constrain

In [10]:
print('-' * 20)
# Take only the first entry the scan yields; iterating over that entry prints
# its key object and then its value object, each converted to a plain dict.
with city_cache.scan() as cursor:
    first_entry = next(cursor)
    for part in first_entry:
        pprint(obj_to_dict(part))

--------------------
{'COUNTRYCODE': 'CHN',
 'ID': 1890,
 'type_name': 'SQL_PUBLIC_CITY_c0c762fe_2f6c_4a64_ae5e_3ca7552dca28_KEY'}
{'DISTRICT': 'Shanghai',
 'NAME': 'Shanghai',
 'POPULATION': 9696300,
 'type_name': 'SQL_PUBLIC_CITY_c0c762fe_2f6c_4a64_ae5e_3ca7552dca28'}


In [12]:
print('-' * 20)
# Fetch the same city through SQL: _KEY and _VAL select the stored key and
# value objects, which are printed as-is (not converted to dicts).
city_lookup_sql = 'SELECT _KEY, _VAL FROM CITY WHERE ID = ?'
with client.sql(city_lookup_sql, query_args=[1890]) as cursor:
    first_row = next(cursor)
    for column_value in first_row:
        pprint(column_value)

--------------------
SQL_PUBLIC_CITY_c0c762fe_2f6c_4a64_ae5e_3ca7552dca28_KEY(ID=1890, COUNTRYCODE='CHN', version=1)
SQL_PUBLIC_CITY_c0c762fe_2f6c_4a64_ae5e_3ca7552dca28(NAME='Shanghai', DISTRICT='Shanghai', POPULATION=9696300, version=1)


In [13]:
# Top-10 cities by population across three countries, joined with their country name.
MOST_POPULATED_IN_3_COUNTRIES = '''
SELECT country.name as country_name, city.name as city_name, MAX(city.population) AS max_pop FROM country
    JOIN city ON city.countrycode = country.code
    WHERE country.code IN ('USA','IND','CHN')
    GROUP BY country.name, city.name ORDER BY max_pop DESC LIMIT 10
'''

with client.sql(MOST_POPULATED_IN_3_COUNTRIES, include_field_names=True) as cursor:
    print('Most 10 populated cities in USA, India and China:')
    row_template = '{:15}\t| {:20}\t| {}'
    # With include_field_names=True the first item from the cursor is the
    # header row (column names); it is printed above a separator line.
    header = next(cursor)
    print(row_template.format(*header))
    print('*' * 50)
    # Everything left in the cursor is data, one formatted line per row.
    for record in cursor:
        print(row_template.format(*record))

Most 10 populated cities in USA, India and China:
COUNTRY_NAME   	| CITY_NAME           	| MAX_POP
**************************************************
India          	| Mumbai (Bombay)     	| 10500000
China          	| Shanghai            	| 9696300
United States  	| New York            	| 8008278
China          	| Peking              	| 7472000
India          	| Delhi               	| 7206704
China          	| Chongqing           	| 6351600
China          	| Tianjin             	| 5286800
India          	| Calcutta [Kolkata]  	| 4399819
China          	| Wuhan               	| 4344600
China          	| Harbin              	| 4289800


In [14]:
# Display the parameterized INSERT statement that was used above to load the Country rows.
Query.COUNTRY_INSERT

'INSERT INTO Country(\n        Code, Name, Continent, Region,\n        SurfaceArea, IndepYear, Population,\n        LifeExpectancy, GNP, GNPOld,\n        LocalName, GovernmentForm, HeadOfState,\n        Capital, Code2\n    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'

In [15]:
# Show the raw Country rows; each list supplies the positional arguments for
# one execution of Query.COUNTRY_INSERT.
for country_row in TestData.COUNTRY:
    print(country_row)

['USA', 'United States', 'North America', 'North America', Decimal('9363520.00'), 1776, 278357000, Decimal('77.1'), Decimal('8510700.00'), Decimal('8110900.00'), 'United States', 'Federal Republic', 'George W. Bush', 3813, 'US']
['IND', 'India', 'Asia', 'Southern and Central Asia', Decimal('3287263.00'), 1947, 1013662000, Decimal('62.5'), Decimal('447114.00'), Decimal('430572.00'), 'Bharat/India', 'Federal Republic', 'Kocheril Raman Narayanan', 1109, 'IN']
['CHN', 'China', 'Asia', 'Eastern Asia', Decimal('9572900.00'), -1523, 1277558000, Decimal('71.4'), Decimal('982268.00'), Decimal('917719.00'), 'Zhongquo', 'PeoplesRepublic', 'Jiang Zemin', 1891, 'CN']


In [28]:
# DDL for the segment-to-subscriber mapping table.
create_sql = """
CREATE TABLE SEG_MAP (
    ID int(11) PRIMARY KEY,
    SEG VARCHAR(20) ,
    SA_ID VARCHAR(20)
)
"""

# Parameterized insert: one (ID, SEG, SA_ID) triple per execution.
insert_sql = """
INSERT INTO SEG_MAP(ID, SEG, SA_ID) VALUES(?, ?, ?)
"""

# Two sample rows, both in segment 'SEG_01'; SA_ID is the row id as a string.
rows = [[seg_row_id, 'SEG_01', str(seg_row_id)] for seg_row_id in (1, 2)]

In [26]:
# Run the SEG_MAP DDL held in create_sql; the cursor object returned by the call
# is echoed as this cell's output.
# NOTE(review): the recorded execution counts show this cell ran (In [26]) before the
# cell that currently defines create_sql (In [28]) -- re-run top to bottom to confirm.
client.sql(create_sql)

<pyignite.cursors.SqlFieldsCursor at 0x7fd8909e8978>

In [29]:
# Insert the prepared rows one statement at a time.
for params in rows:
    client.sql(insert_sql, query_args=params)

In [30]:
# Read the table back to verify what was stored.
select_all_sql = "select * from SEG_MAP"
with client.sql(select_all_sql) as cursor:
    for record in cursor:
        print(record)

[1, 'SEG_01', '1']
[2, 'SEG_01', '2']


In [36]:
# Target scenario: 200 SEGs (serial numbers) x 500k set-top boxes (SA_IDs randomly
# sampled from 0..6M), i.e. 200 * 50 * 10000 => 100M mapping rows, inserted straight
# into Ignite. This cell only builds a small trial batch: 5,000 rows in one segment.
arr_seg = range(0, 200)  # segment ids for the full scenario; not used by this trial batch
arr_sa_id = range(0, 5000)
insert_sql = """
INSERT INTO SEG_MAP(ID, SEG, SA_ID) VALUES(?, ?, ?)
"""

# SA_ID is declared VARCHAR(20) in the table DDL and the earlier sample rows pass
# it as a string, so it is sent as str(i) here too instead of a raw int.
rows = [[i, 'SEG_01', str(i)] for i in arr_sa_id]

print(len(rows))

5000


In [37]:
# Empty SEG_MAP before the bulk load. The new batch reuses IDs 1 and 2, which were
# already inserted above, and ID is the table's primary key.
client.sql("delete from SEG_MAP")

<pyignite.cursors.SqlFieldsCursor at 0x7fd890c6d6d8>

In [38]:
%%time
for row in rows:
    client.sql(insert_sql, query_args=row)

CPU times: user 4.27 s, sys: 324 ms, total: 4.6 s
Wall time: 8.22 s


In [39]:
import pyspark.pandas 



In [None]:
# Generating the data through individual INSERT statements is slow.
# Try saving a Spark DataFrame into Ignite instead.
# NOTE(review): `sdf` is not defined in any cell visible in this notebook -- create
# the Spark DataFrame before running this cell.
# NOTE(review): the Ignite Spark data source presumably also needs a "config" option
# (path to the Ignite configuration file) and a save mode for an existing table -- confirm.
ignite_writer = sdf.write.format("ignite").option("table", "seg_map")
ignite_writer.save()