In [None]:

import pandas as pd
from clickhouse_driver import Client

# Connection settings handed to clickhouse_driver.Client further down.
CH_HOST = "localhost"
CH_PORT = 9000
CH_USER = "default"
CH_PASSWORD = ""
CH_DATABASE = "default"
# Name of the table that is dropped, re-created and filled by this script.
CH_TABLE = "e_otinish_clean"

# Source data. The "utf-8-sig" codec decodes UTF-8 and discards a leading
# byte-order mark when the file starts with one.
csv_file = "e_otinish_dummy.csv"
df = pd.read_csv(csv_file, encoding="utf-8-sig")

# Map the Russian CSV headers to the English column names used by the
# ClickHouse table. The keys must be real Cyrillic text: the previous keys
# were mis-encoded (UTF-8 bytes rendered as Mac Roman), so they could never
# match the headers of a CSV decoded as UTF-8, and rename() silently ignores
# keys it does not find.
df = df.rename(columns={
    'ID': 'id',
    'Дата обращения': 'request_date',
    'Категория': 'category',
    'Подкатегория': 'subcategory',
    'Регион': 'region',
    'Возраст заявителя': 'applicant_age',
    'Пол': 'gender',
    'Статус': 'status',
    'Дата ответа': 'response_date',
    'Кол-во детей': 'children_count',
    'Возраст старшего ребенка': 'oldest_child_age',
    'Ответственный орган': 'responsible_agency',
    'Тип заявителя': 'applicant_type',
    'Способ подачи': 'submission_method',
    'Язык обращения': 'language',
    'Повторное обращение': 'is_repeat',
    'Канал получения ответа': 'response_channel',
    'Категория срочности': 'urgency',
    'Вид обращения': 'appeal_type',
    'Тематика': 'topic',
})

# Parse both date columns and keep only the calendar date part.
for date_column in ("request_date", "response_date"):
    df[date_column] = pd.to_datetime(df[date_column]).dt.date

# Store ages and counts as pandas' nullable unsigned 8-bit integers.
# NOTE(review): applicant_age is cast without fillna(), unlike the two
# child-related columns below - confirm the source never has gaps there.
df["applicant_age"] = df["applicant_age"].astype("UInt8")
for child_column in ("children_count", "oldest_child_age"):
    # Missing values become 0 before the cast.
    df[child_column] = df[child_column].fillna(0).astype("UInt8")

# Open the ClickHouse connection used by every function below.
client = Client(
    host=CH_HOST,
    port=CH_PORT,
    user=CH_USER,
    password=CH_PASSWORD,
    database=CH_DATABASE,
)
# Remove any previous copy of the table so the load starts from scratch.
client.execute(f"DROP TABLE IF EXISTS {CH_TABLE}")
# The status message was mis-encoded (mojibake); restored to readable Russian:
# "Table '<name>' dropped (if it existed)."
print(f"🗑 Таблица '{CH_TABLE}' удалена (если существовала).")

def create_ch_table():
    """Create the ClickHouse table that receives the cleaned data.

    Sends one ``CREATE TABLE`` statement through the module-level ``client``.
    The table is named by ``CH_TABLE``, uses the ``MergeTree`` engine and is
    ordered by ``id``. The statement carries no ``IF NOT EXISTS`` clause; the
    script drops the table before calling this function.
    """
    ddl = f"""
        CREATE TABLE {CH_TABLE} (
            id UInt32,
            request_date Date,
            category String,
            subcategory String,
            region String,
            applicant_age UInt8,
            gender String,
            status String,
            response_date Date,
            children_count UInt8,
            oldest_child_age UInt8,
            responsible_agency String,
            applicant_type String,
            submission_method String,
            language String,
            is_repeat String,
            response_channel String,
            urgency String,
            appeal_type String,
            topic String
        ) ENGINE = MergeTree()
        ORDER BY id
    """
    client.execute(ddl)

def upload_to_ch():
    """Insert every row of the module-level ``df`` into ``CH_TABLE``.

    The INSERT names its target columns explicitly, taken from ``df.columns``,
    so values are matched to table columns by name. Without the column list
    the insert is positional and only correct while the CSV header order
    happens to equal the column order of the ``CREATE TABLE`` statement.

    Prints the number of uploaded rows when the insert has finished.
    """
    # Backtick-quote each name so it is always treated as an identifier.
    column_list = ", ".join(f"`{name}`" for name in df.columns)
    data = [tuple(x) for x in df.to_records(index=False)]
    client.execute(f"INSERT INTO {CH_TABLE} ({column_list}) VALUES", data)
    # The message was mis-encoded (mojibake); restored to readable Russian:
    # "Loaded <n> rows into table '<name>'."
    print(f"✅ Загружено {len(data)} строк в таблицу '{CH_TABLE}'.")

def connect_to_superset():
    """Print step-by-step instructions for adding the table to Superset.

    The text is in Russian and interpolates ``CH_DATABASE`` and ``CH_TABLE``.
    Nothing is configured automatically; the function only prints.
    """
    # The instructions were mis-encoded (mojibake) and unreadable; the text
    # below is the same message restored to Russian.
    print(f"""
Теперь подключите таблицу в Superset:

1. Перейдите в: Data → Datasets
2. Нажмите: + Dataset
3. Выберите источник ClickHouse
4. База данных: {CH_DATABASE}
5. Таблица: {CH_TABLE}
6. Нажмите Save — и можно строить визуализации!
    """)

# Run the pipeline in order: create the table, load the rows, then print
# the Superset hand-off instructions.
for pipeline_step in (create_ch_table, upload_to_ch, connect_to_superset):
    pipeline_step()


🗑 Таблица 'e_otinish_clean' удалена (если существовала).
✅ Загружено 10000 строк в таблицу 'e_otinish_clean'.

Теперь подключите таблицу в Superset:

1. Перейдите в: Data → Datasets
2. Нажмите: + Dataset
3. Выберите источник ClickHouse
4. База данных: default
5. Таблица: e_otinish_clean
6. Нажмите Save — и можно строить визуализации!
    
