In [7]:
import json
import datetime as dt
import pandas as pd
from pyspark.sql.functions import col, monotonically_increasing_id, row_number
from pyspark.sql.window import Window
from pyspark.sql.types import *

In [4]:
JDBC_DW = {
    'url':'jdbc:oracle:thin:@realestate_high?TNS_ADMIN=/home/big/study/db/Wallet_REALESTATE'
    ,'props':{
        'user':'dw_realestate'
       ,'password':'123qwe!@#QWE'
    }   
}

JDBC_DM = {
    'url':'jdbc:oracle:thin:@realestate_high?TNS_ADMIN=/home/big/study/db/Wallet_REALESTATE'
    ,'props':{
        'user':'dm_realestate'
       ,'password':'123qwe!@#QWE'
    }   
}

In [5]:
sex_ages = spark.read.jdbc(url=JDBC_DW['url'], table='OWN_SEX_AGE', properties=JDBC_DW['props'])
sex_ages.show()
sex_ages.createOrReplaceTempView("sex_ages")



+-------+-------------------+-------------+---------+----------------+---+
|OSA_IDX|           RES_DATE|RES_REGN_CODE|BUYER_SEX|      BUYER_AGES|TOT|
+-------+-------------------+-------------+---------+----------------+---+
|      1|2022-09-23 00:00:00|        42000|     여자|       19세~29세|  7|
|      2|2022-09-23 00:00:00|        42000|     여자|       30세~39세| 24|
|      3|2022-09-23 00:00:00|        42000|     여자|       40세~49세| 30|
|      4|2022-09-23 00:00:00|        42000|     여자|       50세~59세| 50|
|      5|2022-09-23 00:00:00|        42000|     여자|       60세~69세| 56|
|      6|2022-09-23 00:00:00|        42000|     여자|       70세 이상| 18|
|      7|2022-09-23 00:00:00|        42000|     남자|0~18세(미성년자)|  4|
|      8|2022-09-23 00:00:00|        42000|     남자|       19세~29세|  7|
|      9|2022-09-23 00:00:00|        42000|     남자|       30세~39세| 27|
|     10|2022-09-23 00:00:00|        42000|     남자|       40세~49세| 30|
|     11|2022-09-23 00:00:00|        42000|     남자|       50세~59세| 59

### ACC_SELL_BUY_AGES

In [8]:
ages = spark.sql("""select BUYER_AGES as AGES, sum(TOT) as BUY_TOT , round((sum(TOT)/(select sum(TOT) from sex_ages)*100),1) as BUY_RATE
from sex_ages group by BUYER_AGES""")
ages = ages.withColumn('ASBA_IDX', row_number().over(Window.orderBy(monotonically_increasing_id())))
ages.show()

                                                                                

+----------------+-------+--------+--------+
|            AGES|BUY_TOT|BUY_RATE|ASBA_IDX|
+----------------+-------+--------+--------+
|       30세~39세|   1047|    19.4|       1|
|0~18세(미성년자)|      5|     0.1|       2|
|       50세~59세|   1402|    26.0|       3|
|       40세~49세|   1308|    24.3|       4|
|       60세~69세|   1003|    18.6|       5|
|       70세 이상|    363|     6.7|       6|
|       19세~29세|    257|     4.8|       7|
+----------------+-------+--------+--------+



In [9]:
ages.write.jdbc(url=JDBC_DM['url'], table='ACC_SELL_BUY_AGES', mode='append', properties=JDBC_DM['props'])

                                                                                

### SELL_BUY_AGES_YEAR

In [10]:
ages_year = spark.sql("""select BUYER_AGES as AGES, SUM(TOT) AS BUY_TOT, (select year(res_date) from sex_ages group by year(res_date)) as YEAR from sex_ages group by BUYER_AGES""")
ages_year = ages_year.withColumn('SBAY_IDX', row_number().over(Window.orderBy(monotonically_increasing_id())))
ages_year.show()

[Stage 21:>                                                         (0 + 1) / 1]

+----------------+-------+----+--------+
|            AGES|BUY_TOT|YEAR|SBAY_IDX|
+----------------+-------+----+--------+
|       30세~39세|   1047|2022|       1|
|0~18세(미성년자)|      5|2022|       2|
|       50세~59세|   1402|2022|       3|
|       40세~49세|   1308|2022|       4|
|       60세~69세|   1003|2022|       5|
|       70세 이상|    363|2022|       6|
|       19세~29세|    257|2022|       7|
+----------------+-------+----+--------+



                                                                                

In [11]:
ages_year.write.jdbc(url=JDBC_DM['url'], table='SELL_BUY_AGES_YEAR', mode='append', properties=JDBC_DM['props'])

                                                                                

### ACC_SELL_BUY_AGES_SIDO

In [12]:
ages_sido = spark.sql("""select BUYER_AGES as AGES, sum(TOT) as BUY_TOT , RES_REGN_CODE as REGN
from sex_ages group by BUYER_AGES, RES_REGN_CODE order by BUYER_AGES""")
ages_sido = ages_sido.withColumn('ASBAS_IDX', row_number().over(Window.orderBy(monotonically_increasing_id())))
ages_sido.show()

[Stage 38:>                                                         (0 + 1) / 1]

+----------------+-------+-----+---------+
|            AGES|BUY_TOT| REGN|ASBAS_IDX|
+----------------+-------+-----+---------+
|0~18세(미성년자)|      1|11000|        1|
|0~18세(미성년자)|      4|42000|        2|
|       19세~29세|     14|42000|        3|
|       19세~29세|      3|30000|        4|
|       19세~29세|     68|41000|        5|
|       19세~29세|      1|36110|        6|
|       19세~29세|      5|29000|        7|
|       19세~29세|     18|44000|        8|
|       19세~29세|     37|11000|        9|
|       19세~29세|      5|50000|       10|
|       19세~29세|      6|45000|       11|
|       19세~29세|     22|28000|       12|
|       19세~29세|      4|31000|       13|
|       19세~29세|     13|46000|       14|
|       19세~29세|     15|47000|       15|
|       19세~29세|      9|27000|       16|
|       19세~29세|     13|43000|       17|
|       19세~29세|     11|48000|       18|
|       19세~29세|     13|26000|       19|
|       30세~39세|     28|43000|       20|
+----------------+-------+-----+---------+
only showing t

                                                                                

In [13]:
ages_sido.write.jdbc(url=JDBC_DM['url'], table='ACC_SELL_BUY_AGES_SIDO', mode='append', properties=JDBC_DM['props'])

                                                                                

### ACC_SELL_BUY_SEX

In [18]:
sex = spark.sql("""select BUYER_SEX as SEX, sum(TOT) as BUY_TOT , round((sum(TOT)/(select sum(TOT) from sex_ages)*100),1) as BUY_RATE
from sex_ages group by BUYER_SEX""")
sex = sex.withColumn('ASBS_IDX', row_number().over(Window.orderBy(monotonically_increasing_id())))
sex.show()

[Stage 68:>                                                         (0 + 1) / 1]

+----+-------+--------+--------+
| SEX|BUY_TOT|BUY_RATE|ASBS_IDX|
+----+-------+--------+--------+
|여자|   2407|    44.7|       1|
|남자|   2978|    55.3|       2|
+----+-------+--------+--------+



                                                                                

In [20]:
sex.write.jdbc(url=JDBC_DM['url'], table='ACC_SELL_BUY_SEX', mode='append', properties=JDBC_DM['props'])

                                                                                

### SELL_BUY_SEX_YEAR

In [21]:
sex_year = spark.sql("""select BUYER_SEX as SEX, SUM(TOT) AS BUY_TOT, (select year(res_date) from sex_ages group by year(res_date)) as YEAR from sex_ages group by BUYER_SEX""")
sex_year = sex_year.withColumn('SBSY_IDX', row_number().over(Window.orderBy(monotonically_increasing_id())))
sex_year.show()

[Stage 83:>                                                         (0 + 1) / 1]

+----+-------+----+--------+
| SEX|BUY_TOT|YEAR|SBSY_IDX|
+----+-------+----+--------+
|여자|   2407|2022|       1|
|남자|   2978|2022|       2|
+----+-------+----+--------+



                                                                                

In [23]:
sex_year.write.jdbc(url=JDBC_DM['url'], table='SELL_BUY_SEX_YEAR', mode='append', properties=JDBC_DM['props'])

                                                                                

### ACC_SELL_BUY_SEX_SIDO

In [27]:
sex_sido = spark.sql("""select BUYER_SEX as SEX, sum(TOT) as BUY_TOT , RES_REGN_CODE as REGN
from sex_ages group by BUYER_SEX, RES_REGN_CODE order by sex""")
sex_sido.show()

                                                                                

+----+-------+-----+
| SEX|BUY_TOT| REGN|
+----+-------+-----+
|남자|     29|30000|
|남자|    664|41000|
|남자|    155|45000|
|남자|    193|28000|
|남자|    183|42000|
|남자|    127|26000|
|남자|     16|36110|
|남자|     23|31000|
|남자|    259|46000|
|남자|    268|48000|
|남자|    142|43000|
|남자|    239|44000|
|남자|     41|29000|
|남자|    232|11000|
|남자|    251|47000|
|남자|     88|27000|
|남자|     68|50000|
|여자|    185|42000|
|여자|     37|29000|
|여자|    186|48000|
+----+-------+-----+
only showing top 20 rows



In [28]:
sex_sido.write.jdbc(url=JDBC_DM['url'], table='ACC_SELL_BUY_SEX_SIDO', mode='overwrite', properties=JDBC_DM['props'])

                                                                                