In [1]:
import json
import psycopg2
import pandas as pd

In [2]:
def load_db(config_path="/content/drive/MyDrive/yeoreodigm/data_files/db_info.json"):
  """Open a database connection from settings stored in a JSON file.

  Args:
    config_path: Path of a JSON file containing the keys "ENDPOINT",
      "DB_NAME", "USER_ID" and "PASSWORD". Defaults to the path this
      notebook has always used, so `load_db()` keeps working unchanged.

  Returns:
    The connection object returned by `psycopg2.connect`. The caller owns
    it and is responsible for closing it.

  Raises:
    OSError: if the file cannot be opened.
    KeyError: if one of the four expected keys is missing from the file.
  """
  with open(config_path) as json_file:
    db_info = json.load(json_file)

  # All four settings are looked up before connect() is called, so a
  # malformed config fails with KeyError before any network access.
  return psycopg2.connect(
    host=db_info["ENDPOINT"],
    dbname=db_info["DB_NAME"],
    user=db_info["USER_ID"],
    password=db_info["PASSWORD"],
  )

In [3]:
# Open the database connection and create the cursor that the next cell queries with.
db = load_db()
cursor = db.cursor()

In [4]:
# Pull every row of the association_rule table into a DataFrame.
cursor.execute("select * from association_rule")
result = cursor.fetchall()
# Name the columns at construction time. Assigning `.columns` afterwards fails
# with a length-mismatch ValueError when the query returns no rows, because an
# empty DataFrame has zero columns.
rule = pd.DataFrame(result, columns=['id','antecedents','consequents'])

In [5]:
# Preview the first three rules.
rule.head(3)

Unnamed: 0,id,antecedents,consequents
0,1,[31],[134]
1,2,[134],[31]
2,3,[25],[134]


In [6]:
# Store how many items each rule's consequent holds, then summarise that count.
rule['len'] = rule['consequents'].map(len)
rule['len'].describe()

count    23546.000000
mean         1.866219
std          0.760698
min          1.000000
25%          1.000000
50%          2.000000
75%          2.000000
max          5.000000
Name: len, dtype: float64

In [7]:
# Inspect four randomly drawn rules (no random_state is passed, so the rows differ per run).
rule.sample(4)

Unnamed: 0,id,antecedents,consequents,len
1980,1981,"[10, 24]",[9],1
22499,22500,"[5, 15]","[3, 8, 9, 13]",4
7870,7871,"[8, 15, 223]",[9],1
15119,15120,"[13, 15]","[3, 9, 16]",3


In [8]:
# Load the course table; the first CSV column becomes the DataFrame index.
course = pd.read_csv("/content/drive/MyDrive/yeoreodigm/data_files/08.21_course.csv",index_col=0)

In [9]:
# Strip the surrounding brackets from each `places` string and split it on commas.
# Empty tokens are dropped so that "[]" becomes [] instead of [''], which a later
# int() conversion cannot parse (int('') raises ValueError).
course['places'] = course['places'].apply(lambda x: [token for token in x[1:-1].split(',') if token.strip()])

In [10]:
# Convert every token of each `places` list to int.
course['places'] = course['places'].map(lambda tokens: [int(token) for token in tokens])

In [11]:
# Preview the first two courses.
course.head(2)

Unnamed: 0,id,places,theme,cluster,score
0,1,"[602, 10, 58, 29]","[0.25, 0.25, 0.5, 0.0, 0.25, 0.0, 0.25, 0.5, 0...",10,167322
1,2,"[58, 37, 10, 19, 602, 60, 29]","[0.14285714285714285, 0.14285714285714285, 0.5...",7,163116


In [12]:
from tqdm import tqdm

# Build each rule's antecedent set once, instead of once per (course, rule) pair.
# Rules with an empty antecedent are left out: the previous ratio test divided by
# len(antecedents) and would have raised ZeroDivisionError on them.
rule_sets = [
  (set(val), attr)
  for val, attr in zip(rule['antecedents'].values, rule['consequents'].values)
  if len(val) > 0
]

entire_basket = []
# The loop variable is deliberately not called `input`: that name shadows the
# builtin and would stay behind as a notebook global after the loop finishes.
for places in tqdm(course['places'].values):
  visited = set(places)
  same_basket = set()
  for antecedent_set, attr in rule_sets:
    # A rule fires when every one of its antecedent items is in the course.
    # NOTE(review): assumes antecedents hold no duplicate ids; the old
    # len(intersection) / len(list) ratio never matched a list with duplicates.
    if antecedent_set <= visited:
      same_basket.update(attr)
  # Places already in the itinerary are excluded from the rule-based recommendation.
  recommend = list(same_basket - visited)
  entire_basket.append(recommend)

100%|██████████| 3725/3725 [01:58<00:00, 31.40it/s]


In [13]:
# Attach the per-course recommendations and summarise how many each course received.
course['rule_result'] = entire_basket
course['test_len'] = course['rule_result'].map(len)
course['test_len'].describe()

count    3725.000000
mean       24.651812
std         6.016880
min         0.000000
25%        22.000000
50%        26.000000
75%        29.000000
max        35.000000
Name: test_len, dtype: float64

In [15]:
# Method 1
def recommend_by_assoc_rule1(input,association_rule):
  """Recommend items via association rules, iterating the frame row by row.

  Args:
    input: Iterable of item ids already selected.
    association_rule: DataFrame whose second and third columns (positions 1
      and 2) hold each rule's antecedents and consequents.

  Returns:
    A list, in no particular order, of the consequents of every rule whose
    antecedents all appear in `input`, minus the items of `input` itself.
  """
  gathered = []

  for row in association_rule.values:
    cause, effect = row[1], row[2]
    hits = set(input).intersection(set(cause))
    if len(hits) / len(cause) < 1.0:
      # Not every antecedent item is present, so the rule does not apply.
      continue
    gathered.extend(effect)

  return list(set(gathered) - set(input))

In [11]:
# Sample list passed to the timing loops (presumably place ids - confirm).
# NOTE(review): the name shadows the builtin `input`, and this cell's execution
# count (11) is out of sequence with its neighbours.
input = [34,50,424,310,874,2,6]

In [16]:
# Rough timing run: call method 1 200 times on the same input; the results are discarded.
for i in range(200):
  recommend_by_assoc_rule1(input,rule)

In [17]:
# Method 2
def recommend_by_assoc_rule2(input,association_rule):
  """Recommend items via association rules, iterating two columns in parallel.

  Args:
    input: Iterable of item ids already selected.
    association_rule: DataFrame with 'antecedents' and 'consequents' columns.

  Returns:
    A list, in no particular order, of the consequents of every rule whose
    antecedents all appear in `input`, minus the items of `input` itself.
  """
  causes = association_rule['antecedents'].values
  effects = association_rule['consequents'].values

  gathered = []
  for cause, effect in zip(causes, effects):
    hits = set(input).intersection(set(cause))
    if len(hits) / len(cause) < 1.0:
      # Not every antecedent item is present, so the rule does not apply.
      continue
    gathered.extend(effect)

  return list(set(gathered) - set(input))

In [18]:
# Rough timing run: call method 2 200 times on the same input; the results are discarded.
for i in range(200):
  recommend_by_assoc_rule2(input,rule)

## Method 2 is faster, so it is adopted and tested below.

In [19]:
def load_association_rule(db):
  """Read the whole association_rule table into a DataFrame.

  Args:
    db: An open database connection exposing `cursor()`.

  Returns:
    A DataFrame with the columns 'id', 'antecedents' and 'consequents', one
    row per fetched record. It has those columns and zero rows when the table
    is empty.
  """
  sql = "select * from association_rule"
  cursor = db.cursor()
  try:
    cursor.execute(sql)
    result = cursor.fetchall()
  finally:
    # Release the cursor even if the query fails; it was previously left open.
    cursor.close()
  # Naming the columns at construction time keeps an empty result valid.
  # Assigning `.columns` to a zero-column frame raises ValueError.
  return pd.DataFrame(result, columns=['id','antecedents','consequents'])

def recommend_by_assoc_rule(input,association_rule):
  """Recommend items whose association rules are satisfied by `input`.

  A rule applies when every item of its antecedents is contained in `input`.
  The consequents of all applicable rules are merged, and the items already
  in `input` are removed.

  Args:
    input: Iterable of item ids already selected.
    association_rule: DataFrame with 'antecedents' and 'consequents' columns,
      each cell holding an iterable of item ids.

  Returns:
    A list of recommended item ids without duplicates, in no particular order.
    It is empty when no rule applies.
  """
  chosen = set(input)  # built once; the rule loop only reads it
  recommended = set()
  for antecedents,consequents in zip(association_rule['antecedents'].values,association_rule['consequents'].values):
    required = set(antecedents)
    # Rules with an empty antecedent are skipped; the former ratio test divided
    # by len(antecedents) and raised ZeroDivisionError on them.
    # NOTE(review): assumes antecedents hold no duplicate ids; the old ratio
    # never matched a list with duplicates.
    if required and required <= chosen:
      recommended.update(consequents)

  return list(recommended - chosen)

In [20]:
# Try the adopted function on a sample list; the bare call displays the returned list.
my_input = [3,5,134,10,78,18,22,310]
recommend_by_assoc_rule(my_input,rule)

[1,
 2,
 4,
 6,
 8,
 9,
 13,
 15,
 16,
 17,
 19,
 20,
 21,
 24,
 25,
 28,
 29,
 31,
 37,
 174,
 49,
 179,
 58,
 59,
 61,
 76,
 336,
 80,
 83,
 219,
 223]