In [None]:
# Mount Google Drive at /content/drive; the CSV paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# 필요한 모듈 import

import pandas as pd
import numpy as np
from numpy import dot
from numpy.linalg import norm
import collections, operator

In [None]:
# 모든 경우의 수에 대한 원 핫 인코딩 DataFrame 만들기
'''
이 DataFrame으로 User가 웹에서 만들어낼 수 있는 모든 경우의 수에 대한 결과를 저장해놓고,
바로바로 사용자가 추천 결과를 확인할 수 있도록 하였음.

이 Dataframe의 각 line마다 인플루언서 100명과의 코사인 유사도를 계산한 후,
코사인 유사도가 가장 높은 상위 3명을 옆 column으로 추가할 것임.
'''

def user_encoding(style_num, tone_num, season_num):
  '''
  Build a DataFrame holding every one-hot combination of style, tone and season.

  input values
  - style_num: number of style categories
  - tone_num: number of tone categories
  - season_num: number of season categories

  Returns a DataFrame with columns style_0.., tone_0.., season_0.. and
  style_num * tone_num * season_num rows (index 0, 1, 2, ...). Each row has
  exactly one 1 per category group; the season varies fastest, then the tone,
  then the style. The frame is empty if any count is 0.
  '''
  from itertools import product

  # Column names: one per tag, grouped by category
  columns = (
      [f'style_{i}' for i in range(style_num)]
      + [f'tone_{i}' for i in range(tone_num)]
      + [f'season_{i}' for i in range(season_num)]
  )

  def one_hot(size, hot):
    # list of `size` zeros with a single 1 at position `hot`
    row = [0] * size
    row[hot] = 1
    return row

  # Collect all rows first and build the frame once; inserting rows one by one
  # with df.loc[...] re-allocates the whole frame on every insert.
  rows = [
      one_hot(style_num, s) + one_hot(tone_num, t) + one_hot(season_num, k)
      for s, t, k in product(range(style_num), range(tone_num), range(season_num))
  ]

  return pd.DataFrame(rows, columns = columns)

# 남자 인플루언서 추천 알고리즘

In [None]:
# Load the male influencer database from Google Drive (first CSV column is used as the index)
man_data = pd.read_csv('/content/drive/MyDrive/2023 KUIAI 해커톤/final_man_influ.csv', index_col = 0)
man_data

Unnamed: 0,influ_id,influ_pic_id,style,tone,season
0,@one_r_k,@one_r_k_3_half.png,street,mono,autumn
1,@oohsehun,@oohsehun_3_half.png,loose,mono,summer
2,@zxcvr0626,@zxcvr0626_11_half.png,street,neutral,winter
3,@j0ng_wo0,@j0ng_wo0_3_half.png,street,mono,summer
4,@hmm.__.u,@hmm.__.u_22_half.png,formal,mono,summer
...,...,...,...,...,...
786,@zxcvr0626,@zxcvr0626_20_half.png,street,mono,winter
787,@__my_t__,@__my_t___27_half.png,street,mono,summer
788,@d_soms,@d_soms_4_half.png,street,neutral,autumn
789,@oytoyt_,@oytoyt__2_half.png,loose,mono,winter


In [None]:
# Collect the distinct Instagram ids of the male influencers.
# unique() keeps the order of first appearance in the CSV, so the list is the
# same on every run. Iterating a set of strings can change order between
# interpreter sessions (hash randomization), which would reshuffle the rows of
# every per-influencer table built from this list and the tie-breaking of the ranking.
man_ids = man_data['influ_id'].unique().tolist()
man_ids

['@moodonx2',
 '@dbs.ycaa',
 '@1ungwoo',
 '@hmm.__.u',
 '@odor_bubu',
 '@kj_m.w',
 '@hotneul',
 '@kimchiz_man',
 '@s.s_jun',
 '@oohsehun',
 '@win.on_',
 '@oneh6_',
 '@oytoyt_',
 '@m_n__day',
 '@sualboys',
 '@j0ng_wo0',
 '@yangkoon__dl',
 '@jindalorian',
 '@yummy.__.do',
 '@bacajini',
 '@wnsgurrha',
 '@my_own_way_____',
 '@one_r_k',
 '@tt__yl',
 '@dismas_',
 '@zxcvr0626',
 '@hodu__jwan',
 '@grey_woo9',
 '@d_soms',
 '@mavlfit',
 '@yj_mark',
 '@k_8_8bsoo',
 '@uuuuk_2_',
 '@dosirak_hansol',
 '@_mgi_closet_',
 '@_jongh0',
 '@bbo9ni',
 '@jin_pyo_is',
 '@lil_0uzi_vert',
 '@bejoon0',
 '@kimyannnnngh',
 '@so_j2',
 '@geol_dong',
 '@__my_t__',
 '@59seok',
 '@malko_bee']

## Man / Style

In [None]:
# Style tags used for male influencers
style_tag_list = ['americancasual','casual','dandy','formal','loose','street']

# Empty DataFrame with one column per style tag
df_man_style = pd.DataFrame(columns = style_tag_list)

# One row per influencer: how often each classifier-assigned style occurs among
# the influencer's feed images, divided by the number of those images
for row_no, influ_id in enumerate(man_ids):
  influ_rows = man_data[man_data['influ_id'] == influ_id]
  style_counts = dict(influ_rows['style'].value_counts())
  df_man_style.loc[row_no] = style_counts
  df_man_style.loc[row_no] /= len(influ_rows)
# NaN means the influencer has no feed image with that style

In [None]:
df_man_style

Unnamed: 0,americancasual,casual,dandy,formal,loose,street
0,0.111111,,,0.222222,0.222222,0.444444
1,,0.083333,0.083333,0.083333,0.416667,0.333333
2,,0.071429,,0.071429,0.142857,0.714286
3,0.066667,,0.066667,0.166667,0.2,0.5
4,,0.090909,,,0.181818,0.727273
5,,,,0.230769,0.615385,0.153846
6,0.125,,,0.125,0.125,0.625
7,0.076923,0.153846,,,0.230769,0.538462
8,,,,0.333333,,0.666667
9,,,,0.2,0.6,0.2


## Man / Tone

In [None]:
# Tone tags used for male influencers
tone_tag_list = ['mono','neutral','pastel','toneon']

# Empty DataFrame with one column per tone tag
df_man_tone = pd.DataFrame(columns = tone_tag_list)

# One row per influencer: how often each classifier-assigned tone occurs among
# the influencer's feed images, divided by the number of those images
for row_no, influ_id in enumerate(man_ids):
  influ_rows = man_data[man_data['influ_id'] == influ_id]
  tone_counts = dict(influ_rows['tone'].value_counts())
  df_man_tone.loc[row_no] = tone_counts
  df_man_tone.loc[row_no] /= len(influ_rows)
# NaN means the influencer has no feed image with that tone

In [None]:
df_man_tone

Unnamed: 0,mono,neutral,pastel,toneon
0,0.777778,0.111111,,0.111111
1,0.75,0.25,,
2,0.428571,0.357143,,0.214286
3,0.733333,0.166667,0.066667,0.033333
4,0.636364,0.090909,0.272727,
5,0.615385,0.230769,0.153846,
6,0.5625,0.125,0.25,0.0625
7,0.538462,0.153846,0.230769,0.076923
8,0.666667,,0.333333,
9,0.8,,0.2,


## Man / Season

In [None]:
# Season tags ('autumn' stands for both spring and autumn)
season_tag_list = ['summer','autumn','winter']

# Empty DataFrame with one column per season tag
df_man_season = pd.DataFrame(columns = season_tag_list)

# One row per influencer: how often each classifier-assigned season occurs among
# the influencer's feed images, divided by the number of those images
for row_no, influ_id in enumerate(man_ids):
  influ_rows = man_data[man_data['influ_id'] == influ_id]
  season_counts = dict(influ_rows['season'].value_counts())
  df_man_season.loc[row_no] = season_counts
  df_man_season.loc[row_no] /= len(influ_rows)
# NaN means the influencer has no feed image with that season

In [None]:
df_man_season

Unnamed: 0,summer,autumn,winter
0,0.555556,0.111111,0.333333
1,0.25,0.416667,0.333333
2,0.071429,0.357143,0.571429
3,0.366667,0.533333,0.1
4,0.363636,0.181818,0.454545
5,0.230769,0.615385,0.153846
6,0.125,0.3125,0.5625
7,0.230769,0.153846,0.615385
8,0.333333,0.666667,
9,0.6,0.2,0.2


## Man DataFrame Concat

In [None]:
# Put the style, tone and season frequency tables side by side and replace
# missing frequencies (NaN) with 0
df_man_total = pd.concat([df_man_style, df_man_tone, df_man_season], axis = 1).fillna(0)

# Attach the Instagram id that belongs to each row
df_man_total['ids'] = man_ids

In [None]:
df_man_total

Unnamed: 0,americancasual,casual,dandy,formal,loose,street,mono,neutral,pastel,toneon,summer,autumn,winter,ids
0,0.111111,0.0,0.0,0.222222,0.222222,0.444444,0.777778,0.111111,0.0,0.111111,0.555556,0.111111,0.333333,@moodonx2
1,0.0,0.083333,0.083333,0.083333,0.416667,0.333333,0.75,0.25,0.0,0.0,0.25,0.416667,0.333333,@dbs.ycaa
2,0.0,0.071429,0.0,0.071429,0.142857,0.714286,0.428571,0.357143,0.0,0.214286,0.071429,0.357143,0.571429,@1ungwoo
3,0.066667,0.0,0.066667,0.166667,0.2,0.5,0.733333,0.166667,0.066667,0.033333,0.366667,0.533333,0.1,@hmm.__.u
4,0.0,0.090909,0.0,0.0,0.181818,0.727273,0.636364,0.090909,0.272727,0.0,0.363636,0.181818,0.454545,@odor_bubu
5,0.0,0.0,0.0,0.230769,0.615385,0.153846,0.615385,0.230769,0.153846,0.0,0.230769,0.615385,0.153846,@kj_m.w
6,0.125,0.0,0.0,0.125,0.125,0.625,0.5625,0.125,0.25,0.0625,0.125,0.3125,0.5625,@hotneul
7,0.076923,0.153846,0.0,0.0,0.230769,0.538462,0.538462,0.153846,0.230769,0.076923,0.230769,0.153846,0.615385,@kimchiz_man
8,0.0,0.0,0.0,0.333333,0.0,0.666667,0.666667,0.0,0.333333,0.0,0.333333,0.666667,0.0,@s.s_jun
9,0.0,0.0,0.0,0.2,0.6,0.2,0.8,0.0,0.2,0.0,0.6,0.2,0.2,@oohsehun


## Man 정답표 구성하기

In [None]:
# Cosine similarity between two vectors
def cos_sim(a, b):
    """Return the cosine similarity of vectors a and b.

    Returns 0.0 when either vector has zero length: the plain formula would
    divide 0 by 0 and yield NaN, which cannot be ordered when scores are sorted.
    """
    denom = norm(a) * norm(b)
    if denom == 0:
        return 0.0
    return dot(a, b) / denom

In [None]:
# Build every possible user input as a one-hot row (dummy variables) for the male categories
users = user_encoding(6, 4, 3)
# The string below records the male category counts passed above (style: 6, tone: 4, season: 3)
'''
남성의 경우,

style: 6개
tone: 4개
season: 3개
'''
users

Unnamed: 0,style_0,style_1,style_2,style_3,style_4,style_5,tone_0,tone_1,tone_2,tone_3,season_0,season_1,season_2
0,1,0,0,0,0,0,1,0,0,0,1,0,0
1,1,0,0,0,0,0,1,0,0,0,0,1,0
2,1,0,0,0,0,0,1,0,0,0,0,0,1
3,1,0,0,0,0,0,0,1,0,0,1,0,0
4,1,0,0,0,0,0,0,1,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,0,0,0,0,0,1,0,0,1,0,0,1,0
68,0,0,0,0,0,1,0,0,1,0,0,0,1
69,0,0,0,0,0,1,0,0,0,1,1,0,0
70,0,0,0,0,0,1,0,0,0,1,0,1,0


In [None]:
# For every possible user input, find the three most similar male influencers
# and store their Instagram ids together with the cosine-similarity scores.

# The influencer vectors do not depend on the user row, so extract them once.
# The trailing 'ids' column is dropped so only the numeric frequencies remain.
man_vectors = [df_man_total.iloc[j].to_list()[:-1] for j in range(len(df_man_total))]

i_result_rows = []
for i in range(len(users)):
  user = users.iloc[i]
  # (row position, similarity) for every influencer, best match first;
  # sorted() is stable, so equal scores keep the influencer row order
  scored = [(j, cos_sim(user, vec)) for j, vec in enumerate(man_vectors)]
  ranked = sorted(scored, key = operator.itemgetter(1), reverse = True)
  i_line = []
  for rank in range(3):
    j, score = ranked[rank]
    i_line.append(df_man_total['ids'].iloc[j])
    i_line.append(round(score, 2))
  i_result_rows.append(i_line)

# Build the result table in one step instead of growing it row by row
i_result_df = pd.DataFrame(i_result_rows, columns = ['first_influ','first_score','second_influ','second_score','third_influ','third_score'])

In [None]:
i_result_df

Unnamed: 0,first_influ,first_score,second_influ,second_score,third_influ,third_score
0,@moodonx2,0.71,@m_n__day,0.69,@uuuuk_2_,0.67
1,@tt__yl,0.74,@hmm.__.u,0.67,@kj_m.w,0.61
2,@jin_pyo_is,0.69,@yangkoon__dl,0.69,@kimchiz_man,0.65
3,@win.on_,0.56,@59seok,0.48,@d_soms,0.48
4,@mavlfit,0.43,@oytoyt_,0.43,@grey_woo9,0.42
...,...,...,...,...,...,...
67,@s.s_jun,0.75,@sualboys,0.63,@hotneul,0.61
68,@hotneul,0.74,@wnsgurrha,0.73,@kimchiz_man,0.73
69,@uuuuk_2_,0.67,@oneh6_,0.65,@yummy.__.do,0.60
70,@1ungwoo,0.64,@s.s_jun,0.60,@sualboys,0.60


In [None]:
# Join the all-combinations table with the ranking results column-wise,
# then export the combined table to CSV as the final lookup database
man_answer = pd.concat([users, i_result_df], axis = 1)
man_answer.to_csv('/content/drive/MyDrive/2023 KUIAI 해커톤/final_man_dataset.csv')
man_answer

Unnamed: 0,style_0,style_1,style_2,style_3,style_4,style_5,tone_0,tone_1,tone_2,tone_3,season_0,season_1,season_2,first_influ,first_score,second_influ,second_score,third_influ,third_score
0,1,0,0,0,0,0,1,0,0,0,1,0,0,@moodonx2,0.71,@m_n__day,0.69,@uuuuk_2_,0.67
1,1,0,0,0,0,0,1,0,0,0,0,1,0,@tt__yl,0.74,@hmm.__.u,0.67,@kj_m.w,0.61
2,1,0,0,0,0,0,1,0,0,0,0,0,1,@jin_pyo_is,0.69,@yangkoon__dl,0.69,@kimchiz_man,0.65
3,1,0,0,0,0,0,0,1,0,0,1,0,0,@win.on_,0.56,@59seok,0.48,@d_soms,0.48
4,1,0,0,0,0,0,0,1,0,0,0,1,0,@mavlfit,0.43,@oytoyt_,0.43,@grey_woo9,0.42
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,0,0,0,0,0,1,0,0,1,0,0,1,0,@s.s_jun,0.75,@sualboys,0.63,@hotneul,0.61
68,0,0,0,0,0,1,0,0,1,0,0,0,1,@hotneul,0.74,@wnsgurrha,0.73,@kimchiz_man,0.73
69,0,0,0,0,0,1,0,0,0,1,1,0,0,@uuuuk_2_,0.67,@oneh6_,0.65,@yummy.__.do,0.60
70,0,0,0,0,0,1,0,0,0,1,0,1,0,@1ungwoo,0.64,@s.s_jun,0.60,@sualboys,0.60


# 여자 인플루언서 추천 알고리즘

위의 남자 인플루언서 추천 알고리즘과 동일한 방식과 코드를 사용하므로 추가적으로 자세한 주석을 생략함.

In [None]:
# Load the female influencer database from Google Drive (first CSV column is used as the index)
woman_data = pd.read_csv('/content/drive/MyDrive/2023 KUIAI 해커톤/final_woman_influ.csv', index_col = 0)
woman_data

Unnamed: 0,influ_id,influ_pic_id,style,tone,season
0,@belleofcloset,@belleofcloset_9_half.png,romantic,mono,winter
1,@__v.yuum_look__,@__v.yuum_look___12_half.png,romantic,mono,winter
2,@98.c_project,@98.c_project_12_half.png,casual,pastel,autumn
3,@__v.yuum_look__,@__v.yuum_look___9_half.png,formal,toneon,winter
4,@kxyxn,@kxyxn_20_half.png,casual,mono,summer
...,...,...,...,...,...
616,@kxyxn,@kxyxn_8_half.png,casual,mono,autumn
617,@__v.yuum_look__,@__v.yuum_look___4_half.png,chic,neutral,winter
618,@belleofcloset,@belleofcloset_4_half.png,casual,pastel,autumn
619,__02x02,__02x02_4_half.png,casual,mono,summer


In [None]:
# Collect the distinct Instagram ids of the female influencers.
# unique() keeps the order of first appearance in the CSV, so the list is the
# same on every run. Iterating a set of strings can change order between
# interpreter sessions (hash randomization), which would reshuffle the rows of
# every per-influencer table built from this list and the tie-breaking of the ranking.
# NOTE(review): the data contains an id without the leading '@' ('__02x02'); it is kept as-is.
woman_ids = woman_data['influ_id'].unique().tolist()
woman_ids

['@bloggerbok',
 '@siaestival',
 '@belleofcloset',
 '@eungil_j',
 '@ba_serin_e',
 '@e_wolly',
 '@ap.s_fi1st',
 '@by_he.nique',
 '@lxx.s.y_',
 '@_yourthen',
 '@tlrou.vee',
 '@antmousbe9',
 '@ro.seon',
 '@mini.d31',
 '@rupinydaily',
 '__02x02',
 '@_zia_mood',
 '@0nefence',
 '@ruri.kim',
 '@hwi____ii',
 '@__v.yuum_look__',
 '@337janggoon',
 '@jin.wonder',
 '@yulkeem___',
 '@so_h_appy',
 '@y_mood_h',
 '@jung_staas',
 '@hamnihouse',
 '@kxyxn',
 '@lamode.seoul',
 '@hawl_0.s',
 '@166.ootd',
 '@xixxeeonx_4',
 '@loolinmx',
 '@rozley._.y',
 '@amourfor_u',
 '@slglf',
 '@z___meme',
 '@yeenstyle_',
 '@arcco_iris_',
 '@chaileeson',
 '@velyjuu',
 '@s_uz_.zzy__2',
 '@jelly_wony',
 '@98.c_project',
 '@c_eunnnnnnn']

## Woman / Style

In [None]:
# Style tags used for female influencers
style_tag_list = ['casual','romantic','girlish','chic','street','formal','loose']

# Empty DataFrame with one column per style tag
df_woman_style = pd.DataFrame(columns = style_tag_list)

# One row per influencer: how often each classifier-assigned style occurs among
# the influencer's feed images, divided by the number of those images
for row_no, influ_id in enumerate(woman_ids):
  influ_rows = woman_data[woman_data['influ_id'] == influ_id]
  style_counts = dict(influ_rows['style'].value_counts())
  df_woman_style.loc[row_no] = style_counts
  df_woman_style.loc[row_no] /= len(influ_rows)
# NaN means the influencer has no feed image with that style

In [None]:
df_woman_style

Unnamed: 0,casual,romantic,girlish,chic,street,formal,loose
0,0.692308,0.153846,,,0.153846,,
1,0.666667,0.222222,0.111111,,,,
2,0.470588,0.176471,0.176471,0.117647,,,0.058824
3,0.869565,0.043478,,,0.086957,,
4,0.916667,0.083333,,,,,
5,0.818182,0.090909,,0.090909,,,
6,0.8,0.1,0.1,,,,
7,,0.2,0.4,0.4,,,
8,0.333333,0.166667,0.333333,0.166667,,,
9,0.392857,0.285714,0.25,0.071429,,,


## Woman / Tone

In [None]:
# Tone tags used for female influencers
tone_tag_list = ['mono','neutral','pastel','toneon']

# Empty DataFrame with one column per tone tag
df_woman_tone = pd.DataFrame(columns = tone_tag_list)

# One row per influencer: how often each classifier-assigned tone occurs among
# the influencer's feed images, divided by the number of those images
for row_no, influ_id in enumerate(woman_ids):
  influ_rows = woman_data[woman_data['influ_id'] == influ_id]
  tone_counts = dict(influ_rows['tone'].value_counts())
  df_woman_tone.loc[row_no] = tone_counts
  df_woman_tone.loc[row_no] /= len(influ_rows)
# NaN means the influencer has no feed image with that tone

In [None]:
df_woman_tone

Unnamed: 0,mono,neutral,pastel,toneon
0,0.923077,,0.076923,
1,1.0,,,
2,0.529412,0.117647,0.352941,
3,0.869565,,0.130435,
4,0.833333,0.166667,,
5,0.636364,0.090909,0.272727,
6,0.7,0.2,0.1,
7,0.4,,0.6,
8,0.666667,0.166667,0.166667,
9,0.321429,0.178571,0.5,


## Woman / Season

In [None]:
# Season tags ('autumn' stands for both spring and autumn)
season_tag_list = ['summer','autumn','winter']

# Empty DataFrame with one column per season tag
df_woman_season = pd.DataFrame(columns = season_tag_list)

# One row per influencer: how often each classifier-assigned season occurs among
# the influencer's feed images, divided by the number of those images
for row_no, influ_id in enumerate(woman_ids):
  influ_rows = woman_data[woman_data['influ_id'] == influ_id]
  season_counts = dict(influ_rows['season'].value_counts())
  df_woman_season.loc[row_no] = season_counts
  df_woman_season.loc[row_no] /= len(influ_rows)
# NaN means the influencer has no feed image with that season

In [None]:
df_woman_season

Unnamed: 0,summer,autumn,winter
0,0.153846,0.153846,0.692308
1,0.111111,0.333333,0.555556
2,0.588235,0.117647,0.294118
3,0.26087,0.043478,0.695652
4,0.416667,0.333333,0.25
5,0.272727,0.272727,0.454545
6,0.2,0.1,0.7
7,0.4,0.4,0.2
8,0.166667,,0.833333
9,0.214286,0.142857,0.642857


## Woman DataFrame Concat

In [None]:
# Put the style, tone and season frequency tables side by side and replace
# missing frequencies (NaN) with 0
df_woman_total = pd.concat([df_woman_style, df_woman_tone, df_woman_season], axis = 1).fillna(0)

# Attach the Instagram id that belongs to each row
df_woman_total['ids'] = woman_ids

In [None]:
df_woman_total

Unnamed: 0,casual,romantic,girlish,chic,street,formal,loose,mono,neutral,pastel,toneon,summer,autumn,winter,ids
0,0.692308,0.153846,0.0,0.0,0.153846,0.0,0.0,0.923077,0.0,0.076923,0.0,0.153846,0.153846,0.692308,@bloggerbok
1,0.666667,0.222222,0.111111,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.111111,0.333333,0.555556,@siaestival
2,0.470588,0.176471,0.176471,0.117647,0.0,0.0,0.058824,0.529412,0.117647,0.352941,0.0,0.588235,0.117647,0.294118,@belleofcloset
3,0.869565,0.043478,0.0,0.0,0.086957,0.0,0.0,0.869565,0.0,0.130435,0.0,0.26087,0.043478,0.695652,@eungil_j
4,0.916667,0.083333,0.0,0.0,0.0,0.0,0.0,0.833333,0.166667,0.0,0.0,0.416667,0.333333,0.25,@ba_serin_e
5,0.818182,0.090909,0.0,0.090909,0.0,0.0,0.0,0.636364,0.090909,0.272727,0.0,0.272727,0.272727,0.454545,@e_wolly
6,0.8,0.1,0.1,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.2,0.1,0.7,@ap.s_fi1st
7,0.0,0.2,0.4,0.4,0.0,0.0,0.0,0.4,0.0,0.6,0.0,0.4,0.4,0.2,@by_he.nique
8,0.333333,0.166667,0.333333,0.166667,0.0,0.0,0.0,0.666667,0.166667,0.166667,0.0,0.166667,0.0,0.833333,@lxx.s.y_
9,0.392857,0.285714,0.25,0.071429,0.0,0.0,0.0,0.321429,0.178571,0.5,0.0,0.214286,0.142857,0.642857,@_yourthen


## Woman 정답표 구성하기

In [None]:
# Cosine similarity between two vectors
def cos_sim(a, b):
    """Return the cosine similarity of vectors a and b.

    Returns 0.0 when either vector has zero length: the plain formula would
    divide 0 by 0 and yield NaN, which cannot be ordered when scores are sorted.
    """
    denom = norm(a) * norm(b)
    if denom == 0:
        return 0.0
    return dot(a, b) / denom

In [None]:
# Build every possible user input as a one-hot row (dummy variables) for the female categories
users_woman = user_encoding(7, 4, 3)
# The string below records the female category counts passed above (style: 7, tone: 4, season: 3)
'''
여자의 경우,

style: 7개
tone: 4개
season: 3개
'''
users_woman

Unnamed: 0,style_0,style_1,style_2,style_3,style_4,style_5,style_6,tone_0,tone_1,tone_2,tone_3,season_0,season_1,season_2
0,1,0,0,0,0,0,0,1,0,0,0,1,0,0
1,1,0,0,0,0,0,0,1,0,0,0,0,1,0
2,1,0,0,0,0,0,0,1,0,0,0,0,0,1
3,1,0,0,0,0,0,0,0,1,0,0,1,0,0
4,1,0,0,0,0,0,0,0,1,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79,0,0,0,0,0,0,1,0,0,1,0,0,1,0
80,0,0,0,0,0,0,1,0,0,1,0,0,0,1
81,0,0,0,0,0,0,1,0,0,0,1,1,0,0
82,0,0,0,0,0,0,1,0,0,0,1,0,1,0


In [None]:
# For every possible user input, find the three most similar female influencers
# and store their Instagram ids together with the cosine-similarity scores.

# The influencer vectors do not depend on the user row, so extract them once.
# The trailing 'ids' column is dropped so only the numeric frequencies remain.
woman_vectors = [df_woman_total.iloc[j].to_list()[:-1] for j in range(len(df_woman_total))]

i_result_rows_w = []
for i in range(len(users_woman)):
  user_w = users_woman.iloc[i]
  # (row position, similarity) for every influencer, best match first;
  # sorted() is stable, so equal scores keep the influencer row order
  scored = [(j, cos_sim(user_w, vec)) for j, vec in enumerate(woman_vectors)]
  ranked = sorted(scored, key = operator.itemgetter(1), reverse = True)
  i_line = []
  for rank in range(3):
    j, score = ranked[rank]
    i_line.append(df_woman_total['ids'].iloc[j])
    i_line.append(round(score, 2))
  i_result_rows_w.append(i_line)

# Build the result table in one step instead of growing it row by row
i_result_df_w = pd.DataFrame(i_result_rows_w, columns = ['first_influ','first_score','second_influ','second_score','third_influ','third_score'])

In [None]:
i_result_df_w

Unnamed: 0,first_influ,first_score,second_influ,second_score,third_influ,third_score
0,@jin.wonder,0.97,@mini.d31,0.94,@chaileeson,0.92
1,@hamnihouse,0.91,@0nefence,0.88,@ba_serin_e,0.87
2,@hwi____ii,0.99,@ro.seon,0.99,@xixxeeonx_4,0.98
3,@ruri.kim,0.66,@mini.d31,0.64,@y_mood_h,0.64
4,@98.c_project,0.61,@hawl_0.s,0.60,@antmousbe9,0.60
...,...,...,...,...,...,...
79,@by_he.nique,0.52,@amourfor_u,0.49,@lamode.seoul,0.46
80,@_yourthen,0.61,@rupinydaily,0.60,@337janggoon,0.58
81,@mini.d31,0.35,@belleofcloset,0.35,@jin.wonder,0.31
82,@hamnihouse,0.34,@by_he.nique,0.21,@jelly_wony,0.20


In [None]:
# Join the all-combinations table with the ranking results column-wise,
# then export the combined table to CSV as the final lookup database
woman_answer = pd.concat([users_woman, i_result_df_w], axis = 1)
woman_answer.to_csv('/content/drive/MyDrive/2023 KUIAI 해커톤/final_woman_dataset.csv')
woman_answer

Unnamed: 0,style_0,style_1,style_2,style_3,style_4,style_5,style_6,tone_0,tone_1,tone_2,tone_3,season_0,season_1,season_2,first_influ,first_score,second_influ,second_score,third_influ,third_score
0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,@jin.wonder,0.97,@mini.d31,0.94,@chaileeson,0.92
1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,@hamnihouse,0.91,@0nefence,0.88,@ba_serin_e,0.87
2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,@hwi____ii,0.99,@ro.seon,0.99,@xixxeeonx_4,0.98
3,1,0,0,0,0,0,0,0,1,0,0,1,0,0,@ruri.kim,0.66,@mini.d31,0.64,@y_mood_h,0.64
4,1,0,0,0,0,0,0,0,1,0,0,0,1,0,@98.c_project,0.61,@hawl_0.s,0.60,@antmousbe9,0.60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79,0,0,0,0,0,0,1,0,0,1,0,0,1,0,@by_he.nique,0.52,@amourfor_u,0.49,@lamode.seoul,0.46
80,0,0,0,0,0,0,1,0,0,1,0,0,0,1,@_yourthen,0.61,@rupinydaily,0.60,@337janggoon,0.58
81,0,0,0,0,0,0,1,0,0,0,1,1,0,0,@mini.d31,0.35,@belleofcloset,0.35,@jin.wonder,0.31
82,0,0,0,0,0,0,1,0,0,0,1,0,1,0,@hamnihouse,0.34,@by_he.nique,0.21,@jelly_wony,0.20


# 사용자 input 읽어서 결과 출력

아래의 코드는 일부 수정 후, 프론트엔드 코드와 연결할 수 있도록 'code.py'로 내보낸 파일이 존재함.

따라서 프론트엔드 코드 폴더와 함께 있는 code.py와 일부 코드 구성이 다를 수 있음.

In [None]:
# Load input_data.csv, the file holding the selections the user made on the web page
input_data = pd.read_csv('/content/drive/MyDrive/2023 KUIAI 해커톤/input_data.csv')
input_data

Unnamed: 0,Gender,Tone,Season,Style
0,남자,톤온톤,봄/가을,캐주얼


In [None]:
# Take the user's input data and write out the recommendation result
def result_print(input_data):
  '''
  Look up the stored recommendation for one user's selections.

  - input_data: DataFrame whose row labelled 0 holds the 'Gender', 'Style',
    'Tone' and 'Season' values chosen by the user

  Both lookup databases are read from Google Drive; the work is then handed to
  result_func_man when 'Gender' is '남자' and to result_func_woman otherwise.
  '''

  def column_lookup(prefix, labels):
    # Map each label to '<prefix>_<position in labels>'
    return {label: prefix + '_' + str(pos) for pos, label in enumerate(labels)}

  # Only the row labelled 0 is used, as a plain dict
  input_data = input_data.loc[0].to_dict()

  # Male and female lookup databases
  man_answer = pd.read_csv('/content/drive/MyDrive/2023 KUIAI 해커톤/final_man_dataset.csv')
  woman_answer = pd.read_csv('/content/drive/MyDrive/2023 KUIAI 해커톤/final_woman_dataset.csv')

  # Dictionaries translating the labels sent by the web page into the one-hot
  # column names of the lookup databases
  man_style_match = column_lookup('style', ['아메리칸 캐주얼', '캐주얼', '댄디', '포멀', '루즈핏', '스트릿'])
  woman_style_match = column_lookup('style', ['캐주얼', '로맨틱', '걸리시', '시크', '스트릿', '포멀', '루즈핏'])
  tone_match = column_lookup('tone', ['모노톤', '뉴트럴톤', '파스텔톤', '톤온톤'])
  season_match = column_lookup('season', ['여름', '봄/가을', '겨울'])

  # Any gender other than '남자' takes the female path
  if input_data['Gender'] != '남자':
    result_func_woman(input_data, woman_answer, woman_style_match, tone_match, season_match)
    return

  result_func_man(input_data, man_answer, man_style_match, tone_match, season_match)


# Male users
def result_func_man(input_data, man_answer, man_style_match, tone_match, season_match):
  '''
  Write the three best-matching male influencers for one user's selections to a text file.

  - input_data: dict-like holding the user's 'Style', 'Tone' and 'Season' choices
  - man_answer: male lookup database with the one-hot style/tone/season columns
    and the 'first_influ', 'second_influ', 'third_influ' columns
  - man_style_match, tone_match, season_match: dictionaries mapping each choice
    to its one-hot column name

  Raises KeyError when a choice is missing from its dictionary and IndexError
  when no row of man_answer matches all three choices.
  '''

  # Column names that correspond to the user's selections
  style_col = man_style_match[input_data['Style']]
  tone_col = tone_match[input_data['Tone']]
  season_col = season_match[input_data['Season']]

  # Rows where all three selected tags are set to 1; the first match is used
  condition = (man_answer[style_col] == 1) & (man_answer[tone_col] == 1) & (man_answer[season_col] == 1)
  columns = ['first_influ','second_influ','third_influ']
  send = man_answer[condition][columns].iloc[0].to_list()

  # Build the output before touching the file so a failure here cannot leave a
  # truncated result file behind
  lines = [str(rank) + "번째 맞춤 인플루언서: " + influ + '\n' for rank, influ in enumerate(send, start = 1)]

  # The context manager closes the file even if a write raises
  with open("/content/drive/MyDrive/2023 KUIAI 해커톤/recommendation_result.txt", 'w') as f:
    f.writelines(lines)


# Female users
def result_func_woman(input_data, woman_answer, woman_style_match, tone_match, season_match):
  '''
  Write the three best-matching female influencers for one user's selections to a text file.

  - input_data: dict-like holding the user's 'Style', 'Tone' and 'Season' choices
  - woman_answer: female lookup database with the one-hot style/tone/season columns
    and the 'first_influ', 'second_influ', 'third_influ' columns
  - woman_style_match, tone_match, season_match: dictionaries mapping each choice
    to its one-hot column name

  Raises KeyError when a choice is missing from its dictionary and IndexError
  when no row of woman_answer matches all three choices.
  '''

  # Column names that correspond to the user's selections
  style_col = woman_style_match[input_data['Style']]
  tone_col = tone_match[input_data['Tone']]
  season_col = season_match[input_data['Season']]

  # Rows where all three selected tags are set to 1; the first match is used
  condition = (woman_answer[style_col] == 1) & (woman_answer[tone_col] == 1) & (woman_answer[season_col] == 1)
  columns = ['first_influ','second_influ','third_influ']
  send = woman_answer[condition][columns].iloc[0].to_list()

  # Build the output before touching the file so a failure here cannot leave a
  # truncated result file behind
  lines = [str(rank) + "번째 맞춤 인플루언서: " + influ + '\n' for rank, influ in enumerate(send, start = 1)]

  # The context manager closes the file even if a write raises
  with open("/content/drive/MyDrive/2023 KUIAI 해커톤/recommendation_result.txt", 'w') as f:
    f.writelines(lines)

In [None]:
# Entry point: running this single call saves a text file with the three recommended influencers
result_print(input_data)