In [None]:
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "dce437ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from konlpy.tag import Okt\n",
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from tqdm import tqdm\n",
    "from collections import Counter\n",
    "from soyspacing.countbase import CountSpace\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "410bf0d5",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/j0/p8q34w_j4yz0ghqcplk17s240000gn/T/ipykernel_13424/2350099597.py:7: DtypeWarning: Columns (6,7,10,11,13,15) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  df = pd.read_csv(file_path)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                title              outline     mastrId     imageDownloadUrl mainGenreCdNm pictrWritrNm pltfomCdNm     filtered_outline\n",
      "0        회장님의 백만가지 대본  교통사고로 머리를 다친 강예성...  2023059426  https://www.kmas...            BL           일천        피너툰  교통사고 머리 다친 강 예성 ...\n",
      "1           회사원 K의 비밀  직장생활 5년 차 김대리 그에...  2023059425  https://www.kmas...            BL           모락     카카오페이지  직장 생활 차 김 대리 그 딱...\n",
      "2      황자님, 왜 잘해 주세요?  저 저를 어떻게 하시려는 거예...  2023059422  https://www.kmas...           드라마           강차     카카오페이지  저 저 어떻게 하시려는 거 강...\n",
      "3        환상의 에덴 [개정판]  어떠한 이유로 방랑하고 있는 ...  2023059421  https://www.kmas...            BL         후지토비     카카오페이지  어떠한 이유 방랑 있는 사형 ...\n",
      "4         화려한 혼활 버스터즈  혼활에서 연전연패 중인 팔방미...  2023059420  https://www.kmas...           로맨스     hiromyan      레진코믹스  혼활 연전 연패 중인 팔방미인...\n",
      "...               ...                  ...         ...                  ...           ...          ...        ...                  ...\n",
      "20152        060 특수부대  연재기간 20100323 총회...  2017023368  https://www.kmas...            코믹     스바르탄/정재호      머니투데이  연재 기간 총회 차 연재 매체...\n",
      "20153        0.1초의 설렘  긴 사랑과 짧은 설렘에 대한 보고서  2017023367  https://www.kmas...           드라마          임강혁       다음웹툰   긴 사랑 짧은 설렘에 대한 보고서\n",
      "20154    0.0MHz  (완결)  심령현상을 과학적으로 밝히려는...  2017023366  https://www.kmas...            공포           장작       다음웹툰  심령 현상 과학 밝히려는 위험...\n",
      "20155          -0.5˚C  아버지의 불륜으로 사랑에 거부...  2017023365  https://www.kmas...           동성애           연어      레진코믹스  아버지 불륜 사랑 거부 감 보...\n",
      "20156            #해모나  셀기꾼 SNS 인기스타인 모나...  2017023364  https://www.kmas...           이성애           신매      배틀코믹스  셀기 꾼 인기 스타인 모나 날...\n",
      "\n",
      "[20157 rows x 8 columns]\n"
     ]
    }
   ],
   "source": [
    "# NOTE(review): absolute, machine-specific path -- adjust to wherever webtoon.csv lives locally.\n",
    "file_path = '/Users/choedohyeon/desktop/workarea/summery/webtoon.csv'\n",
    "\n",
    "# Create the models: a soyspacing spacing corrector and the KoNLPy Okt tagger.\n",
    "# NOTE(review): CountSpace is used here without any train()/load_model() call in this cell --\n",
    "# confirm that correct() does something useful on an untrained model.\n",
    "model = CountSpace()\n",
    "okt = Okt()\n",
    "\n",
    "# low_memory=False parses the file in one pass so mixed-type columns get one consistent dtype\n",
    "# instead of triggering a DtypeWarning.\n",
    "df = pd.read_csv(file_path, low_memory=False)\n",
    "pd.set_option('display.max_columns', None)\n",
    "pd.set_option('display.expand_frame_repr', False)\n",
    "pd.set_option('display.max_colwidth', 20)\n",
    "\n",
    "# NOTE(review): html_table and styled_table are not used by any cell visible here; if nothing\n",
    "# later needs them they can be dropped (to_html renders the entire raw frame).\n",
    "html_table = df.to_html()\n",
    "styled_table = df.style.set_table_styles([{\n",
    "    'selector': 'td',\n",
    "    'props': [('max-width', '100px')]\n",
    "}])\n",
    "\n",
    "# Keep only the columns used downstream, then de-duplicate on title, outline and image URL.\n",
    "df = df[['title', 'outline', 'mastrId', 'imageDownloadUrl', 'mainGenreCdNm', 'pictrWritrNm', 'pltfomCdNm']]\n",
    "df = df.drop_duplicates(subset=['title'])\n",
    "df = df.drop_duplicates(subset=['outline'])\n",
    "df = df.drop_duplicates(subset=['imageDownloadUrl'])\n",
    "\n",
    "# Clean the outline text: strip punctuation, remove carriage returns, turn newlines into spaces.\n",
    "df['outline'] = df['outline'].str.replace(pat=r'[^\\w\\s]', repl=r'', regex=True)\n",
    "df['outline'] = df['outline'].str.replace(pat=r'[\\r]', repl=r'', regex=True)\n",
    "df['outline'] = df['outline'].str.replace(pat=r'[\\n]', repl=r' ', regex=True)\n",
    "df = df.dropna()\n",
    "\n",
    "# Drop rows whose outline is the literal '1' *before* spacing correction. Skipping them\n",
    "# inside the list comprehension instead would yield a list shorter than the frame and make the\n",
    "# column assignment below fail with a length-mismatch ValueError.\n",
    "df = df[df['outline'] != '1']\n",
    "df = df.reset_index(drop=True)\n",
    "\n",
    "outlines = df['outline'].tolist()\n",
    "\n",
    "# correct() returns a tuple; element 0 is used as the corrected sentence.\n",
    "corrected_outlines = [model.correct(outline)[0] for outline in outlines]\n",
    "\n",
    "df['outline'] = corrected_outlines\n",
    "\n",
    "def filter_pos(text):\n",
    "    \"\"\"Return the Adjective/Adverb/Verb/Noun tokens of `text` (as tagged by Okt), space-joined.\"\"\"\n",
    "    kept_tags = {'Adjective', 'Adverb', 'Verb', 'Noun'}\n",
    "    return ' '.join(word for word, pos in okt.pos(text) if pos in kept_tags)\n",
    "\n",
    "df['filtered_outline'] = df['outline'].apply(filter_pos)\n",
    "\n",
    "print(df)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "b2b591be",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['BL' '드라마' '로맨스' '소년' '순정' '추리미스터리' '이성애' '학습만화' '무협' '판타지' '코믹' '액션'\n",
      " '교양' '학원' '일상' '로맨스판타지' 'GL' '모험' '역사' 'SF' '스포츠' '공포' '동성애' '성인' '종교'\n",
      " '요리' '캠페인' '시사']\n"
     ]
    }
   ],
   "source": [
    "# Genres and outline keywords that mark a title as adult content.\n",
    "adult_genres = ['성인', 'BL', 'GL', '동성애']\n",
    "keywords = ['섹스', '불륜', '야동', '암컷', '수컷']\n",
    "pattern = re.compile('|'.join(keywords))\n",
    "\n",
    "# Show which genre labels occur in the data.\n",
    "unique_genres = df['mainGenreCdNm'].unique()\n",
    "print(unique_genres)\n",
    "\n",
    "# Start from 0 everywhere, then flag by genre.\n",
    "is_adult_genre = df['mainGenreCdNm'].isin(adult_genres)\n",
    "df['adult'] = 0\n",
    "df.loc[is_adult_genre, 'adult'] = 1\n",
    "\n",
    "# Also flag rows not yet marked whose outline contains one of the keywords.\n",
    "has_adult_keyword = df['outline'].str.contains(pattern)\n",
    "df.loc[has_adult_keyword & (df['adult'] == 0), 'adult'] = 1\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "b23c0cc3",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'그': 1,\n",
       " '하는': 2,\n",
       " '것': 3,\n",
       " '수': 4,\n",
       " '자신': 5,\n",
       " '날': 6,\n",
       " '사람': 7,\n",
       " '된': 8,\n",
       " '남자': 9,\n",
       " '된다': 10,\n",
       " '시작': 11,\n",
       " '그녀': 12,\n",
       " '이': 13,\n",
       " '위해': 14,\n",
       " '사랑': 15,\n",
       " '한다': 16,\n",
       " '있는': 17,\n",
       " '한': 18,\n",
       " '어느': 19,\n",
       " '하게': 20,\n",
       " '내': 21,\n",
       " '해': 22,\n",
       " '나': 23,\n",
       " '할': 24,\n",
       " '없는': 25,\n",
       " '이야기': 26,\n",
       " '되는데': 27,\n",
       " '알': 28,\n",
       " '말': 29,\n",
       " '하는데': 30,\n",
       " '마음': 31,\n",
       " '두': 32,\n",
       " '몸': 33,\n",
       " '곳': 34,\n",
       " '여자': 35,\n",
       " '같은': 36,\n",
       " '다': 37,\n",
       " '일': 38,\n",
       " '친구': 39,\n",
       " '속': 40,\n",
       " '되고': 41,\n",
       " '집': 42,\n",
       " '생각': 43,\n",
       " '함께': 44,\n",
       " '눈': 45,\n",
       " '세계': 46,\n",
       " '때': 47,\n",
       " '앞': 48,\n",
       " '못': 49,\n",
       " '되어': 50,\n",
       " '그런': 51,\n",
       " '결혼': 52,\n",
       " '살': 53,\n",
       " '세상': 54,\n",
       " '인간': 55,\n",
       " '과연': 56,\n",
       " '전': 57,\n",
       " '중': 58,\n",
       " '있을까': 59,\n",
       " '안': 60,\n",
       " '후': 61,\n",
       " '있다': 62,\n",
       " '연애': 63,\n",
       " '다시': 64,\n",
       " '다른': 65,\n",
       " '모습': 66,\n",
       " '하던': 67,\n",
       " '생활': 68,\n",
       " '모든': 69,\n",
       " '하지': 70,\n",
       " '더': 71,\n",
       " '위': 72,\n",
       " '관계': 73,\n",
       " '주인공': 74,\n",
       " '하나': 75,\n",
       " '비밀': 76,\n",
       " '하기': 77,\n",
       " '운명': 78,\n",
       " '사실': 79,\n",
       " '갑자기': 80,\n",
       " '작가': 81,\n",
       " '되는': 82,\n",
       " '건': 83,\n",
       " '거': 84,\n",
       " '사이': 85,\n",
       " '때문': 86,\n",
       " '밤': 87,\n",
       " '하고': 88,\n",
       " '회사': 89,\n",
       " '있었다': 90,\n",
       " '온': 91,\n",
       " '하며': 92,\n",
       " '게임': 93,\n",
       " '아이': 94,\n",
       " '둘': 95,\n",
       " '꿈': 96,\n",
       " '삶': 97,\n",
       " '그러던': 98,\n",
       " '했다': 99,\n",
       " '뒤': 100,\n",
       " '버린': 101,\n",
       " '처음': 102,\n",
       " '서로': 103,\n",
       " '키': 104,\n",
       " '남': 105,\n",
       " '줄': 106,\n",
       " '인생': 107,\n",
       " '걸': 108,\n",
       " '우리': 109,\n",
       " '가진': 110,\n",
       " '사건': 111,\n",
       " '세': 112,\n",
       " '우연히': 113,\n",
       " '로맨스': 114,\n",
       " '평범한': 115,\n",
       " '소녀': 116,\n",
       " '남편': 117,\n",
       " '너': 118,\n",
       " '채': 119,\n",
       " '점점': 120,\n",
       " '만화': 121,\n",
       " '했던': 122,\n",
       " '지금': 123,\n",
       " '않는': 124,\n",
       " '연재': 125,\n",
       " '얼굴': 126,\n",
       " '없이': 127,\n",
       " '이자': 128,\n",
       " '기억': 129,\n",
       " '과거': 130,\n",
       " '손': 131,\n",
       " '의': 132,\n",
       " '정체': 133,\n",
       " '학교': 134,\n",
       " '짝사랑': 135,\n",
       " '선배': 136,\n",
       " '게': 137,\n",
       " '복수': 138,\n",
       " '바로': 139,\n",
       " '누구': 140,\n",
       " '상대': 141,\n",
       " '시간': 142,\n",
       " '능력': 143,\n",
       " '적': 144,\n",
       " '만다': 145,\n",
       " '구': 146,\n",
       " '당신': 147,\n",
       " '될': 148,\n",
       " '순간': 149,\n",
       " '인기': 150,\n",
       " '만난': 151,\n",
       " '일상': 152,\n",
       " '있던': 153,\n",
       " '가지': 154,\n",
       " '새로운': 155,\n",
       " '고백': 156,\n",
       " '아버지': 157,\n",
       " '하루': 158,\n",
       " '모두': 159,\n",
       " '이제': 160,\n",
       " '차': 161,\n",
       " '죽음': 162,\n",
       " '그것': 163,\n",
       " '제안': 164,\n",
       " '왜': 165,\n",
       " '끝': 166,\n",
       " '섹스': 167,\n",
       " '소년': 168,\n",
       " '네': 169,\n",
       " '존재': 170,\n",
       " '그렇게': 171,\n",
       " '번': 172,\n",
       " '나타난': 173,\n",
       " '소꿉친구': 174,\n",
       " '카': 175,\n",
       " '인해': 176,\n",
       " '키스': 177,\n",
       " '가족': 178,\n",
       " '저': 179,\n",
       " '보니': 180,\n",
       " '동거': 181,\n",
       " '이름': 182,\n",
       " '코': 183,\n",
       " '동생': 184,\n",
       " '향': 185,\n",
       " '결국': 186,\n",
       " '대한': 187,\n",
       " '힘': 188,\n",
       " '않은': 189,\n",
       " '제': 190,\n",
       " '사고': 191,\n",
       " '본': 192,\n",
       " '돈': 193,\n",
       " '너무': 194,\n",
       " '만나게': 195,\n",
       " '히': 196,\n",
       " '시절': 197,\n",
       " '잘': 198,\n",
       " '사는': 199,\n",
       " '아닌': 200,\n",
       " '계속': 201,\n",
       " '최고': 202,\n",
       " '감정': 203,\n",
       " '준': 204,\n",
       " '어떤': 205,\n",
       " '이유': 206,\n",
       " '러브': 207,\n",
       " '뭐': 208,\n",
       " '마': 209,\n",
       " '술': 210,\n",
       " '받은': 211,\n",
       " '가슴': 212,\n",
       " '보고': 213,\n",
       " '연인': 214,\n",
       " '가장': 215,\n",
       " '피': 216,\n",
       " '이었다': 217,\n",
       " '오늘': 218,\n",
       " '현실': 219,\n",
       " '마는데': 220,\n",
       " '딸': 221,\n",
       " '형': 222,\n",
       " '였다': 223,\n",
       " '없어': 224,\n",
       " '이상': 225,\n",
       " '도': 226,\n",
       " '외모': 227,\n",
       " '나를': 228,\n",
       " '게다가': 229,\n",
       " '대학': 230,\n",
       " '듯': 231,\n",
       " '이렇게': 232,\n",
       " '계약': 233,\n",
       " '첫': 234,\n",
       " '아이돌': 235,\n",
       " '황제': 236,\n",
       " '소설': 237,\n",
       " '고민': 238,\n",
       " '발견': 239,\n",
       " '방': 240,\n",
       " '아들': 241,\n",
       " '되었다': 242,\n",
       " '이런': 243,\n",
       " '웹툰': 244,\n",
       " '판타지': 245,\n",
       " '남자친구': 246,\n",
       " '매일': 247,\n",
       " '성격': 248,\n",
       " '상황': 249,\n",
       " '첫사랑': 250,\n",
       " '싶은': 251,\n",
       " '난': 252,\n",
       " '또': 253,\n",
       " '좋아하는': 254,\n",
       " '있을': 255,\n",
       " '대신': 256,\n",
       " '대학생': 257,\n",
       " '고등학교': 258,\n",
       " '좋은': 259,\n",
       " '정신': 260,\n",
       " '단': 261,\n",
       " '신': 262,\n",
       " '주인': 263,\n",
       " '진짜': 264,\n",
       " '했지만': 265,\n",
       " '어린': 266,\n",
       " '위기': 267,\n",
       " '하여': 268,\n",
       " '를': 269,\n",
       " '리': 270,\n",
       " '어떻게': 271,\n",
       " '뿐': 272,\n",
       " '미래': 273,\n",
       " '결심': 274,\n",
       " '사장': 275,\n",
       " '이후': 276,\n",
       " '기분': 277,\n",
       " '가게': 278,\n",
       " '길': 279,\n",
       " '일까': 280,\n",
       " '생': 281,\n",
       " '만나': 282,\n",
       " '하면': 283,\n",
       " '많은': 284,\n",
       " '가문': 285,\n",
       " '이번': 286,\n",
       " '상사': 287,\n",
       " '모르는': 288,\n",
       " '큰': 289,\n",
       " '악마': 290,\n",
       " '버렸다': 291,\n",
       " '은': 292,\n",
       " '자기': 293,\n",
       " '자리': 294,\n",
       " '엄마': 295,\n",
       " '만남': 296,\n",
       " '기': 297,\n",
       " '휘': 298,\n",
       " '재회': 299,\n",
       " '받고': 300,\n",
       " '부탁': 301,\n",
       " '토': 302,\n",
       " '자': 303,\n",
       " '전쟁': 304,\n",
       " '기간': 305,\n",
       " '스토리': 306,\n",
       " '동료': 307,\n",
       " '혼자': 308,\n",
       " '받게': 309,\n",
       " '기회': 310,\n",
       " '남친': 311,\n",
       " '없다': 312,\n",
       " '않고': 313,\n",
       " '빙의': 314,\n",
       " '귀여운': 315,\n",
       " '선택': 316,\n",
       " '맞이': 317,\n",
       " '전생': 318,\n",
       " '명': 319,\n",
       " '마을': 320,\n",
       " '무엇': 321,\n",
       " '가': 322,\n",
       " '같이': 323,\n",
       " '버리고': 324,\n",
       " '다음': 325,\n",
       " '선': 326,\n",
       " '아내': 327,\n",
       " '경험': 328,\n",
       " '바람': 329,\n",
       " '명의': 330,\n",
       " '정도': 331,\n",
       " '거리': 332,\n",
       " '그날': 333,\n",
       " '어릴': 334,\n",
       " '도움': 335,\n",
       " '소리': 336,\n",
       " '보이는': 337,\n",
       " '개': 338,\n",
       " '회사원': 339,\n",
       " '데': 340,\n",
       " '아름다운': 341,\n",
       " '보는': 342,\n",
       " '동안': 343,\n",
       " '최강': 344,\n",
       " '조금': 345,\n",
       " '이상한': 346,\n",
       " '간다': 347,\n",
       " '목숨': 348,\n",
       " '배우': 349,\n",
       " '치': 350,\n",
       " '해서': 351,\n",
       " '통해': 352,\n",
       " '여행': 353,\n",
       " '입니다': 354,\n",
       " '마왕': 355,\n",
       " '씨': 356,\n",
       " '상처': 357,\n",
       " '왕자': 358,\n",
       " '평소': 359,\n",
       " '어머니': 360,\n",
       " '달라': 361,\n",
       " '고양이': 362,\n",
       " '년': 363,\n",
       " '였던': 364,\n",
       " '알파': 365,\n",
       " '간': 366,\n",
       " '미남': 367,\n",
       " '선생님': 368,\n",
       " '그룹': 369,\n",
       " '마법': 370,\n",
       " '특별한': 371,\n",
       " '매체': 372,\n",
       " '만': 373,\n",
       " '이었던': 374,\n",
       " '유일한': 375,\n",
       " '찾아': 376,\n",
       " '작품': 377,\n",
       " '로': 378,\n",
       " '저주': 379,\n",
       " '그때': 380,\n",
       " '공주': 381,\n",
       " '돼': 382,\n",
       " '누군가': 383,\n",
       " '방법': 384,\n",
       " '등장': 385,\n",
       " '하자': 386,\n",
       " '반': 387,\n",
       " '부모님': 388,\n",
       " '도시': 389,\n",
       " '여성': 390,\n",
       " '시대': 391,\n",
       " '마지막': 392,\n",
       " '하다': 393,\n",
       " '감': 394,\n",
       " '등': 395,\n",
       " '하룻밤': 396,\n",
       " '유혹': 397,\n",
       " '받는': 398,\n",
       " '후배': 399,\n",
       " '관심': 400,\n",
       " '해도': 401,\n",
       " '오메가': 402,\n",
       " '왕': 403,\n",
       " '로서': 404,\n",
       " '오빠': 405,\n",
       " '달콤한': 406,\n",
       " '비': 407,\n",
       " '잘생긴': 408,\n",
       " '팀': 409,\n",
       " '소문': 410,\n",
       " '해야': 411,\n",
       " '노': 412,\n",
       " '완벽한': 413,\n",
       " '버린다': 414,\n",
       " '욕망': 415,\n",
       " '누나': 416,\n",
       " '청춘': 417,\n",
       " '직장': 418,\n",
       " '하지만': 419,\n",
       " '야한': 420,\n",
       " '받는다': 421,\n",
       " '주변': 422,\n",
       " '동경': 423,\n",
       " '무': 424,\n",
       " '지구': 425,\n",
       " '학생': 426,\n",
       " '강': 427,\n",
       " '로부터': 428,\n",
       " '어디': 429,\n",
       " '전설': 430,\n",
       " '거절': 431,\n",
       " '이용': 432,\n",
       " '무슨': 433,\n",
       " '했는데': 434,\n",
       " '언니': 435,\n",
       " '곁': 436,\n",
       " '볼': 437,\n",
       " '호텔': 438,\n",
       " '당황': 439,\n",
       " '는': 440,\n",
       " '전혀': 441,\n",
       " '절대': 442,\n",
       " '부': 443,\n",
       " '머리': 444,\n",
       " '죽은': 445,\n",
       " '나날': 446,\n",
       " '받아': 447,\n",
       " '게이': 448,\n",
       " '유키': 449,\n",
       " '제국': 450,\n",
       " '않아': 451,\n",
       " '있다는': 452,\n",
       " '마주': 453,\n",
       " '천재': 454,\n",
       " '미': 455,\n",
       " '싶어': 456,\n",
       " '작은': 457,\n",
       " '매력': 458,\n",
       " '있어': 459,\n",
       " '놈': 460,\n",
       " '청년': 461,\n",
       " '공작': 462,\n",
       " '달': 463,\n",
       " '계기': 464,\n",
       " '하늘': 465,\n",
       " '좀': 466,\n",
       " '잃은': 467,\n",
       " '화': 468,\n",
       " '새': 469,\n",
       " '애인': 470,\n",
       " '여기': 471,\n",
       " '우': 472,\n",
       " '바': 473,\n",
       " '바로가기': 474,\n",
       " '학원': 475,\n",
       " '달리': 476,\n",
       " '총회': 477,\n",
       " '라며': 478,\n",
       " '성인': 479,\n",
       " '인류': 480,\n",
       " '않는다': 481,\n",
       " '귀신': 482,\n",
       " '조직': 483,\n",
       " '있는데': 484,\n",
       " '고등학생': 485,\n",
       " '배신': 486,\n",
       " '몇': 487,\n",
       " '소원': 488,\n",
       " '의해': 489,\n",
       " '녀석': 490,\n",
       " '목격': 491,\n",
       " '아무': 492,\n",
       " '무사히': 493,\n",
       " '충격': 494,\n",
       " '실수': 495,\n",
       " '죽': 496,\n",
       " '상태': 497,\n",
       " '항상': 498,\n",
       " '의문': 499,\n",
       " '가는': 500,\n",
       " '걸까': 501,\n",
       " '목소리': 502,\n",
       " '아빠': 503,\n",
       " '잠시': 504,\n",
       " '찾아온': 505,\n",
       " '와': 506,\n",
       " '빚': 507,\n",
       " '나라': 508,\n",
       " '괴물': 509,\n",
       " '용': 510,\n",
       " '남녀': 511,\n",
       " '심지어': 512,\n",
       " '커플': 513,\n",
       " '우연': 514,\n",
       " '대': 515,\n",
       " '재벌': 516,\n",
       " '살인': 517,\n",
       " '잃고': 518,\n",
       " '대표': 519,\n",
       " '나이': 520,\n",
       " '코미디': 521,\n",
       " '문제': 522,\n",
       " '한번': 523,\n",
       " '늘': 524,\n",
       " '하면서': 525,\n",
       " '거기': 526,\n",
       " '꽃미남': 527,\n",
       " '여자친구': 528,\n",
       " '노력': 529,\n",
       " '은밀': 530,\n",
       " '고': 531,\n",
       " '처': 532,\n",
       " '카페': 533,\n",
       " '눈앞': 534,\n",
       " '집안': 535,\n",
       " '집착': 536,\n",
       " '싸움': 537,\n",
       " '아직': 538,\n",
       " '살아가는': 539,\n",
       " '영애': 540,\n",
       " '혼란': 541,\n",
       " '녀': 542,\n",
       " '부부': 543,\n",
       " '직원': 544,\n",
       " '합니다': 545,\n",
       " '약속': 546,\n",
       " '진': 547,\n",
       " '벌어지는': 548,\n",
       " '한국': 549,\n",
       " '드라마': 550,\n",
       " '연': 551,\n",
       " '거부': 552,\n",
       " '라면': 553,\n",
       " '보게': 554,\n",
       " '오해': 555,\n",
       " '돌아온': 556,\n",
       " '빠진': 557,\n",
       " '한편': 558,\n",
       " '입': 559,\n",
       " '조건': 560,\n",
       " '행복한': 561,\n",
       " '되기': 562,\n",
       " '준비': 563,\n",
       " '지옥': 564,\n",
       " '탓': 565,\n",
       " '정말': 566,\n",
       " '위험한': 567,\n",
       " '영웅': 568,\n",
       " '영혼': 569,\n",
       " '손님': 570,\n",
       " '만난다': 571,\n",
       " '생긴': 572,\n",
       " '아는': 573,\n",
       " '첫날': 574,\n",
       " '어른': 575,\n",
       " '아르바이트': 576,\n",
       " '있었는데': 577,\n",
       " '마리': 578,\n",
       " '수상한': 579,\n",
       " '따라': 580,\n",
       " '마법사': 581,\n",
       " '상상': 582,\n",
       " '하기로': 583,\n",
       " '옆': 584,\n",
       " '마치': 585,\n",
       " '약혼자': 586,\n",
       " '애': 587,\n",
       " '사내': 588,\n",
       " '인': 589,\n",
       " '지': 590,\n",
       " '행동': 591,\n",
       " '해결': 592,\n",
       " '취향': 593,\n",
       " '이미': 594,\n",
       " '마녀': 595,\n",
       " '신경': 596,\n",
       " '대해': 597,\n",
       " '진실': 598,\n",
       " '꽃': 599,\n",
       " '낯선': 600,\n",
       " '유우': 601,\n",
       " '요괴': 602,\n",
       " '그저': 603,\n",
       " '자꾸만': 604,\n",
       " '당하는': 605,\n",
       " '액션': 606,\n",
       " '인연': 607,\n",
       " '언제나': 608,\n",
       " '스스로': 609,\n",
       " '포기': 610,\n",
       " '뜨거운': 611,\n",
       " '기사': 612,\n",
       " '남자에게': 613,\n",
       " '본격': 614,\n",
       " '보내고': 615,\n",
       " '옆집': 616,\n",
       " '점': 617,\n",
       " '소개': 618,\n",
       " '옷': 619,\n",
       " '시골': 620,\n",
       " '드디어': 621,\n",
       " '몰래': 622,\n",
       " '계획': 623,\n",
       " '과': 624,\n",
       " '임': 625,\n",
       " '아니라': 626,\n",
       " '되지': 627,\n",
       " '김': 628,\n",
       " '정': 629,\n",
       " '물': 630,\n",
       " '누가': 631,\n",
       " '라이프': 632,\n",
       " '아침': 633,\n",
       " '의사': 634,\n",
       " '이혼': 635,\n",
       " '보이': 636,\n",
       " '엄청난': 637,\n",
       " '교수': 638,\n",
       " '태도': 639,\n",
       " '나타난다': 640,\n",
       " '자극': 641,\n",
       " '가득한': 642,\n",
       " '펼쳐지는': 643,\n",
       " '척': 644,\n",
       " '목표': 645,\n",
       " '사회': 646,\n",
       " '좌충우돌': 647,\n",
       " '대체': 648,\n",
       " '시': 649,\n",
       " '태어난': 650,\n",
       " '입학': 651,\n",
       " '태': 652,\n",
       " '침대': 653,\n",
       " '사': 654,\n",
       " '제일': 655,\n",
       " '호': 656,\n",
       " '이사': 657,\n",
       " '캐릭터': 658,\n",
       " '찾기': 659,\n",
       " '남은': 660,\n",
       " '다짐': 661,\n",
       " '동기': 662,\n",
       " '중인': 663,\n",
       " '아래': 664,\n",
       " '후계': 665,\n",
       " '섬': 666,\n",
       " '책': 667,\n",
       " '숨겨진': 668,\n",
       " '되면서': 669,\n",
       " '숨기': 670,\n",
       " '당해': 671,\n",
       " '문': 672,\n",
       " '대한민국': 673,\n",
       " '아니': 674,\n",
       " '잠': 675,\n",
       " '젊은': 676,\n",
       " '성적': 677,\n",
       " '조금씩': 678,\n",
       " '원하는': 679,\n",
       " '신부': 680,\n",
       " '졸업': 681,\n",
       " '어쩔': 682,\n",
       " '않을': 683,\n",
       " '천사': 684,\n",
       " '운영': 685,\n",
       " '만들어': 686,\n",
       " '담당': 687,\n",
       " '위로': 688,\n",
       " '회귀': 689,\n",
       " '심': 690,\n",
       " '평생': 691,\n",
       " '요구': 692,\n",
       " '넘치는': 693,\n",
       " '모델': 694,\n",
       " '요': 695,\n",
       " '테': 696,\n",
       " '린': 697,\n",
       " '진심': 698,\n",
       " '수가': 699,\n",
       " '모험': 700,\n",
       " '원래': 701,\n",
       " '알았던': 702,\n",
       " '분위기': 703,\n",
       " '무림': 704,\n",
       " '연기': 705,\n",
       " '산': 706,\n",
       " '자꾸': 707,\n",
       " '반응': 708,\n",
       " '수인': 709,\n",
       " '신의': 710,\n",
       " '아저씨': 711,\n",
       " '오랜': 712,\n",
       " '소식': 713,\n",
       " '두고': 714,\n",
       " '우주': 715,\n",
       " '파트너': 716,\n",
       " '될까': 717,\n",
       " '홀로': 718,\n",
       " '곧': 719,\n",
       " '깊은': 720,\n",
       " '전부': 721,\n",
       " '빛': 722,\n",
       " '손길': 723,\n",
       " '들어': 724,\n",
       " '이별': 725,\n",
       " '군': 726,\n",
       " '현재': 727,\n",
       " '불리는': 728,\n",
       " '다정한': 729,\n",
       " '요리': 730,\n",
       " '사정': 731,\n",
       " '나와': 732,\n",
       " '교사': 733,\n",
       " '탈출': 734,\n",
       " '인물': 735,\n",
       " '케이': 736,\n",
       " '찬': 737,\n",
       " '빠져': 738,\n",
       " '재능': 739,\n",
       " '법': 740,\n",
       " '당하고': 741,\n",
       " '이를': 742,\n",
       " '데뷔': 743,\n",
       " '제자': 744,\n",
       " '신입': 745,\n",
       " '대상': 746,\n",
       " '이치': 747,\n",
       " '유': 748,\n",
       " '하려': 749,\n",
       " '생존': 750,\n",
       " '운': 751,\n",
       " '주는': 752,\n",
       " '좀비': 753,\n",
       " '나타나': 754,\n",
       " '야쿠자': 755,\n",
       " '갈': 756,\n",
       " '시선': 757,\n",
       " '민': 758,\n",
       " '병': 759,\n",
       " '나타났다': 760,\n",
       " '범': 761,\n",
       " '여인': 762,\n",
       " '싶지': 763,\n",
       " '저택': 764,\n",
       " '뱀파이어': 765,\n",
       " '쾌감': 766,\n",
       " '하필': 767,\n",
       " '귀족': 768,\n",
       " '알바': 769,\n",
       " '아니면': 770,\n",
       " '떠보니': 771,\n",
       " '사쿠라': 772,\n",
       " '트라우마': 773,\n",
       " '다니는': 774,\n",
       " '제대로': 775,\n",
       " '도전': 776,\n",
       " '현': 777,\n",
       " '조선': 778,\n",
       " '공': 779,\n",
       " '없던': 780,\n",
       " '여러': 781,\n",
       " '동시': 782,\n",
       " '듣게': 783,\n",
       " '출신': 784,\n",
       " '여고생': 785,\n",
       " '단편': 786,\n",
       " '파티': 787,\n",
       " '접근': 788,\n",
       " '묘': 789,\n",
       " '막': 790,\n",
       " '비서': 791,\n",
       " '모르게': 792,\n",
       " '아주': 793,\n",
       " '그린': 794,\n",
       " '그대로': 795,\n",
       " '최': 796,\n",
       " '여동생': 797,\n",
       " '살던': 798,\n",
       " '와중': 799,\n",
       " '지키기': 800,\n",
       " '목적': 801,\n",
       " '학년': 802,\n",
       " '황자': 803,\n",
       " '시험': 804,\n",
       " '장난': 805,\n",
       " '실력': 806,\n",
       " '스케': 807,\n",
       " '살해': 808,\n",
       " '프로젝트': 809,\n",
       " '진정한': 810,\n",
       " '레이': 811,\n",
       " '절망': 812,\n",
       " '심장': 813,\n",
       " '그냥': 814,\n",
       " '초': 815,\n",
       " '악역': 816,\n",
       " '파': 817,\n",
       " '눈물': 818,\n",
       " '자취': 819,\n",
       " '근무': 820,\n",
       " '해준': 821,\n",
       " '지닌': 822,\n",
       " '쾌락': 823,\n",
       " '활동': 824,\n",
       " '현장': 825,\n",
       " '수많은': 826,\n",
       " '별': 827,\n",
       " '직접': 828,\n",
       " '성장': 829,\n",
       " '듣고': 830,\n",
       " '다양한': 831,\n",
       " '란': 832,\n",
       " '마사지': 833,\n",
       " '아키': 834,\n",
       " '강한': 835,\n",
       " '그림': 836,\n",
       " '먼저': 837,\n",
       " '였지만': 838,\n",
       " '불명': 839,\n",
       " '치료': 840,\n",
       " '거지': 841,\n",
       " '당한': 842,\n",
       " '없었다': 843,\n",
       " '실': 844,\n",
       " '클럽': 845,\n",
       " '이건': 846,\n",
       " '희': 847,\n",
       " '불': 848,\n",
       " '형제': 849,\n",
       " '거짓말': 850,\n",
       " '사람과': 851,\n",
       " '쌍둥이': 852,\n",
       " '의식': 853,\n",
       " '터': 854,\n",
       " '온갖': 855,\n",
       " '그럼': 856,\n",
       " '있지만': 857,\n",
       " '물건': 858,\n",
       " '닮은': 859,\n",
       " '취해': 860,\n",
       " '미소': 861,\n",
       " '원작': 862,\n",
       " '더욱': 863,\n",
       " '아가씨': 864,\n",
       " '업무': 865,\n",
       " '버리는': 866,\n",
       " '짝': 867,\n",
       " '열심히': 868,\n",
       " '돌아왔다': 869,\n",
       " '애정': 870,\n",
       " '되지만': 871,\n",
       " '성': 872,\n",
       " '하러': 873,\n",
       " '각자': 874,\n",
       " '신작': 875,\n",
       " '유명한': 876,\n",
       " '짓': 877,\n",
       " '없고': 878,\n",
       " '반복': 879,\n",
       " '납치': 880,\n",
       " '검': 881,\n",
       " '대가': 882,\n",
       " '서울': 883,\n",
       " '영화': 884,\n",
       " '펼쳐진다': 885,\n",
       " '지도': 886,\n",
       " '엘리트': 887,\n",
       " '메': 888,\n",
       " '동아리': 889,\n",
       " '입사': 890,\n",
       " '병원': 891,\n",
       " '살기': 892,\n",
       " '정보': 893,\n",
       " '완전히': 894,\n",
       " '그만': 895,\n",
       " '생일': 896,\n",
       " '않게': 897,\n",
       " '손가락': 898,\n",
       " '내게': 899,\n",
       " '어째서': 900,\n",
       " '기묘한': 901,\n",
       " '직전': 902,\n",
       " '돌아갈': 903,\n",
       " '전학': 904,\n",
       " '경찰': 905,\n",
       " '우정': 906,\n",
       " '주의': 907,\n",
       " '발': 908,\n",
       " '보지': 909,\n",
       " '본인': 910,\n",
       " '늑대': 911,\n",
       " '보며': 912,\n",
       " '착각': 913,\n",
       " '미친': 914,\n",
       " '없었던': 915,\n",
       " '모를': 916,\n",
       " '왔다': 917,\n",
       " '구원': 918,\n",
       " '같아': 919,\n",
       " '살아온': 920,\n",
       " '임무': 921,\n",
       " '행복': 922,\n",
       " '이웃': 923,\n",
       " '성욕': 924,\n",
       " '나잇': 925,\n",
       " '탑': 926,\n",
       " '피해': 927,\n",
       " '료': 928,\n",
       " '고향': 929,\n",
       " '쓰레기': 930,\n",
       " '도착': 931,\n",
       " '공부': 932,\n",
       " '갖고': 933,\n",
       " '스타': 934,\n",
       " '타': 935,\n",
       " '반해': 936,\n",
       " '결혼식': 937,\n",
       " '소중한': 938,\n",
       " '갑작스러운': 939,\n",
       " '절친': 940,\n",
       " '반드시': 941,\n",
       " '동물': 942,\n",
       " '사진': 943,\n",
       " '강제': 944,\n",
       " '대학교': 945,\n",
       " '부장': 946,\n",
       " '공략': 947,\n",
       " '부모': 948,\n",
       " '오히려': 949,\n",
       " '나가는': 950,\n",
       " '보면': 951,\n",
       " '만드는': 952,\n",
       " '여름': 953,\n",
       " '괴롭힘': 954,\n",
       " '짐승': 955,\n",
       " '영': 956,\n",
       " '여주': 957,\n",
       " '꿈꾸는': 958,\n",
       " '꼭': 959,\n",
       " '움': 960,\n",
       " '유부녀': 961,\n",
       " '왕국': 962,\n",
       " '친': 963,\n",
       " '수도': 964,\n",
       " '시스템': 965,\n",
       " '왠지': 966,\n",
       " '설마': 967,\n",
       " '느끼는': 968,\n",
       " '있다고': 969,\n",
       " '도련님': 970,\n",
       " '공포': 971,\n",
       " '열': 972,\n",
       " '만든': 973,\n",
       " '교통사고': 974,\n",
       " '절정': 975,\n",
       " '이대로': 976,\n",
       " '널': 977,\n",
       " '오는': 978,\n",
       " '이의': 979,\n",
       " '됩니다': 980,\n",
       " '장소': 981,\n",
       " '걱정': 982,\n",
       " '예전': 983,\n",
       " '통': 984,\n",
       " '도중': 985,\n",
       " '해주는': 986,\n",
       " '동정': 987,\n",
       " '거대한': 988,\n",
       " '명령': 989,\n",
       " '버리는데': 990,\n",
       " '없을': 991,\n",
       " '신분': 992,\n",
       " '눈치': 993,\n",
       " '관': 994,\n",
       " '점차': 995,\n",
       " '멋진': 996,\n",
       " '살이': 997,\n",
       " '됐다': 998,\n",
       " '나온': 999,\n",
       " '언제': 1000,\n",
       " ...}"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Vocabulary: every whitespace-separated token of the filtered outlines, most frequent first.\n",
    "outlines = df['filtered_outline'].values\n",
    "words = ' '.join(outlines).split()\n",
    "counter = Counter(words)\n",
    "vocab = [token for token, _ in counter.most_common()]\n",
    "\n",
    "# Word ids start at 1; id 0 is reserved for the padding token.\n",
    "int2word = {idx: token for idx, token in enumerate(vocab, start=1)}\n",
    "int2word[0] = '<PAD>'\n",
    "word2int = {token: idx for idx, token in int2word.items()}\n",
    "word2int"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "f65a202a",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 20157/20157 [00:00<00:00, 42793.89it/s]\n"
     ]
    }
   ],
   "source": [
    "# Encode each filtered outline as the list of its tokens' vocabulary ids.\n",
    "outlines_enc = [list(map(word2int.__getitem__, outline.split())) for outline in tqdm(outlines)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "611dc4b7",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 20157/20157 [00:00<00:00, 169153.79it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                title              outline     mastrId     imageDownloadUrl mainGenreCdNm pictrWritrNm pltfomCdNm     filtered_outline  adult              encoded\n",
      "0        회장님의 백만가지 대본  교통사고로 머리를 다친 강예성...  2023059426  https://www.kmas...            BL           일천        피너툰  교통사고 머리 다친 강 예성 ...      1  [974, 444, 3570,...\n",
      "1           회사원 K의 비밀  직장생활 5년 차 김대리 그에...  2023059425  https://www.kmas...            BL           모락     카카오페이지  직장 생활 차 김 대리 그 딱...      1  [418, 68, 161, 6...\n",
      "2      황자님, 왜 잘해 주세요?  저 저를 어떻게 하시려는 거예...  2023059422  https://www.kmas...           드라마           강차     카카오페이지  저 저 어떻게 하시려는 거 강...      0  [179, 179, 271, ...\n",
      "3        환상의 에덴 [개정판]  어떠한 이유로 방랑하고 있는 ...  2023059421  https://www.kmas...            BL         후지토비     카카오페이지  어떠한 이유 방랑 있는 사형 ...      1  [2258, 206, 8152...\n",
      "4         화려한 혼활 버스터즈  혼활에서 연전연패 중인 팔방미...  2023059420  https://www.kmas...           로맨스     hiromyan      레진코믹스  혼활 연전 연패 중인 팔방미인...      0  [12960, 24217, 1...\n",
      "...               ...                  ...         ...                  ...           ...          ...        ...                  ...    ...                  ...\n",
      "20152        060 특수부대  연재기간 20100323 총회...  2017023368  https://www.kmas...            코믹     스바르탄/정재호      머니투데이  연재 기간 총회 차 연재 매체...      0  [125, 305, 477, ...\n",
      "20153        0.1초의 설렘  긴 사랑과 짧은 설렘에 대한 보고서  2017023367  https://www.kmas...           드라마          임강혁       다음웹툰   긴 사랑 짧은 설렘에 대한 보고서      0  [1377, 15, 2837,...\n",
      "20154    0.0MHz  (완결)  심령현상을 과학적으로 밝히려는...  2017023366  https://www.kmas...            공포           장작       다음웹툰  심령 현상 과학 밝히려는 위험...      0  [6078, 2000, 194...\n",
      "20155          -0.5˚C  아버지의 불륜으로 사랑에 거부...  2017023365  https://www.kmas...           동성애           연어      레진코믹스  아버지 불륜 사랑 거부 감 보...      1  [157, 1184, 15, ...\n",
      "20156            #해모나  셀기꾼 SNS 인기스타인 모나...  2017023364  https://www.kmas...           이성애           신매      배틀코믹스  셀기 꾼 인기 스타인 모나 날...      0  [56752, 6206, 15...\n",
      "\n",
      "[20157 rows x 10 columns]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Translate every outline into word2int ids, one list per outline (tqdm shows progress).\n",
    "outlines_enc = [\n",
    "    list(map(word2int.__getitem__, outline.split()))\n",
    "    for outline in tqdm(outlines)\n",
    "]\n",
    "\n",
    "# Store the encoding alongside its source row, then print the frame for inspection.\n",
    "df['encoded'] = outlines_enc\n",
    "print(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "509e8624",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "리뷰의 최대 길이 : 322\n",
      "리뷰의 평균 길이 : 42.46579352086124\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGiCAYAAADNzj2mAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA+sUlEQVR4nO3df1RWZb7//9ctyi0q3IkENywRmdHKgjyFjUiZmIpyQiN07OSJo2scszFt8QGPJ2zNGpp1gqZJbZamk3NcWv6I1vGgNWkmnibU/JGSLqVRswlKJwgzvAFlbhT2948O++st+AOF+97A87HWXsPe+83muhpvfHnta1/bZhiGIQAAAAvp5usGAAAAXImAAgAALIeAAgAALIeAAgAALIeAAgAALIeAAgAALIeAAgAALIeAAgAALIeAAgAALIeAAgAALIeAAuCW5OXl6YEHHlBgYKBCQ0OVmpqqEydOeNTMmDFDNpvNY4uPj/eocbvdmjdvnkJCQtS7d29NmjRJp0+f9qipqqpSenq6HA6HHA6H0tPTde7cufbuIgAfIKAAuCVFRUV69tlntW/fPhUWFurSpUtKSkrS+fPnPeomTJig8vJyc9u6davH+YyMDG3atEn5+fnavXu3amtrlZKSooaGBrNm2rRpOnz4sLZt26Zt27bp8OHDSk9P90o/AXiXjZcFAmhLZ86cUWhoqIqKivTwww9L+nEE5dy5c9q8eXOL3+NyuXT77bdr7dq1euKJJyRJ3377rSIjI7V161aNHz9ex44d09133619+/Zp+PDhkqR9+/ZpxIgROn78uO68806v9A+Ad3T3dQNuRmNjo7799lsFBgbKZrP5ujlAl2QYhmpqahQREaFu3f7/wViXyyVJCg4O9qj/+OOPFRoaqttuu02jRo3SSy+9pNDQUElScXGxLl68qKSkJLM+IiJCMTEx2rNnj8aPH6+9e/fK4XCY4USS4uPj5XA4tGfPnhYDitvtltvtNvcbGxv1ww8/qF+/fvzuAHzgar83WtIhA0rTv6wA+N6pU6fUv39/ST/+8snMzNRDDz2kmJgYsyY5OVk///nPFRUVpdLSUv3617/WI488ouLiYtntdlVUVMjf3199+/b1uHZYWJgqKiokSRUVFWaguVxoaKhZc6W8vDy9+OKLbdVVAG3k8t8bV9MhA0pgYKCkHzsYFBTk49YAXVN1dbUiIyPNz6MkzZ07V0eOHNHu3bs9aptu20hSTEyMhg0bpqioKG3ZskVpaWlX/RmGYXiMdLQ06nFlzeWys7OVmZlp7rtcLg0YMIDfHYCPtPR742o6ZEBp+mUUFBTELxnAx5o+j/PmzdN7772nnTt3XvdfRuHh4YqKitLJkyclSU6nU/X19aqqqvIYRamsrFRCQoJZ89133zW71pkzZxQWFtbiz7Hb7bLb7c2O87sD8K0bucXKUzwAbolhGJo7d64KCgr00UcfKTo6+rrfc/bsWZ06dUrh4eGSpLi4OPXo0UOFhYVmTXl5uUpKSsyAMmLECLlcLn366admzf79++VyucwaAJ1HhxxBAWAdWVlZ2rhxo959910FBgaa80EcDocCAgJUW1urnJwcTZ48WeHh4SorK9PChQsVEhKixx9/3KydOXOmsrKy1K9fPwUHB2v+/PmKjY3V2LFjJUlDhgzRhAkTNGvWLL3xxhuSpKefflopKSk8wQN0QgQUALdk1apVkqTExESP46tXr9aMGTPk5+eno0eP6q233tK5c+cUHh6u0aNH65133vG4D71kyRJ1795dU6dOVV1dncaMGaM1a9bIz8/PrFm/fr2ee+4582mfSZMmadmyZe3fSQBe1yHXQamurpbD4ZDL5eI+MuAjHfFz2BHbDHQmrfkMMgcFAABYDgEFAABYDgEFAABYDgEFAABYDk/xwOsaGhq0a9culZeXKzw8XCNHjvR4UgMAAEZQ4FUFBQUaNGiQRo8erWnTpmn06NEaNGiQ
CgoKfN00AICFEFDgNQUFBZoyZYpiY2O1d+9e1dTUaO/evYqNjdWUKVMIKQAAE+ugwCsaGho0aNAgxcbGavPmzR6v2W5sbFRqaqpKSkp08uRJbvd0EB3xc9gR2wx0JqyDAsvZtWuXucT55eFEkrp166bs7GyVlpZq165dPmohAMBKCCjwivLycklSTExMi+ebjjfVAQC6Np7igVc0vbW2pKREDzzwQLOneEpKSjzqAKsZ+PwWj/2ylx/1UUuAroGAAq8YOXKkBg4cqHnz5un7779XWVmZeW7gwIEKCQlRdHS0Ro4c6btGAgAsg1s88Ao/Pz/9/Oc/18GDB1VXV6eVK1fq22+/1cqVK1VXV6eDBw9qypQpTJAFAEgioMBLGhoa9N///d8aNmyY7Ha7nn76aUVEROjpp59Wz549NWzYMG3cuFENDQ2+bioAwAIIKPCKpqd4Jk+e3OwpHpvNprS0NJ7iAQCYCCjwiqanc7Kzs1tcqG3hwoUedQCAro1JsvCK0NBQSdJDDz3ksVBbfHy8Nm/erIcffliffPKJWQcA6NoYQYEl2Gw2XzcBAGAhBBR4RWVlpSTpk08+UWpqqsctntTUVH3yyScedQCAro2AAq9oWoAtNzdXR48eVUJCgoKCgpSQkKCSkhK99NJLHnUAgK6NOSjwiqaF2vbs2aMvvvhCn3zyibmS7IMPPqjJkyezUBsAwMQICrzCz89PixYt0vvvv6/JkyfLbrcrJSVFdrtdkydP1vvvv69XX32VhdoAAJIYQYEXpaWlaePGjcrKylJCQoJ5PDo6Whs3blRaWpoPWwcAsBICCrwqLS1Njz32WLOXBTJyAgC4HAEFXufn56fExERfNwMAYGHMQQEAAJbDCAq8rqGhgVs8AIBrYgQFXlVQUKBBgwZp9OjRmjZtmkaPHq1BgwapoKDA100DAFgIAQVeU1BQoClTprT4ssApU6YQUgAAplYFlBUrVujee+9VUFCQgoKCNGLECH3wwQfmecMwlJOTo4iICAUEBCgxMVGff/65xzXcbrfmzZunkJAQ9e7dW5MmTdLp06fbpjewrIaGBmVlZSklJUWbN29WfHy8+vTpY74sMCUlRfPnz1dDQ4OvmwoAsIBWBZT+/fvr5Zdf1sGDB3Xw4EE98sgjeuyxx8wQ8sorr2jx4sVatmyZDhw4IKfTqXHjxqmmpsa8RkZGhjZt2qT8/Hzt3r1btbW1SklJ4S+mTm7Xrl0qKyvTwoULzTcZN+nWrZuys7NVWlqqXbt2+aiFAAAraVVAmThxov75n/9Zd9xxh+644w699NJL6tOnj/bt2yfDMPTaa6/phRdeUFpammJiYvTmm2/qwoUL2rBhgyTJ5XJp1apVWrRokcaOHav77rtP69at09GjR7Vjx46r/ly3263q6mqPDR1LeXm5JCkmJqbF803Hm+oAAF3bTc9BaWhoUH5+vs6fP68RI0aotLRUFRUVSkpKMmvsdrtGjRqlPXv2SJKKi4t18eJFj5qIiAjFxMSYNS3Jy8uTw+Ewt8jIyJttNnyk6SWAJSUlLZ5vOs7LAgEA0k0ElKNHj6pPnz6y2+165plntGnTJt19992qqKiQJIWFhXnUh4WFmecqKirk7++vvn37XrWmJdnZ2XK5XOZ26tSp1jYbPtb0ssDc3Fw1NjZ6nGtsbFReXh4vCwQAmFodUO68804dPnxY+/bt069+9StNnz5df/3rX83zNpvNo94wjGbHrnS9Grvdbk7MbdrQsVz+ssDU1FSPp3hSU1N5WSAAwEOrA4q/v78GDRqkYcOGKS8vT0OHDtUf/vAHOZ1OSWo2ElJZWWmOqjidTtXX16uqquqqNei8ml4WePToUSUkJCgoKEgJCQkqKSnhZYEAAA+3vA6KYRhyu92Kjo6W0+lUYWGhea6+vl5FRUXmm2vj4uLUo0cPj5ry8nKVlJR4vN0WnVdaWppOnDihJUuWaO7cuVqyZImOHz9OOAEAeGjVUvcLFy5UcnKy
IiMjVVNTo/z8fH388cfatm2bbDabMjIylJubq8GDB2vw4MHKzc1Vr169NG3aNEmSw+HQzJkzlZWVpX79+ik4OFjz589XbGysxo4d2y4dhLUUFBQoKytLZWVl5rE//OEPWrRoESEFAGBq1QjKd999p/T0dN15550aM2aM9u/fr23btmncuHGSpAULFigjI0Nz5szRsGHD9Pe//13bt29XYGCgeY0lS5YoNTVVU6dO1YMPPqhevXrpz3/+M3MPugBWkgUA3CibYRiGrxvRWtXV1XI4HHK5XEyY7SAaGho0aNAgxcbGavPmzR6LtTU2Nio1NVUlJSU6efIkYbWD6Iifw1tp88Dnt3jsl738aFs2DegSWvMZ5F088ApWkgUAtAYBBV7BSrIAgNYgoMArWEkWANAaBBR4BSvJAgBag4ACr2AlWQBAa7RqHRTgVjStJJuVleWxMF90dDQryQIAPBBQ4FVpaWl67LHHtGvXLpWXlys8PFwjR45k5AQA4IGAAq/z8/NTYmKir5sBALAw5qAAAADLIaAAAADLIaAAAADLIaAAAADLIaAAAADLIaAAAADLIaAAAADLIaAAAADLIaAAAADLIaAAAADLYal7eF1DQwPv4gEAXBMjKPCqgoICDRo0SKNHj9a0adM0evRoDRo0SAUFBb5uGgDAQggo8JqCggJNmTJFsbGx2rt3r2pqarR3717FxsZqypQphBQAgImAAq9oaGhQVlaWUlJStHnzZsXHx6tPnz6Kj4/X5s2blZKSovnz56uhocHXTQUAWAABBV6xa9culZWVaeHCherWzfOPXbdu3ZSdna3S0lLt2rXLRy0EAFgJAQVeUV5eLkmKiYlp8XzT8aY6dByLFi3SAw88oMDAQIWGhio1NVUnTpzwqDEMQzk5OYqIiFBAQIASExP1+eefe9S43W7NmzdPISEh6t27tyZNmqTTp0971FRVVSk9PV0Oh0MOh0Pp6ek6d+5ce3cRgA8QUOAV4eHhkqSSkpIWzzcdb6pDx/HJJ5/o2Wef1b59+1RYWKhLly4pKSlJ58+fN2teeeUVLV68WMuWLdOBAwfkdDo1btw41dTUmDUZGRnatGmT8vPztXv3btXW1iolJcXjtt+0adN0+PBhbdu2Tdu2bdPhw4eVnp7u1f42Gfj8Fo8NQNuyGYZh+LoRrVVdXS2HwyGXy6WgoCBfNwc3oKGhQYMGDVJsbKw2b97scZunsbFRqampKikp0cmTJ3nkuIO42ufwzJkzCg0NVVFRkR5++GEZhqGIiAhlZGToP/7jPyT9OFoSFham3/3ud5o9e7ZcLpduv/12rV27Vk888YQk6dtvv1VkZKS2bt2q8ePH69ixY7r77ru1b98+DR8+XJK0b98+jRgxQsePH9edd955022+EdcLIWUvP9qq6wFdUWs+g4ygwCv8/Py0aNEivf/++0pNTfV4iic1NVXvv/++Xn31VcJJJ+ByuSRJwcHBkqTS0lJVVFQoKSnJrLHb7Ro1apT27NkjSSouLtbFixc9aiIiIhQTE2PW7N27Vw6HwwwnkhQfHy+Hw2HWXMntdqu6utpjA9AxEFDgNWlpadq4caOOHj2qhIQEBQUFKSEhQSUlJdq4caPS0tJ83UTcIsMwlJmZqYceesicV1RRUSFJCgsL86gNCwszz1VUVMjf3199+/a9Zk1oaGiznxkaGmrWXCkvL8+cr+JwOBQZGXlrHQTgNawkC69KS0vTY489xkqyndTcuXN15MgR7d69u9k5m83msW8YRrNjV7qypqX6a10nOztbmZmZ5n51dTUhBeggCCjwOj8/PyUmJvq6GWhj8+bN03vvvaedO3eqf//+5nGn0ynpxxGQyydBV1ZWmqMqTqdT9fX1qqqq8hhFqaysVEJCglnz3XffNfu5Z86caTY608Rut8tut9965wB4Hbd4ANwSwzA0d+5cFRQU6KOPPlJ0dLTH+ejoaDmdThUWFprH6uvrVVRUZIaPuLg49ejRw6Om
vLxcJSUlZs2IESPkcrn06aefmjX79++Xy+UyawB0HoygALglWVlZ2rhxo959910FBgaa80EcDocCAgJks9mUkZGh3NxcDR48WIMHD1Zubq569eqladOmmbUzZ85UVlaW+vXrp+DgYM2fP1+xsbEaO3asJGnIkCGaMGGCZs2apTfeeEOS9PTTTyslJeWGnuAB0LEQUADcklWrVklSs9t2q1ev1owZMyRJCxYsUF1dnebMmaOqqioNHz5c27dvV2BgoFm/ZMkSde/eXVOnTlVdXZ3GjBmjNWvWeMxPWr9+vZ577jnzaZ9JkyZp2bJl7dtBAD7BOigAbkpH/ByyDgrgW6yDAgAAOjQCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsJxWBZS8vDw98MADCgwMVGhoqFJTU3XixAmPmhkzZshms3ls8fHxHjVut1vz5s1TSEiIevfurUmTJun06dO33hsAANAptCqgFBUV6dlnn9W+fftUWFioS5cuKSkpSefPn/eomzBhgsrLy81t69atHuczMjK0adMm5efna/fu3aqtrVVKSooaGhpuvUcAAKDDa9VS99u2bfPYX716tUJDQ1VcXKyHH37YPG632803mF7J5XJp1apVWrt2rfmOjXXr1ikyMlI7duzQ+PHjW9sHAADQydzSHBSXyyVJCg4O9jj+8ccfKzQ0VHfccYdmzZqlyspK81xxcbEuXrxovktDkiIiIhQTE6M9e/a0+HPcbreqq6s9NgAA0HnddEAxDEOZmZl66KGHFBMTYx5PTk7W+vXr9dFHH2nRokU6cOCAHnnkEbndbklSRUWF/P391bdvX4/rhYWFmW9BvVJeXp4cDoe5RUZG3myzAQBAB3DTbzOeO3eujhw5ot27d3scf+KJJ8yvY2JiNGzYMEVFRWnLli1KS0u76vUMw5DNZmvxXHZ2tjIzM8396upqQgoAAJ3YTY2gzJs3T++9957+8pe/qH///tesDQ8PV1RUlE6ePClJcjqdqq+vV1VVlUddZWWlwsLCWryG3W5XUFCQxwYAADqvVgUUwzA0d+5cFRQU6KOPPlJ0dPR1v+fs2bM6deqUwsPDJUlxcXHq0aOHCgsLzZry8nKVlJQoISGhlc0HAACdUatu8Tz77LPasGGD3n33XQUGBppzRhwOhwICAlRbW6ucnBxNnjxZ4eHhKisr08KFCxUSEqLHH3/crJ05c6aysrLUr18/BQcHa/78+YqNjTWf6gEAAF1bqwLKihUrJEmJiYkex1evXq0ZM2bIz89PR48e1VtvvaVz584pPDxco0eP1jvvvKPAwECzfsmSJerevbumTp2quro6jRkzRmvWrJGfn9+t9wgAAHR4rQoohmFc83xAQIA+/PDD616nZ8+eWrp0qZYuXdqaHw8AALoI3sUDAAAs56YfMwYA3JqBz2/x2C97+VEftQSwHgIKvK6hoUG7du1SeXm5wsPDNXLkSOYfAQA8cIsHXlVQUKBBgwZp9OjRmjZtmkaPHq1BgwapoKDA100DAFgIAQVeU1BQoClTpig2NlZ79+5VTU2N9u7dq9jYWE2ZMoWQgk5n4PNbPDYAN46AAq9oaGhQVlaWUlJStHnzZsXHx6tPnz6Kj4/X5s2blZKSovnz56uhocHXTQUAWAABBV6xa9cuc+G+bt08/9h169ZN2dnZKi0t1a5du3zUQgCAlRBQ4BXl5eWS5PHm68s1HW+qAwB0bQQUeEXTu5hKSkpaPN90vKkOANC1EVDgFSNHjtTAgQOVm5urxsZGj3ONjY3Ky8tTdHS0Ro4c6aMWAgCshIACr/Dz89OiRYv0/vvvKzU11eMpntTUVL3//vt69dVXWQ8FACCJhdrgRWlpadq4caOysrKUkJBgHo+OjtbGjRuVlpbmw9YBAKyEgAKvSktLU3Jysv793/9dJ0+e1ODBg/X73/9eAQEBvm4aAMBCuMUDr1qwYIGCgoL0+uuva/v27Xr99dcVFBSk
BQsW+LppAAALIaDAaxYsWKDf//73LU6S/f3vf09IAQCYCCjwivr6ei1atEiSWgwokrRo0SLV19d7vW0AAOshoMArli5dagaRllaSlX4MKkuXLvV62wAA1kNAgVfs3LnT/Do5OdnjMePk5OQW6wAAXRdP8cArTp8+LenHlWLfe+89c9QkPj5e7733nvr376/y8nKzDgDQtTGCAq/o2bOnJKmmpqbFOSg1NTUedQCAro2AAq+Ijo6WJNXW1qp///5auXKlvv32W61cuVL9+/dXbW2tRx0AoGvjFg+84t/+7d+0fv16SVJlZaVmz55tnrPZbB51AAAwggKvGDNmjIKCgiRJhmF4nGvaDwoK0pgxY7zeNgCA9RBQ4BV+fn5avXr1NWtWr17NywIBAJIIKPCiffv23dJ5AEDXQUCBV1y+kqy/v7/HuaZ9VpIFADQhoMArLl9J9soQ0rTPSrIAgCYEFHjFrl272rQOANC5EVDgFdXV1ebXV7vFc2UdAKDrYh0UeMW5c+fMry9evOhx7vL9y+uAzmbg81t83QSgw2AEBV7RtFJsk7Fjxyo3N1djx469Zh0AoGtiBAVe0adPH/NrwzC0Y8cO7dix45p1AICuixEUeMVtt93WpnUAgM6NgAKvCAwMbNM6AEDnRkCBV0RERLRpHQCgcyOgwCsSEhLMr6/1mPHldQCArouAAq+IjIw0v75yJdnLHzO+vA4A0HURUOAVI0eO1MCBA+VwOJqdMwxDDodD0dHRGjlypA9aBwCwGgIKvMLPz09Dhw6Vy+WSv7+/7rvvPj344IO677775O/vL5fLpXvvvVd+fn6+bioAwAJYBwVeUV9fry1btqhXr176xz/+oUOHDpnnunXrpl69emnLli2qr69vNkcFAND1EFDgFcuXL9elS5d06dIlPfrooxo0aJDq6uoUEBCgL7/8Ulu2bDHrMjIyfNtYAIDPtSqg5OXlqaCgQMePH1dAQIASEhL0u9/9TnfeeadZYxiGXnzxRa1cuVJVVVUaPny4Xn/9dd1zzz1mjdvt1vz58/X222+rrq5OY8aM0fLly9W/f/+26xks5eTJk5Kke++9V0ePHjUDiSQNGDBA9957r44cOWLWAR0N79kB2lar5qAUFRXp2Wef1b59+1RYWKhLly4pKSlJ58+fN2teeeUVLV68WMuWLdOBAwfkdDo1btw41dTUmDUZGRnatGmT8vPztXv3btXW1iolJUUNDQ1t1zNYis1mkyQdOXJE33zzjce5b775RkeOHPGoAwB0ba0aQdm2bZvH/urVqxUaGqri4mI9/PDDMgxDr732ml544QWlpaVJkt58802FhYVpw4YNmj17tlwul1atWqW1a9eaL4pbt26dIiMjtWPHDo0fP76NugYreeCBB9q0DgDQud3SUzwul0uSFBwcLEkqLS1VRUWFkpKSzBq73a5Ro0Zpz549kqTi4mJdvHjRoyYiIkIxMTFmzZXcbreqq6s9NnQsZ86cadM6AEDndtMBxTAMZWZm6qGHHlJMTIwkqaKiQpIUFhbmURsWFmaeq6iokL+/v/r27XvVmivl5eXJ4XCYG4t5dTyfffZZm9bBOj755BNNnDhRERERstls2rx5s8f5GTNmyGazeWzx8fEeNW63W/PmzVNISIh69+6tSZMm6fTp0x41VVVVSk9PN38PpKen69y5c+3cOwC+ctMBZe7cuTpy5IjefvvtZueunEdgGMZ15xZcqyY7O1sul8vcTp06dbPNho98/fXX5te33367Ro0aZW633357i3XoGC5cuKChQ4dq2bJlV62ZMGGCysvLzW3r1q0e529kXtq0adN0+PBhbdu2Tdu2bdPhw4eVnp7ebv0C4Fs39ZjxvHnz9N5772nnzp0eT944nU5JP46ShIeHm8crKyvNURWn06n6+npVVVV5jKJUVlZe9T0sdrtddrv9ZpoKi6irq5P044JtP/zwg4qKisxzfn5+8vPzU0NDg1mHjmPcuHGaPHny
NWvsdrv5++FKNzIv7dixY9q2bZv27dun4cOHS5L+9Kc/acSIETpx4oTHk4QAOodWjaAYhqG5c+eqoKBAH330kaKjoz3OR0dHy+l0qrCw0DxWX1+voqIiM3zExcWpR48eHjXl5eUqKSnhRXGdWEBAgCSpoaFB/fr1U2Jioh5++GElJiaqX79+5r+Um+rQuXz88ccKDQ3VHXfcoVmzZqmystI8dyPz0vbu3SuHw2GGE0mKj4+Xw+G46tw1iflrQEfWqhGUZ599Vhs2bNC7776rwMBAc86Iw+FQQECAbDabMjIylJubq8GDB2vw4MHKzc1Vr169NG3aNLN25syZysrKUr9+/RQcHKz58+crNjbW/NcTOp+oqCjzL5LKykqPv6CurEPnkpycrJ///OeKiopSaWmpfv3rX+uRRx5RcXGx7Hb7Dc1Lq6ioUGhoaLNrh4aGXnXumvTj/LUXX3yxbTsEwCtaFVBWrFghSUpMTPQ4vnr1as2YMUOStGDBAtXV1WnOnDnmQm3bt29XYGCgWb9kyRJ1795dU6dONRdqW7NmDe9h6cTuu+++FucrtVSHzuWJJ54wv46JidGwYcMUFRWlLVu2mMsRtOTKeWktzVG73vy27OxsZWZmmvvV1dVMsgc6iFYFFMMwrltjs9mUk5OjnJycq9b07NlTS5cu1dKlS1vz49GBtfSv31upQ8cVHh6uqKgoc9XgG5mX5nQ69d133zW71pkzZ5o9NXg55q8BHRdvM4ZXHDhwoE3r0HGdPXtWp06dMifS38i8tBEjRsjlcunTTz81a/bv3y+Xy8XcNaCT4mWB8IrLJ8HW19d7PD7q5+cnf39/1dXV8bqDDqi2tlZfffWVuV9aWqrDhw8rODhYwcHBysnJ0eTJkxUeHq6ysjItXLhQISEhevzxxyXd2Ly0IUOGaMKECZo1a5beeOMNSdLTTz+tlJQUnuABOikCCryiaX5RXV1dszkDjY2NHo8ho2M5dOiQUlJSzP2mOR/Tp0/XihUrdPToUb311ls6d+6cwsPDNXr0aL3zzjutnpe2fv16Pffcc+bTPpMmTbrm2isAOjYCCrzi8nfs9OzZ02O9k8v3eRdPxzNy5Mhrzk/78MMPr3uNG5mXFhwcrHXr1t1UGwF0PMxBgVd8//335tdXLsZ2+f7ldQCArouAAq/44Ycf2rQOANC5cYsHXtHY2Gh+nZycrF69epmPlV64cEEffPBBszoAQNdFQIFXNL119rbbbtOHH37oEUS6deum2267TefOnePttAAASQQUeEm3bj/eTWwpgDQ2NprHm+qArmjg81s89steftRHLQF8j78N4BU/+clP2rQOANC5EVDgFVe++fpW6wAAnRsBBV7x0ksvmV/7+/vrySef1KJFi/Tkk0/K39+/xToAQNfFHBR4RVVVlSQpJCRE586d09tvv22+3bh79+7q16+fzp49a9YBALo2Agq8Ijw8XKWlpbLZbKqpqdEf//hH/e1vf9NPf/pTPfPMM4qMjDTrAADgFg+84pe//KUk6cyZM0pLS9Pw4cOVm5ur4cOHKy0tzVxBtqkOANC1MYICr7h88usHH3xgLsx2rToAQNfFCAq8YuTIkRo4cKDHhNjL+fv7Kzo6WiNHjvRyywAAVsQICrzCz89PQ4cOVVlZmXr06KGoqCj5+fmpoaFBX3/9terr63XvvffKz8/P100FAFgAAQVeUV9fry1btsjhcCgoKEhffvmleW7AgAFyuVzasmWL6uvrrzrKAgDoOrjFA69Yvny5Ll26pPj4eJWXl3uc+/bbbzV8+HBdunRJy5cv91ELAQBWwggKvOJvf/ubJOnDDz9UWFiY0tPT9ZOf/ERfffWV1q5dq+3bt3vUAQC6NgIKvGLAgAGSpICAAPXs2VOvvvqqeS4qKkoBAQGqq6sz6wAAXRsBBV5VV1ene+65RwsWLDBDyZYtW/T111/7umkAAAshoMArysrKzK8/+OADbd261dy32Wwt1gEAui4m
ycIrLg8hhmF4nLt8//I6AEDXRUCBVzzwwANtWgcA6Ny4xQOvOHv2bJvWAV3BwOe3eOyXvfyoj1oCeB8jKPCKK9c+udU6AEDnRkCBV7z77rttWgcA6Ny4xQOvOHPmTJvWAV0Rt3zQlRBQ4BX19fUe+8OGDdNPf/pT/e1vf9PBgwevWgcA6JoIKPCKKx8fPnjwoEcwuVodAKBrYg4KAACwHAIKvMLhcLRpHQCgcyOgwCvuvffeNq0DAHRuBBR4hdvtbtM6AEDnRkCBV1RXV7dpHQCgcyOgwCucTmeb1gEAOjcCCrwiICCgTesAAJ0bAQVecfz48TatAwB0bgQUeEVDQ0Ob1gEAOrdWB5SdO3dq4sSJioiIkM1m0+bNmz3Oz5gxQzabzWOLj4/3qHG73Zo3b55CQkLUu3dvTZo0SadPn76ljsDabnQJe5a6BwBINxFQzp8/r6FDh2rZsmVXrZkwYYLKy8vNbevWrR7nMzIytGnTJuXn52v37t2qra1VSkoK/3ruxGpqatq0DgDQubX6XTzJyclKTk6+Zo3dbr/q0xgul0urVq3S2rVrNXbsWEnSunXrFBkZqR07dmj8+PGtbRI6gLNnz7ZpHQCgc2uXOSgff/yxQkNDdccdd2jWrFmqrKw0zxUXF+vixYtKSkoyj0VERCgmJkZ79uxp8Xput1vV1dUeGzoWwzDatA4A0Lm1eUBJTk7W+vXr9dFHH2nRokU6cOCAHnnkEXOF0IqKCvn7+6tv374e3xcWFqaKiooWr5mXlyeHw2FukZGRbd1stDM/P782rQMAdG5tHlCeeOIJPfroo4qJidHEiRP1wQcf6IsvvtCWLVuu+X2GYchms7V4Ljs7Wy6Xy9xOnTrV1s1GO+vdu3eb1gEAOrd2f8w4PDxcUVFROnnypKQfVwqtr69XVVWVR11lZaXCwsJavIbdbldQUJDHho6FSbIAgNZo94By9uxZnTp1SuHh4ZKkuLg49ejRQ4WFhWZNeXm5SkpKlJCQ0N7NgY8wBwUA0BqtfoqntrZWX375pblfWlqqw4cPKzg4WMHBwcrJydHkyZMVHh6usrIyLVy4UCEhIXr88cclSQ6HQzNnzlRWVpb69eun4OBgzZ8/X7GxseZTPeh8/Pz81NjYeEN1AAC0OqAcPHhQo0ePNvczMzMlSdOnT9eKFSt09OhRvfXWWzp37pzCw8M1evRovfPOOwoMDDS/Z8mSJerevbumTp2quro6jRkzRmvWrOEvp07savOLbrYOANC5tTqgJCYmXnMY/sMPP7zuNXr27KmlS5dq6dKlrf3x6KBuZPSkNXUAgM6Nd/HAK5iDAgBoDQIKAACwHAIKvII5KACA1iCgAAAAyyGgwCsuXbrUpnUAgM6NgAIAACyHgAIAACyHgAIAACyHgAIAACyHgAIAACyHgAIAACyHgAIAACyHgAIAACyHgAIAACyHgAIAACyHgALglnzyySeaOHGiIiIiZLPZtHnzZo/zhmEoJydHERERCggIUGJioj7//HOPGrfbrXnz5ikkJES9e/fWpEmTdPr0aY+aqqoqpaeny+FwyOFwKD09XefOnWvn3gHwFQIKgFty4cIFDR06VMuWLWvx/CuvvKLFixdr2bJlOnDggJxOp8aNG6eamhqzJiMjQ5s2bVJ+fr52796t2tpapaSkqKGhwayZNm2aDh8+rG3btmnbtm06fPiw0tPT271/AHyju68bAKBjGzdunCZPntziOcMw9Nprr+mFF15QWlqaJOnNN99UWFiYNmzYoNmzZ8vlcmnVqlVau3atxo4dK0lat26dIiMjtWPHDo0fP17Hjh3Ttm3btG/fPg0fPlyS9Kc//UkjRozQiRMndOedd3qnswC8hhEUAO2mtLRUFRUVSkpKMo/Z7XaNGjVKe/bskSQVFxfr4sWLHjURERGKiYkxa/bu3SuHw2GGE0mKj4+Xw+Ewa1ridrtVXV3tsQHoGAgoANpNRUWFJCksLMzjeFhYmHmuoqJC
/v7+6tu37zVrQkNDm10/NDTUrGlJXl6eOWfF4XAoMjLylvoDwHsIKADanc1m89g3DKPZsStdWdNS/fWuk52dLZfLZW6nTp1qZcsB+AoBBUC7cTqdktRslKOystIcVXE6naqvr1dVVdU1a7777rtm1z9z5kyz0ZnL2e12BQUFeWwAOgYCCoB2Ex0dLafTqcLCQvNYfX29ioqKlJCQIEmKi4tTjx49PGrKy8tVUlJi1owYMUIul0uffvqpWbN//365XC6zBkDnwlM8AG5JbW2tvvrqK3O/tLRUhw8fVnBwsAYMGKCMjAzl5uZq8ODBGjx4sHJzc9WrVy9NmzZNkuRwODRz5kxlZWWpX79+Cg4O1vz58xUbG2s+1TNkyBBNmDBBs2bN0htvvCFJevrpp5WSksITPEAnRUBBu7hw4YKOHz9+U9/72Wefeezfdddd6tWrV1s0C+3g0KFDSklJMfczMzMlSdOnT9eaNWu0YMEC1dXVac6cOaqqqtLw4cO1fft2BQYGmt+zZMkSde/eXVOnTlVdXZ3GjBmjNWvWyM/Pz6xZv369nnvuOfNpn0mTJl117RUAHZ/NMAzD141orerqajkcDrlcLu4pW9Rnn32muLi4NrlWcXGx7r///ja5FtpOR/wc3kqbBz6/pZ1adfPKXn7U100AWqU1n0FGUNAu7rrrLhUXF5v7hw8f1syZM6/7fatWrdI//dM/NbsWAKBrIaCgXfTq1ctj1OP++++/oYDyi1/8oj2bBQDoIHiKB15zvbuJHfBuIwCgnRBQ4FWGYWj//v0ex/bv3084AQB4IKDA6372s5+Z81OKi4v1s5/9zMctAgBYDQEFAABYDpNkAaCDuvLRZx47RmfCCAoAALAcAgoAALAcAgoAALAcAgoAALAcAgoAALAcAgoAALAcAgoAALAcAgoAALCcVgeUnTt3auLEiYqIiJDNZtPmzZs9zhuGoZycHEVERCggIECJiYn6/PPPPWrcbrfmzZunkJAQ9e7dW5MmTdLp06dvqSMAAKDzaHVAOX/+vIYOHaply5a1eP6VV17R4sWLtWzZMh04cEBOp1Pjxo1TTU2NWZORkaFNmzYpPz9fu3fvVm1trVJSUtTQ0HDzPQGALm7g81s8NqAja/VS98nJyUpOTm7xnGEYeu211/TCCy8oLS1NkvTmm28qLCxMGzZs0OzZs+VyubRq1SqtXbtWY8eOlSStW7dOkZGR2rFjh8aPH38L3QEAAJ1Bm76Lp7S0VBUVFUpKSjKP2e12jRo1Snv27NHs2bNVXFysixcvetREREQoJiZGe/bsaTGguN1uud1uc7+6urotmw0AzTACAfhWm06SraiokCSFhYV5HA8LCzPPVVRUyN/fX3379r1qzZXy8vLkcDjMLTIysi2bDQAALKZdnuKx2Wwe+4ZhNDt2pWvVZGdny+VymdupU6farK0AAMB62jSgOJ1OSWo2ElJZWWmOqjidTtXX16uqquqqNVey2+0KCgry2AAAQOfVpgElOjpaTqdThYWF5rH6+noVFRUpISFBkhQXF6cePXp41JSXl6ukpMSsAQAAXVurJ8nW1tbqyy+/NPdLS0t1+PBhBQcHa8CAAcrIyFBubq4GDx6swYMHKzc3V7169dK0adMkSQ6HQzNnzlRWVpb69eun4OBgzZ8/X7GxseZTPQAAoGtrdUA5ePCgRo8ebe5nZmZKkqZPn641a9ZowYIFqqur05w5c1RVVaXhw4dr+/btCgwMNL9nyZIl6t69u6ZOnaq6ujqNGTNGa9askZ+fXxt0CQAAdHStDiiJiYkyDOOq5202m3JycpSTk3PVmp49e2rp0qVaunRpa388AADoAngXDwAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAA
sBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsBwCCgAAsJzuvm4AAKB9DHx+i8d+2cuP+qglQOsxggIAACyHgAIAACyHgAIAACyHgAIAACyHgAIAACyHgAIAACyHgAIAACyHgAIAACyHgAIAACyHlWRxU06ePKmampqb/v5jx455/O/NCgwM1ODBg2/pGgAA6yGgoNVOnjypO+64o02u9dRTT93yNb744gtCCgB0MgQUtFrTyMm6des0ZMiQm7pGXV2dysrKNHDgQAUEBNzUNY4dO6annnrqlkZy4B05OTl68cUXPY6FhYWpoqJCkmQYhl588UWtXLlSVVVVGj58uF5//XXdc889Zr3b7db8+fP19ttvq66uTmPGjNHy5cvVv39/r/alI7vy3TwS7+eBdRFQcNOGDBmi+++//6a//8EHH2zD1sDq7rnnHu3YscPc9/PzM79+5ZVXtHjxYq1Zs0Z33HGH/vM//1Pjxo3TiRMnFBgYKEnKyMjQn//8Z+Xn56tfv37KyspSSkqKiouLPa4FoHMgoADwiu7du8vpdDY7bhiGXnvtNb3wwgtKS0uTJL355psKCwvThg0bNHv2bLlcLq1atUpr167V2LFjJf04ghcZGakdO3Zo/PjxLf5Mt9stt9tt7ldXV7dDzwC0B57iAeAVJ0+eVEREhKKjo/Uv//Iv+uqrryRJpaWlqqioUFJSkllrt9s1atQo7dmzR5JUXFysixcvetREREQoJibGrGlJXl6eHA6HuUVGRrZT7wC0tTYPKDk5ObLZbB7b5f9qMgxDOTk5ioiIUEBAgBITE/X555+3dTMAWMjw4cP11ltv6cMPP9Sf/vQnVVRUKCEhQWfPnjXnoYSFhXl8z+VzVCoqKuTv76++fftetaYl2dnZcrlc5nbq1Kk27hmA9tIut3hu9V4zgM4lOTnZ/Do2NlYjRozQT3/6U7355puKj4+XJNlsNo/vMQyj2bErXa/GbrfLbrffQssB+Eq73OJputfctN1+++2Smt9rjomJ0ZtvvqkLFy5ow4YN7dEUABbUu3dvxcbG6uTJk+YI65UjIZWVleaoitPpVH19vaqqqq5aA6BzaZeAciv3mlvidrtVXV3tsQHouNxut44dO6bw8HBFR0fL6XSqsLDQPF9fX6+ioiIlJCRIkuLi4tSjRw+PmvLycpWUlJg1ADqXNg8ot3qvuSVMdAM6tvnz56uoqEilpaXav3+/pkyZourqak2fPl02m00ZGRnKzc3Vpk2bVFJSohkzZqhXr16aNm2aJMnhcGjmzJnKysrS//7v/+rQoUN66qmnFBsbaz7VA6BzafM5KO1xrzk7O1uZmZnmfnV1NSEF6EBOnz6tJ598Ut9//71uv/12xcfHa9++fYqKipIkLViwQHV1dZozZ465UNv27ds95qUtWbJE3bt319SpU82F2tasWcMaKEAn1e7roFx+rzk1NVXSj/eaw8PDzZrr3UdmohvQseXn51/zvM1mU05OjnJycq5a07NnTy1dulRLly5t49YBsKJ2XweltfeaAQAA2nwEZf78+Zo4caIGDBigyspK/ed//meL95oHDx6swYMHKzc31+NeMwAAQJsHlLa41wwAALq2Ng8obXGvGQAAdG28iwcAAFgOAQUAAFgOAQUAAFgOAQUAAFgOAQUAAFhOu68ki87Hdukfus/ZTQHnvpC+9V3GDTj3he5zdpPt0j981gYAQPsgoKDVetZ+o89m95F2zpZ2+q4dQyR9NruPjtV+I4mViAGgMyGgoNX+0WeA7n+jVuvXr9eQu+7yWTuOHT+uf/3Xf9Wqfx7gszYAANoHAQWtZnTvqUMVjaq77Q4p4p981o66ikYdqmiU0b2nz9oAAGgfTJIFAACWQ0ABAACWQ0ABAACWQ0ABAACWQ0ABAACWQ0ABAACWQ0ABAACWQ0ABAACWw0JtANCFDXx+i8d+2cuP+qglgCdGUAAAgOUQUAAAgOUQUAAAgOUQUAAAgOUQ
UAAAgOUQUAAAgOXwmDEAwMRjx7AKAgpa7cKFC5Kkzz777KavUVdXp7KyMg0cOFABAQE3dY1jx47d9M8HAFgbAQWtdvz4cUnSrFmzfNySHwUGBvq6CQCANkZAQaulpqZKku666y716tXrpq5x7NgxPfXUU1q3bp2GDBly020JDAzU4MGDb/r7AQDWREBBq4WEhOiXv/xlm1xryJAhuv/++9vkWgCAzoOneAAAgOUQUAAAgOUQUAAAgOUQUAAAgOUQUAAAgOUQUAAAgOUQUAAAgOUQUAAAgOUQUAAAgOWwkiwA4Kp4uzF8hREUAABgOQQUAABgOQQUAABgOT4NKMuXL1d0dLR69uypuLg47dq1y5fNAQAAFuGzgPLOO+8oIyNDL7zwgg4dOqSRI0cqOTlZ33zzja+aBAAALMJnT/EsXrxYM2fO1C9/+UtJ0muvvaYPP/xQK1asUF5enket2+2W2+0296urq73aVrTehQsXdPz48aueP3bsmMf/Xstdd92lXr16tVnbANw8nuqBt/gkoNTX16u4uFjPP/+8x/GkpCTt2bOnWX1eXp5efPFFbzUPbeD48eOKi4u7bt1TTz113Zri4mLdf//9bdEsAEAH4ZOA8v3336uhoUFhYWEex8PCwlRRUdGsPjs7W5mZmeZ+dXW1IiMj272duHl33XWXiouLr3q+rq5OZWVlGjhwoAICAq57LQBA1+LThdpsNpvHvmEYzY5Jkt1ul91u91az0AZ69ep13VGPBx980EutAdBeuOWD9uKTSbIhISHy8/NrNlpSWVnZbFQFAAB0PT4JKP7+/oqLi1NhYaHH8cLCQiUkJPiiSQAAwEJ8dosnMzNT6enpGjZsmEaMGKGVK1fqm2++0TPPPOOrJgEAAIvwWUB54okndPbsWf32t79VeXm5YmJitHXrVkVFRfmqSQCAW8ScFLQVn06SnTNnjubMmePLJgAAAAviXTwAAMByCCgAAMByCCgAAMByfDoHBQDQuTFpFjeLgAIA8BoCC24Ut3gAdCjLly9XdHS0evbsqbi4OO3atcvXTQLQDhhBAdBhvPPOO8rIyNDy5cv14IMP6o033lBycrL++te/asCAAb5uHm4CIyq4GpthGIavG9Fa1dXVcjgccrlcCgoK8nVzgC7JF5/D4cOH6/7779eKFSvMY0OGDFFqaqry8vKu+/2tafOVf3HCd64MLYSajqs1n8EOOYLSlKmqq6t93BKg62r6/Hnr3zj19fUqLi7W888/73E8KSlJe/bsafF73G633G63ue9yuSTd2O+ORveFW2gt2tKA//ff1zx/vf8/Y37zocd+yYvjW3X+ete7ke/Bj1rze6NDBpSamhpJUmRkpI9bAqCmpkYOh6Pdf87333+vhoaGZm88DwsLa/Zm9CZ5eXl68cUXmx3nd0fn4nitbetbe72b/Z6u7EZ+b3TIgBIREaFTp04pMDBQNpvN183BTaiurlZkZKROnTrFbboOyjAM1dTUKCIiwqs/98rPvGEYV/09kJ2drczMTHO/sbFRP/zwg/r163fN3x1d/c9nV+5/V+671P79b83vjQ4ZULp166b+/fv7uhloA0FBQV3yl0Bn4Y2RkyYhISHy8/NrNlpSWVnZbFSlid1ul91u9zh222233fDP7Op/Prty/7ty36X27f+N/t7gMWMAHYK/v7/i4uJUWFjocbywsFAJCQk+ahWA9tIhR1AAdE2ZmZlKT0/XsGHDNGLECK1cuVLffPONnnnmGV83DUAbI6DAJ+x2u37zm980G34HruWJJ57Q2bNn9dvf/lbl5eWKiYnR1q1bFRUV1aY/p6v/+ezK/e/KfZes1f8OuQ4KAADo3JiDAgAALIeAAgAALIeAAgAALIeAAgAALIeAAgAALIeAAq/auXOnJk6cqIiICNlsNm3evNnXTQKaWb58uaKjo9WzZ0/FxcVp165dvm7SLbveZ88wDOXk5CgiIkIBAQFKTEzU559/7lHjdrs1b948hYSE
qHfv3po0aZJOnz7txV7cnLy8PD3wwAMKDAxUaGioUlNTdeLECY+aztz/FStW6N577zVXhx0xYoQ++OAD87xV+05AgVedP39eQ4cO1bJly3zdFKBF77zzjjIyMvTCCy/o0KFDGjlypJKTk/XNN9/4umm35HqfvVdeeUWLFy/WsmXLdODAATmdTo0bN858OaskZWRkaNOmTcrPz9fu3btVW1urlJQUNTQ0eKsbN6WoqEjPPvus9u3bp8LCQl26dElJSUk6f/68WdOZ+9+/f3+9/PLLOnjwoA4ePKhHHnlEjz32mBlCLNt3A/ARScamTZt83QzAw89+9jPjmWee8Th21113Gc8//7yPWtT2rvzsNTY2Gk6n03j55ZfNY//4xz8Mh8Nh/PGPfzQMwzDOnTtn9OjRw8jPzzdr/v73vxvdunUztm3b5rW2t4XKykpDklFUVGQYRtfrv2EYRt++fY3/+q//snTfGUEBgP9TX1+v4uJiJSUleRxPSkrSnj17fNSq9ldaWqqKigqPftvtdo0aNcrsd3FxsS5evOhRExERoZiYmA7338blckmSgoODJXWt/jc0NCg/P1/nz5/XiBEjLN13AgoA/J/vv/9eDQ0Nzd6OHBYW1uwtyp1JU9+u1e+Kigr5+/urb9++V63pCAzDUGZmph566CHFxMRI6hr9P3r0qPr06SO73a5nnnlGmzZt0t13323pvvMuHgC4gs1m89g3DKPZsc7oZvrd0f7bzJ07V0eOHNHu3bubnevM/b/zzjt1+PBhnTt3Tv/zP/+j6dOnq6ioyDxvxb4zggIA/yckJER+fn7N/lVYWVnZ7F+YnYnT6ZSka/bb6XSqvr5eVVVVV62xunnz5um9997TX/7yF/Xv39883hX67+/vr0GDBmnYsGHKy8vT0KFD9Yc//MHSfSegAMD/8ff3V1xcnAoLCz2OFxYWKiEhwUetan/R0dFyOp0e/a6vr1dRUZHZ77i4OPXo0cOjpry8XCUlJZb/b2MYhubOnauCggJ99NFHio6O9jjf2fvfEsMw5Ha7rd33dpt+C7SgpqbGOHTokHHo0CFDkrF48WLj0KFDxtdff+3rpgGGYRhGfn6+0aNHD2PVqlXGX//6VyMjI8Po3bu3UVZW5uum3ZLrffZefvllw+FwGAUFBcbRo0eNJ5980ggPDzeqq6vNazzzzDNG//79jR07dhifffaZ8cgjjxhDhw41Ll265Ktu3ZBf/epXhsPhMD7++GOjvLzc3C5cuGDWdOb+Z2dnGzt37jRKS0uNI0eOGAsXLjS6detmbN++3TAM6/adgAKv+stf/mJIarZNnz7d100DTK+//roRFRVl+Pv7G/fff7/5OGpHdr3PXmNjo/Gb3/zGcDqdht1uNx5++GHj6NGjHteoq6sz5s6dawQHBxsBAQFGSkqK8c033/igN63TUr8lGatXrzZrOnP/f/GLX5h/nm+//XZjzJgxZjgxDOv23WYYhtF+4zMAAACtxxwUAABgOQQUAABgOQQUAABgOQQUAABgOQQUAABgOQQUAABgOQQUAABgOQQUAABgOQQUAABgOQQUAABgOQQUAABgOf8fXEP3hSH1TSYAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 640x480 with 2 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Number of tokens in each encoded outline.\n",
    "reviews_length = df['encoded'].map(len).tolist()\n",
    "\n",
    "# Summary statistics; the printed labels read \"max length\" / \"mean length\" in Korean.\n",
    "print('리뷰의 최대 길이 : {}'.format(np.max(reviews_length)))\n",
    "print('리뷰의 평균 길이 : {}'.format(np.mean(reviews_length)))\n",
    "\n",
    "# Left panel: box plot of the lengths. Right panel: 50-bin histogram of the same values.\n",
    "fig, (box_ax, hist_ax) = plt.subplots(1, 2)\n",
    "box_ax.boxplot(reviews_length)\n",
    "hist_ax.hist(reviews_length, bins=50)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "29af6a5d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[2022003522,\n",
       " 2023019573,\n",
       " 2017026233,\n",
       " 2020007797,\n",
       " 2022017650,\n",
       " 2020008519,\n",
       " 2017026117,\n",
       " 2017024097,\n",
       " 2020007609,\n",
       " 2023015577]"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "documents = df['filtered_outline'].to_numpy()\n",
    "\n",
    "# TF-IDF vector for every outline, then all-pairs cosine similarity.\n",
    "# NOTE: with the default dense output `similarity` is an (n_docs x n_docs) array,\n",
    "# so its memory footprint grows quadratically with the number of rows.\n",
    "tfidfVectorizer = TfidfVectorizer()\n",
    "tfidfMatrix = tfidfVectorizer.fit_transform(documents)\n",
    "similarity = cosine_similarity(tfidfMatrix, tfidfMatrix)\n",
    "\n",
    "# Outline text -> row position. Rows of `similarity` and `.iloc` are positional, so\n",
    "# positions are stored rather than index labels. Identical outline texts collapse to\n",
    "# the position of their last occurrence.\n",
    "outlineToIndex = dict(zip(documents, range(len(documents))))\n",
    "\n",
    "def recommendations(outline, similarity=similarity, top_n=10):\n",
    "    \"\"\"Return the `mastrId` values of the rows most similar to `outline`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    outline : str\n",
    "        A value of `df['filtered_outline']`; it is looked up in `outlineToIndex`.\n",
    "    similarity : array-like\n",
    "        Pairwise similarity scores; row i holds the scores of row i against every row.\n",
    "    top_n : int, default 10\n",
    "        Maximum number of recommendations to return.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list\n",
    "        Up to `top_n` `mastrId` values ordered from most to least similar. The\n",
    "        queried row itself is never included.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    KeyError\n",
    "        If `outline` is not one of the indexed documents.\n",
    "    \"\"\"\n",
    "    index = outlineToIndex[outline]\n",
    "    row_scores = np.asarray(similarity[index])\n",
    "\n",
    "    # Stable argsort of the negated scores: descending order, ties kept in row order.\n",
    "    ranked = np.argsort(-row_scores, kind='stable')\n",
    "\n",
    "    # Remove the query row by position instead of assuming it sorts first: duplicated\n",
    "    # outlines tie at the top, and an outline whose TF-IDF vector is all zeros scores 0\n",
    "    # against itself, so dropping 'the first result' could leave the row recommending itself.\n",
    "    ranked = ranked[ranked != index][:top_n]\n",
    "    return df['mastrId'].iloc[ranked].tolist()\n",
    "\n",
    "recommendations(documents[9917])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "2aba3ea9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                title              outline     mastrId     imageDownloadUrl mainGenreCdNm pictrWritrNm pltfomCdNm  adult outline_recommendations\n",
      "0        회장님의 백만가지 대본  교통사고로 머리를 다친 강예성...  2023059426  https://www.kmas...            BL           일천        피너툰      1  [2023011627, 202...   \n",
      "1           회사원 K의 비밀  직장생활 5년 차 김대리 그에...  2023059425  https://www.kmas...            BL           모락     카카오페이지      1  [2023051143, 202...   \n",
      "2      황자님, 왜 잘해 주세요?  저 저를 어떻게 하시려는 거예...  2023059422  https://www.kmas...           드라마           강차     카카오페이지      0  [2023050625, 201...   \n",
      "3        환상의 에덴 [개정판]  어떠한 이유로 방랑하고 있는 ...  2023059421  https://www.kmas...            BL         후지토비     카카오페이지      1  [2023042181, 202...   \n",
      "4         화려한 혼활 버스터즈  혼활에서 연전연패 중인 팔방미...  2023059420  https://www.kmas...           로맨스     hiromyan      레진코믹스      0  [2021017126, 202...   \n",
      "...               ...                  ...         ...                  ...           ...          ...        ...    ...                  ...   \n",
      "20152        060 특수부대  연재기간 20100323 총회...  2017023368  https://www.kmas...            코믹     스바르탄/정재호      머니투데이      0  [2017027866, 201...   \n",
      "20153        0.1초의 설렘  긴 사랑과 짧은 설렘에 대한 보고서  2017023367  https://www.kmas...           드라마          임강혁       다음웹툰      0  [2020007815, 202...   \n",
      "20154    0.0MHz  (완결)  심령현상을 과학적으로 밝히려는...  2017023366  https://www.kmas...            공포           장작       다음웹툰      0  [2023007585, 202...   \n",
      "20155          -0.5˚C  아버지의 불륜으로 사랑에 거부...  2017023365  https://www.kmas...           동성애           연어      레진코믹스      1  [2022017622, 201...   \n",
      "20156            #해모나  셀기꾼 SNS 인기스타인 모나...  2017023364  https://www.kmas...           이성애           신매      배틀코믹스      0  [2017024150, 201...   \n",
      "\n",
      "[20157 rows x 9 columns]\n"
     ]
    }
   ],
   "source": [
    "# Compute the recommendation list for every row from its filtered outline,\n",
    "# then print a compact view of the frame.\n",
    "df['outline_recommendations'] = df['filtered_outline'].apply(recommendations)\n",
    "columns_to_display = [\n",
    "    'title', 'outline', 'mastrId', 'imageDownloadUrl', 'mainGenreCdNm',\n",
    "    'pictrWritrNm', 'pltfomCdNm', 'adult', 'outline_recommendations',\n",
    "]\n",
    "print(df[columns_to_display])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
