In [12]:
import subprocess
import sys
import warnings
import os
import zipfile
import shutil


def silent_install(package):
    """Install *package* with pip for the running interpreter, hiding pip's output.

    pip's stdout and stderr are discarded so the notebook cell stays clean.
    The call is still best-effort (a failed install never raises), but a
    failure is reported in one line on stderr instead of being dropped, so a
    later ``ModuleNotFoundError`` can be traced back to its cause.

    Args:
        package: Requirement specifier passed verbatim to ``pip install``.

    Returns:
        True if pip exited with status 0, False otherwise.
    """
    result = subprocess.run(
        [sys.executable, "-m", "pip", "install", package],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,
    )
    if result.returncode != 0:
        print(
            f"pip install {package!r} failed with exit code {result.returncode}",
            file=sys.stderr,
        )
        return False
    return True

# Third-party packages this notebook relies on; each one is installed
# before the imports that follow.
required_packages = (
    "wordcloud",
    "nltk",
    "vaderSentiment",
    "textblob",
    "kaggle",
    "sentence-transformers",
)
for requirement in required_packages:
    silent_install(requirement)

# Hide FutureWarning messages.
warnings.simplefilter("ignore", FutureWarning)

import nltk

# Fetch the NLTK stop-word corpus without progress output.
nltk.download("stopwords", quiet=True)
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textblob import TextBlob

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import nltk
from nltk.corpus import stopwords
import string
from collections import Counter
#import streamlit as st
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity


# 1. Mount Google Drive at /content/drive
from google.colab import drive
drive.mount('/content/drive')

# 2. Helper for loading an Excel or CSV file
import pandas as pd
import os

def load_drive_dataset(file_path: str) -> pd.DataFrame:
    """Load an Excel workbook or CSV file into a DataFrame.

    The reader is chosen from the file extension. The comparison is
    case-insensitive, so names such as ``DATA.CSV`` or ``Book.XLSX`` are
    accepted as well as their lowercase forms.

    Args:
        file_path: Path to a ``.csv`` or ``.xlsx`` file.

    Returns:
        The DataFrame produced by ``pd.read_csv`` or ``pd.read_excel``.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the extension is neither ``.csv`` nor ``.xlsx``.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"❌ File not found: {file_path}")

    # Compare on a lowercased copy only; the original path is what gets opened.
    lowered_name = os.fspath(file_path).lower()
    if lowered_name.endswith(".xlsx"):
        df = pd.read_excel(file_path)
    elif lowered_name.endswith(".csv"):
        df = pd.read_csv(file_path)
    else:
        raise ValueError("❌ Unsupported file type. Please use .csv or .xlsx")

    print("✅ Dataset successfully loaded from Google Drive.")
    return df

# 3. Path to the file on Google Drive
file_path = "/content/drive/MyDrive/Colab Notebooks/primary.xlsx"  # Change the path if the file lives in another folder

# 4. Load the DataFrame
df = load_drive_dataset(file_path)

# 5. Check that the load worked
# NOTE(review): as a bare expression in the middle of a cell this preview is
# presumably not displayed; wrap it in display(...) if a preview is wanted.
df.head()



# Semantic analysis
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

from tqdm import tqdm
import pickle

# Load the sentence-embedding model; the original author chose this one as
# lighter and faster.
model = SentenceTransformer('paraphrase-MiniLM-L3-v2')

# Missing plots become empty strings so every row still yields a vector.
texts = df["Wiki_Plot"].fillna("").tolist()
batch_size = 16
embeddings = []

# Encode the plots batch by batch, with a progress bar over the batches.
batch_starts = range(0, len(texts), batch_size)
for start in tqdm(batch_starts, desc="Encoding batches"):
    embeddings.extend(model.encode(texts[start:start + batch_size]))

# Save the vectors to disk.
with open("plot_vectors.pkl", "wb") as vector_file:
    pickle.dump(embeddings, vector_file)

# Second name for the same list, used by the similarity step.
plot_embeddings = embeddings


from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Stack the per-plot vectors into a single array.
plot_embeddings = np.array(plot_embeddings)

# Pairwise cosine similarity between all plots.
similarity_matrix = cosine_similarity(plot_embeddings)

# Rank neighbours on a copy whose diagonal is -inf, so a book can never be
# recommended to itself. Dropping the first argsort column is not enough:
# when similarities tie (duplicate plots, or several missing plots that were
# all encoded from ""), the book itself is not guaranteed to sort first.
ranking_scores = similarity_matrix.copy()
np.fill_diagonal(ranking_scores, -np.inf)

# Never request more neighbours than there are other books.
neighbour_count = max(min(3, ranking_scores.shape[0] - 1), 0)
top3_indices = np.argsort(-ranking_scores, axis=1, kind="stable")[:, :neighbour_count]

# Add the recommended titles and covers to the DataFrame as plain lists
# (the dashboard code checks isinstance(..., list)).
book_titles = df["Book_Title"].to_numpy()
cover_urls = df["Cover_URL"].to_numpy()
df["Top3_Titles"] = [book_titles[row].tolist() for row in top3_indices]
df["Top3_Covers"] = [cover_urls[row].tolist() for row in top3_indices]





# Cover dashboard: one card per book, ordered by year.
from html import escape

df_sorted = df.sort_values(by="Year")

# Build the HTML. Fragments are collected in lists and joined once, instead of
# growing a string with += on every iteration.
book_cards = []
for _, row in df_sorted.iterrows():
    rating = row['goodreads_rating']
    # A missing rating renders as an empty star bar rather than "width: nan%";
    # other values are clamped to the visible 0-100% range.
    if pd.notna(rating):
        percentage = min(max((rating / 5) * 100, 0), 100)
    else:
        percentage = 0.0
    # Spreadsheet text is escaped before interpolation so that quotes, "<" or
    # "&" in a title or URL cannot break the surrounding markup.
    book_title = escape(str(row['Book_Title']))
    cover_url = escape(str(row['Cover_URL']))
    # A missing year is shown blank instead of crashing int() on NaN.
    year = int(row['Year']) if pd.notna(row['Year']) else ""

    similar_parts = []
    if isinstance(row["Top3_Covers"], list) and isinstance(row["Top3_Titles"], list):
        for sim_cover, sim_title in zip(row["Top3_Covers"], row["Top3_Titles"]):
            sim_cover = escape(str(sim_cover))
            sim_title = escape(str(sim_title))
            similar_parts.append(f"""
            <div class="similar-book">
                <img src="{sim_cover}" class="similar-cover" alt="{sim_title}">
                <div class="similar-title">{sim_title}</div>
            </div>
            """)
    similar_html = "".join(similar_parts)

    book_cards.append(f"""
    <div class="book-item">
        <div class="book-cover-wrapper">
            <div class="star-rating">
                <div class="stars-outer">
                    <div class="stars-inner" style="width: {percentage:.2f}%;"></div>
                </div>
            </div>
            <div class="tooltip">
                <div class="cover-container">
                    <img src="{cover_url}" class="book-cover">
                </div>
                <div class="similar-popup">
                 <div class="similar-label">Recommended</div>
                 <div class="similar-container">
                    {similar_html}
                 </div>
               </div>
            </div>
            <div class="book-title">{book_title}</div>
            <div class="book-year">{year}</div>
        </div>
    </div>
    """)

# Single string consumed by the timeline template.
html_blocks = "".join(book_cards)


# Styles and the container block.
# This is an f-string: CSS braces are doubled ({{ }}) so they are emitted
# literally, and {html_blocks} is the only interpolated value.
# NOTE(review): the .tooltiptext and .book-genre rules do not appear to be
# referenced by the markup built in this cell.
timeline_html = f"""
<style>
    .timeline-container {{
        display: flex;
        flex-wrap: nowrap;
        overflow-x: auto;
        gap: 20px;
        padding: 20px 0;
    }}
    .book-item {{
        display: flex;
        flex: 0 0 auto;
        flex-direction: column;
        text-align: center;
        align-items: center;
        height: 350px;
        max-width: 160px;
        position: relative;
        font-family: sans-serif;
    }}
    .book-cover {{
        width: 100%;
        border-radius: 8px;
        box-shadow: 0 4px 6px rgba(0,0,0,0.3);
        transition: transform 0.3s ease;
    }}
    .cover-container {{
        height: 290px; /* или сколько тебе подходит */
        display: flex;
        align-items: flex-start; /* чтобы обложки начинались с одной линии сверху */
        justify-content: center;
    }}
    .book-cover:hover {{
        transform: scale(1.2);
        z-index: 2;
    }}
    .tooltip {{
        position: relative;
        display: inline-block;
    }}
    .tooltiptext {{
        visibility: hidden;
        width: 180px;
        background-color: #222;
        color: #fff;
        text-align: left;
        border-radius: 6px;
        padding: 8px;
        position: absolute;
        z-index: 10;
        bottom: 110%;
        left: 50%;
        transform: translateX(-50%);
        opacity: 0;
        transition: opacity 0.3s;
        font-size: 12px;
    }}
    .tooltip:hover .tooltiptext {{
        visibility: visible;
        opacity: 1;
    }}
    .book-title {{
        font-weight: bold;
        margin-top: 5px;
    }}
    .book-year, .book-genre {{
        font-size: 12px;
        color: #666;
    }}

   .star-rating {{
        display: block;
        text-align: center;
        margin-bottom: 20px;
        font-size: 14px;
        position: relative;
        unicode-bidi: bidi-override;
}}

    .stars-outer {{
        color: #ccc;
        position: relative;
        display: inline-block;
        font-size: 18px;
}}

    .stars-outer::before {{
        content: "★★★★★";
}}

    .stars-inner {{
        color: #f39c12;
        position: absolute;
        top: 0;
        left: 0;
        white-space: nowrap;
        overflow: hidden;
        width: 0;
}}

    .stars-inner::before {{
        content: "★★★★★";
}}

.similar-popup {{
    display: none;
    position: absolute;
    bottom: -210px;
    left: 50%;
    transform: translateX(-50%);
    background-color: #fff;
    border: 1px solid #ddd;
    padding: 10px;
    box-shadow: 0 4px 10px rgba(0,0,0,0.1);
    z-index: 5;
    border-radius: 8px;
    text-align: center;
    flex-direction: column;
    align-items: center;
    width: auto;
}}
.tooltip:hover .similar-popup {{
    display: flex;
    justify-content: center;
    gap: 10px;
}}

.similar-book {{
    display: flex;
    flex-direction: column;
    align-items: center;
    width: 90px;
}}

.similar-cover {{
    width: 100%;
    border-radius: 4px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.2);
}}

.similar-title {{
    font-size: 10px;
    margin-top: 5px;
    color: #333;
}}
.similar-label {{
    font-weight: bold;
    font-size: 12px;
    margin-bottom: 8px;
    color: #222;
    width: 100%;
}}

.similar-container {{
    display: flex;
    justify-content: center;
    gap: 10px;
}}

</style>

<div class="timeline-container">
    {html_blocks}
</div>
"""

# Render the timeline in the cell output
from IPython.display import display, HTML
display(HTML(timeline_html))



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Dataset successfully loaded from Google Drive.


Encoding batches: 100%|██████████| 6/6 [00:03<00:00,  1.55it/s]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>