In [1]:
from src.utils.model_loader import load_model
import logging
import os

# Configure logging: INFO level via basicConfig, plus a logger named after this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def test_model_loading():
    try:
        logger.info("Testing model loading...")
        
        # Загрузка модели
        model = load_model()
        
        # Проверка основных свойств модели
        assert hasattr(model, 'predict_proba'), "Model missing predict_proba method"
        assert model.tree_count_ > 0, "Model has no trees"
        
        logger.info("✅ Model loaded successfully!")
        logger.info(f"Model parameters: {model.get_params()}")
        logger.info(f"Model tree count: {model.tree_count_}")
        
        return True
    except Exception as e:
        logger.error(f"❌ Model loading test failed: {str(e)}")
        return False

if __name__ == "__main__":
    # Log the environment variables of interest before running the test
    logger.info(f"MODEL_PATH: {os.getenv('MODEL_PATH')}")
    logger.info(f"IS_LMS: {os.getenv('IS_LMS')}")

    # Run the test and report failure through a non-zero exit status.
    # SystemExit is raised directly: the `exit` name is an interactive helper
    # injected by `site` (and overridden by IPython), so it is not a reliable
    # way to set an exit status from a script or a notebook cell.
    success = test_model_loading()
    if not success:
        raise SystemExit(1)

INFO:__main__:MODEL_PATH: models/catboost_min_features.cbm
INFO:__main__:IS_LMS: None
INFO:__main__:Testing model loading...
INFO:src.utils.model_loader:Model loaded successfully from models/catboost_min_features.cbm
INFO:__main__:✅ Model loaded successfully!
INFO:__main__:Model parameters: {'depth': 2, 'learning_rate': 1, 'iterations': 100, 'loss_function': 'Logloss'}
INFO:__main__:Model tree count: 100


In [2]:
def test_minimal():
    from src.utils.feature_processor import FeatureProcessor
    import pandas as pd
    from datetime import datetime
    
    processor = FeatureProcessor()
    user_data = pd.Series({'age': 25, 'gender': 'F'})
    posts_data = pd.DataFrame({'post_id': [1, 2], 'text': ['A', 'B']})
    request_time = datetime.now()
    
    try:
        features = processor.prepare_features(user_data, posts_data, request_time)
        print("✅ Minimal test passed!")
        print(features.head())
    except Exception as e:
        print(f"❌ Minimal test failed: {str(e)}")

In [4]:
import os
import logging
import pandas as pd
from dotenv import load_dotenv
from src.utils.data_loader import DataLoader
from src.utils.feature_processor import FeatureProcessor
from src.utils.model_loader import load_model
from src.utils.recommendation_service import RecommendationService
from datetime import datetime
import time  # Для замера времени выполнения

# Configure logging.
# force=True (Python 3.8+) replaces handlers already attached to the root logger.
# Without it basicConfig() does nothing once the root logger is configured -- which
# the first cell of this notebook already does -- and the format below is ignored.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    force=True,
)
logger = logging.getLogger(__name__)

# Load environment variables from the .env file
load_dotenv()

def test_full_pipeline():
    logger.info("\n" + "="*50)
    logger.info("Testing full recommendation pipeline")
    logger.info("="*50)
    
    try:
        start_time = time.time()
        
        # Получаем переменные окружения
        db_user = os.getenv("DB_USER")
        db_password = os.getenv("DB_PASSWORD")
        db_host = os.getenv("DB_HOST")
        db_port = os.getenv("DB_PORT")
        db_name = os.getenv("DB_NAME")
        
        # Проверяем наличие всех переменных
        if not all([db_user, db_password, db_host, db_port, db_name]):
            logger.error("One or more database environment variables are missing!")
            return False
            
        # Формируем строку подключения
        db_url = f"postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"
        logger.info(f"Using DB_URL: {db_url}")
        
        # Инициализация компонентов
        logger.info("Initializing DataLoader...")
        data_loader = DataLoader(db_url)
        
        logger.info("Loading features...")
        data_loader.load_features()
        logger.info(f"Loaded {len(data_loader.user_features)} users and {len(data_loader.post_features)} posts")
        
        # Проверяем наличие тестового пользователя
        test_user_id = 205
        if test_user_id not in data_loader.user_features['user_id'].values:
            logger.error(f"User {test_user_id} not found in user features!")
            return False
        
        logger.info(f"User {test_user_id} found in features")
        
        feature_processor = FeatureProcessor()
        model = load_model()
        
        service = RecommendationService(
            data_loader=data_loader,
            model=model,
            feature_processor=feature_processor
        )
        
        # Тестовый запрос
        test_time = datetime(2021, 11, 5)
        logger.info(f"Generating recommendations for user {test_user_id} at {test_time}...")

        logger.info(f"Request time type: {type(test_time)}")
        logger.info(f"Request time value: {test_time}")
        logger.info(f"ISO calendar: {test_time.isocalendar()}")
        
        recommendations = service.get_recommendations(
            user_id=test_user_id,
            request_time=test_time,
            limit=5
        )
        
        logger.info(f"✅ Recommendations generated: {len(recommendations)} items")
        
        if recommendations:
            for i, rec in enumerate(recommendations, 1):
                logger.info(f"{i}. Post ID: {rec.id}, Topic: {rec.topic}")
        else:
            logger.warning("No recommendations generated. Possible reasons:")
            logger.warning("- User has liked all available posts")
            logger.warning("- Model returned no predictions")
            logger.warning("- All posts are filtered out")
        
        # Проверяем лайкнутые посты пользователя
        liked_posts = data_loader.liked_posts
        user_liked_posts = liked_posts[liked_posts['user_id'] == test_user_id]
        logger.info(f"User has liked {len(user_liked_posts)} posts")
        
        # Проверяем количество постов, доступных для рекомендации
        all_post_ids = set(data_loader.post_features['post_id'].values)
        available_posts = all_post_ids - set(user_liked_posts['post_id'].values)
        logger.info(f"Total posts available for recommendation: {len(available_posts)}")
        
        # Замер времени выполнения
        elapsed = time.time() - start_time
        logger.info(f"⏱️ Pipeline executed in {elapsed:.2f} seconds")
        
        return True
        
    except Exception as e:
        logger.exception(f"❌ Pipeline test failed: {str(e)}")
        return False

if __name__ == "__main__":
    # Report a failed pipeline run through a non-zero exit status instead of
    # silently discarding the boolean result
    if not test_full_pipeline():
        raise SystemExit(1)

INFO:__main__:
INFO:__main__:Testing full recommendation pipeline
INFO:__main__:Using DB_URL: postgresql://robot-startml-ro:***@postgres.lab.karpov.courses:6432/startml
INFO:__main__:Initializing DataLoader...
INFO:__main__:Loading features...
INFO:src.utils.data_loader:Starting feature loading...
INFO:src.utils.data_loader:Loading post features...
INFO:src.utils.data_loader:Loaded post features: 7023 rows, 24 columns
INFO:src.utils.data_loader:Loading user features...
INFO:src.utils.data_loader:Loaded user features: 163205 rows, 6 columns
INFO:src.utils.data_loader:Loading liked posts...
INFO:src.utils.data_loader:Loaded liked posts: 8136620 rows
INFO:src.utils.data_loader:Creating post details dictionary...
INFO:src.utils.data_loader:Features loaded successfully
INFO:__main__:Loaded 163205 users and 7023 posts
INFO:__main__:User 205 found in features
INFO:src.utils.model_loader:Model loaded successfully from models/catboost_min_features.cbm
INFO:__main__:Generating recom

In [7]:
# Check the type of the ISO week number (index 1 of isocalendar()) for the test date
type(datetime(2021, 11, 5).isocalendar()[1])

int