In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.simplefilter('ignore')

# Notebook-wide display settings: show every column, cap printed rows at 100,
# and use seaborn's white grid theme for all figures.
pd.options.display.max_columns = None
pd.options.display.max_rows = 100
sns.set_style('whitegrid')

## 1. Load Data

In [None]:
# Load owned games data.
# Every failure path leaves `owned_games` as an empty DataFrame, which is the
# signal the later cells check (`owned_games.empty`) to skip their work.
try:
    owned_games = pd.read_csv('../data/owned_games.csv')
except FileNotFoundError:
    print("Error: Run src/data_collection.py first to collect your Steam data")
    owned_games = pd.DataFrame()
except pd.errors.EmptyDataError:
    # The file exists but has no header/rows (e.g. an interrupted collection run).
    print("Error: ../data/owned_games.csv is empty - re-run src/data_collection.py")
    owned_games = pd.DataFrame()
else:
    print(f"Loaded {len(owned_games)} owned games")

# Preview the first rows. This has to be the cell's final top-level expression:
# a bare expression nested inside the try block is evaluated and then discarded,
# so the preview would never be shown.
owned_games.head()

## 2. Data Exploration

In [None]:
# Summarize the loaded library: column dtypes / null counts, then numeric stats.
if not owned_games.empty:
    print("Dataset Info:")
    # DataFrame.info() writes its report to stdout itself and returns None, so
    # wrapping it in print() would append a stray "None" line to the output.
    owned_games.info()
    print("\nBasic Statistics:")
    print(owned_games.describe())

In [None]:
# Visualize playtime distribution
if not owned_games.empty and 'playtime_hours' in owned_games.columns:
    # Explicit figure/axes handles instead of the implicit pyplot state machine,
    # so each panel is configured through its own Axes object.
    fig, (ax_hist, ax_top) = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: spread of playtime across the whole library.
    # Missing values are dropped first; the histogram cannot derive a finite
    # bin range when the data contains NaN.
    ax_hist.hist(owned_games['playtime_hours'].dropna(), bins=50, edgecolor='black')
    ax_hist.set_xlabel('Playtime (hours)')
    ax_hist.set_ylabel('Number of Games')
    ax_hist.set_title('Distribution of Playtime')

    # Right panel: the ten most-played games.
    top_played = owned_games.nlargest(10, 'playtime_hours')
    ax_top.barh(top_played['name'], top_played['playtime_hours'])
    # barh places the first row at the bottom of the axis; flip it so the
    # most-played game is listed first, at the top of the chart.
    ax_top.invert_yaxis()
    ax_top.set_xlabel('Hours Played')
    ax_top.set_title('Top 10 Most Played Games')

    fig.tight_layout()
    plt.show()

## 3. Feature Engineering

Create features that indicate game preferences:
- Playtime categories (unplayed: 0 hours, tried: under 5 hours, played: under 50 hours, loved: 50+ hours)
- Engagement score based on playtime

In [None]:
def categorize_playtime(hours, tried_below=5, loved_from=50):
    """Bucket a playtime value (in hours) into a coarse engagement label.

    Args:
        hours: Playtime in hours; may be NaN/None when the value is missing.
        tried_below: Upper bound (exclusive) of the 'tried' bucket.
        loved_from: Lower bound (inclusive) of the 'loved' bucket.

    Returns:
        'unknown' for a missing value, 'unplayed' for zero or negative hours,
        'tried' below ``tried_below``, 'played' below ``loved_from``,
        and 'loved' otherwise.
    """
    # NaN fails every comparison below, so without this guard a missing
    # playtime would fall through to the last branch and be labelled 'loved'.
    if pd.isna(hours):
        return 'unknown'
    # `<=` rather than `==` so a corrupt negative value is not reported as 'tried'.
    if hours <= 0:
        return 'unplayed'
    if hours < tried_below:
        return 'tried'
    if hours < loved_from:
        return 'played'
    return 'loved'


if not owned_games.empty:
    # Create playtime categories
    owned_games['playtime_category'] = owned_games['playtime_hours'].apply(categorize_playtime)

    # Calculate engagement score: playtime as a fraction of the most-played game.
    # Series.max() skips NaN; rows with missing playtime keep a NaN score.
    max_playtime = owned_games['playtime_hours'].max()
    if max_playtime > 0:
        owned_games['engagement_score'] = owned_games['playtime_hours'] / max_playtime
    else:
        # Nothing has been played (or every value is missing): avoid dividing by
        # zero and keep the column float-typed like the normal path.
        owned_games['engagement_score'] = 0.0

    print("Playtime categories:")
    print(owned_games['playtime_category'].value_counts())

## 4. Simple Recommendation System

Start with a simple baseline that uses playtime as the main preference signal.
Later, this can be extended into a content-based approach using game genres, tags, and features.

In [None]:
def get_top_games(df, n=10):
    """Return the ``n`` rows of ``df`` with the highest ``playtime_hours``.

    Only the ``name``, ``playtime_hours`` and ``engagement_score`` columns are
    kept, ordered from most to least played.
    """
    report_columns = ['name', 'playtime_hours', 'engagement_score']
    most_played = df.nlargest(n, 'playtime_hours')
    return most_played[report_columns]

# Print the most-played titles, but only when a library was actually loaded.
if not owned_games.empty:
    print("Your top games:")
    print(get_top_games(df=owned_games, n=10))

## 5. Next Steps

To improve recommendations, we need:
1. **Game metadata**: genres, tags, categories from Steam
2. **User reviews**: sentiment analysis of your reviews
3. **Wishlist data**: games you're interested in
4. **Similar games**: games similar to your favorites

The recommendation algorithm will:
- Identify patterns in games you love (50+ hours)
- Find similar games you don't own
- Rank by similarity to your preferences

In [None]:
# Placeholder for future model
# This will be developed as we add more features

class GameRecommender:
    """Skeleton recommender: remembers the library it was fitted on; no model yet."""

    def __init__(self):
        # Nothing is learned or stored until fit() is called.
        self.model = self.games_data = None

    def fit(self, user_games):
        """Record ``user_games`` as the data this recommender works from.

        No model is trained at this stage, so ``self.model`` is left untouched.
        """
        self.games_data = user_games

    def recommend(self, n=10):
        """Return the recommendations; currently always an empty list.

        ``n`` is accepted for the eventual top-N interface but is not used yet.
        """
        return list()

# Create the recommender; it is only fitted when a game library was loaded.
recommender = GameRecommender()
if not owned_games.empty:
    recommender.fit(user_games=owned_games)
    print("Recommender initialized!")

## 6. Save Processed Data

In [None]:
# Persist the enriched library (without the index column) for later use;
# nothing is written when no library was loaded.
if not owned_games.empty:
    owned_games.to_csv(path_or_buf='../data/processed_games.csv', index=False)
    print("Processed data saved to data/processed_games.csv")