# Tibia Auction Market Analysis - Data Exploration

## Analysis Goals:
1. **Data Exploration** - understanding data structure and quality
2. **Price Trend Analysis** - identifying temporal patterns
3. **World Segmentation** - grouping by market characteristics
4. **Preparation for Predictive Modeling**

## Research Questions:
- Which factors most influence character prices?
- Are there differences between worlds (PvP vs PvE)?
- What does seasonality look like in the market?
- Can we predict future prices?


Library imports

In [2]:
# Data manipulation and analysis
import pandas as pd
import numpy as np

# Database connection
import psycopg2
from sqlalchemy import create_engine

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Statistical analysis
from scipy.stats import pearsonr

# Notebook-wide configuration
import warnings

# Silence every warning category for the rest of the session so library
# notices do not clutter cell output.
warnings.simplefilter('ignore')

# Show all columns when a DataFrame is rendered (no "..." truncation).
pd.options.display.max_columns = None

# Apply the seaborn-flavoured matplotlib style sheet to all later figures.
plt.style.use('seaborn-v0_8')

print("Libraries imported successfully.")

Libraries imported successfully.


Database connection

In [4]:
import os
from urllib.parse import quote_plus

from dotenv import load_dotenv

# Load environment variables (e.g. from a local .env file) so credentials
# never have to be hardcoded in the notebook.
load_dotenv()

# Database connection config from environment variables.
# Every setting except the password has a default; 'password' is None when
# DB_PASSWORD is not set.
DB_CONFIG = {
    'host': os.getenv('DB_HOST', 'localhost'),
    'port': os.getenv('DB_PORT', '5432'),
    'database': os.getenv('DB_NAME', 'auction_data'),
    'user': os.getenv('DB_USER', 'scraper'),
    'password': os.getenv('DB_PASSWORD')
}

# Percent-encode the credentials: characters such as '@', ':', '/' or '#'
# in a user name or password would otherwise be parsed as URL delimiters
# and produce a malformed (or silently wrong) connection URL.
credentials = quote_plus(DB_CONFIG['user'])
if DB_CONFIG['password'] is not None:
    credentials += f":{quote_plus(DB_CONFIG['password'])}"
# When DB_PASSWORD is unset the password segment is omitted entirely instead
# of embedding the literal text "None" as the password.

# Create connection string.
# NOTE: this string contains the password - do not print or log it.
connection_string = (
    f"postgresql://{credentials}"
    f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}"
)

# Create engine
engine = create_engine(connection_string)

print("Database connection configured.")


Database connection configured.


In [5]:
# List every column of every table in the `public` schema, together with its
# data type and nullability, ordered by table and then by column position.
schema_query = """
SELECT 
    table_name,
    column_name,
    data_type,
    is_nullable
FROM information_schema.columns 
WHERE table_schema = 'public'
ORDER BY table_name, ordinal_position;
"""

schema_df = pd.read_sql(sql=schema_query, con=engine)

# Plain-text dump without the index column, preceded by a heading line.
print("Database schema:", schema_df.to_string(index=False), sep="\n")


Database schema:
                 table_name         column_name                   data_type is_nullable
                   auctions                  id                     integer          NO
                   auctions        character_id                     integer         YES
                   auctions         auction_end                      bigint          NO
                   auctions         current_bid                     integer          NO
                   auctions     has_been_bidded                     boolean          NO
                   auctions       is_historical                     boolean          NO
                   auctions          scraped_at timestamp without time zone         YES
     character_greater_gems                  id                     integer          NO
     character_greater_gems        character_id                     integer         YES
     character_greater_gems     gem_description           character varying          NO
     character_

In [6]:
# Load one row per auction that is flagged as historical and has been bid on,
# joined with the auctioned character's attributes, skills and world settings.
# Auctions without a matching character are dropped (inner join); skills and
# world columns are kept optional (left joins).
auction_query = """
SELECT 
    a.id AS auction_id,
    a.current_bid,
    c.id AS character_id, 
    c.vocation_id, 
    c.level, 
    c.charm_total,
    c.transfer,
    c.gems_greater,
    c.store_mounts_count,
    c.store_outfits_count,
    s.magic, s.axe, s.sword, s.club, s.distance,
    w.pvp_type,
    w.battleye,
    w.location
FROM auctions a 
JOIN characters c ON a.character_id = c.id
LEFT JOIN skills s ON c.skills_id = s.id
LEFT JOIN worlds w ON c.world_id = w.id
WHERE a.has_been_bidded = true
    AND a.is_historical = true
"""
auction_df = pd.read_sql(auction_query, engine)

# Backward-compatible aliases: this cell previously stored its query and
# result under the schema-listing names, so cells that still reference
# `schema_query` / `schema_df` keep receiving the auction data.
schema_query = auction_query
schema_df = auction_df

display(auction_df.head())

# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() would append a stray "None" line to the output.
auction_df.info()

Unnamed: 0,auction_id,current_bid,character_id,vocation_id,level,charm_total,transfer,gems_greater,store_mounts_count,store_outfits_count,magic,axe,sword,club,distance,pvp_type,battleye,location
0,1969243,801,1969243,4,180,425,True,0,0,0,90.39,17.18,14.28,12.75,13.91,Open,False,
1,1942641,1252,1942641,3,352,1370,False,0,0,0,98.86,12.36,18.67,12.36,21.95,Optional,False,BR
2,1995047,11001,1995047,4,605,10554,True,0,0,0,112.96,12.88,21.86,12.89,13.86,Open,False,EU
3,1979752,551,1979752,2,81,125,True,0,0,0,19.16,17.06,14.3,36.47,118.47,Open,True,
4,1973805,850,1973805,3,264,925,True,0,0,0,81.75,12.06,13.02,12.03,17.52,Open,True,BR


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 815152 entries, 0 to 815151
Data columns (total 18 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   auction_id           815152 non-null  int64  
 1   current_bid          815152 non-null  int64  
 2   character_id         815152 non-null  int64  
 3   vocation_id          815152 non-null  int64  
 4   level                815152 non-null  int64  
 5   charm_total          815152 non-null  int64  
 6   transfer             815152 non-null  bool   
 7   gems_greater         815152 non-null  int64  
 8   store_mounts_count   815152 non-null  int64  
 9   store_outfits_count  815152 non-null  int64  
 10  magic                815152 non-null  float64
 11  axe                  815152 non-null  float64
 12  sword                815152 non-null  float64
 13  club                 815152 non-null  float64
 14  distance             815152 non-null  float64
 15  pvp_type         