# Tibia Auction Market Analysis - Data Exploration

## Analysis Goals:
1. **Data Exploration** - understanding data structure and quality
2. **Price Trend Analysis** - identifying temporal patterns
3. **World Segmentation** - grouping by market characteristics
4. **Preparation for Predictive Modeling**

## Research Questions:
- Which factors most influence character prices?
- Are there differences between worlds (PvP vs PvE)?
- What does seasonality look like in the market?
- Can we predict future prices?


Library imports

In [5]:
# Data manipulation and analysis
import pandas as pd
import numpy as np

# Database connection
import psycopg2
from sqlalchemy import create_engine

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Statistical analysis
from scipy.stats import pearsonr

# Notebook-wide configuration
import warnings

# Suppress every warning for the remainder of the session.
warnings.filterwarnings(action='ignore')

# Display settings: apply the 'seaborn-v0_8' matplotlib style and
# lift pandas' cap on the number of columns shown.
plt.style.use('seaborn-v0_8')
pd.set_option('display.max_columns', None)

print("Libraries imported successfully.")

Libraries imported successfully.


Database connection

In [6]:
import os
from urllib.parse import quote

from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Database connection config from environment variables.
# DB_PASSWORD has no default, so it is None when the variable is not set.
DB_CONFIG = {
    'host': os.getenv('DB_HOST', 'localhost'),
    'port': os.getenv('DB_PORT', '5432'),
    'database': os.getenv('DB_NAME', 'auction_data'),
    'user': os.getenv('DB_USER', 'scraper'),
    'password': os.getenv('DB_PASSWORD')
}

# Percent-encode the credentials so characters that are meaningful inside a
# URL (e.g. '@', ':', '/', '#') cannot corrupt the connection string.
credentials = quote(DB_CONFIG['user'], safe='')
if DB_CONFIG['password'] is not None:
    # Only append a password when one was supplied; otherwise the literal
    # text "None" would be sent to the server as the password.
    credentials += f":{quote(DB_CONFIG['password'], safe='')}"

# Create connection string
connection_string = (
    f"postgresql://{credentials}"
    f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}"
)

# Create engine
engine = create_engine(connection_string)

print("Database connection configured.")


Database connection configured.


In [7]:
# List every column of every table in the 'public' schema, together with its
# data type and nullability, ordered by table and column position.
schema_query = """
SELECT 
    table_name,
    column_name,
    data_type,
    is_nullable
FROM information_schema.columns 
WHERE table_schema = 'public'
ORDER BY table_name, ordinal_position;
"""

schema_df = pd.read_sql(sql=schema_query, con=engine)

# Plain-text dump (no index column) under a one-line heading.
print("Database schema:", schema_df.to_string(index=False), sep="\n")


Database schema:
                 table_name         column_name                   data_type is_nullable
                   auctions                  id                     integer          NO
                   auctions        character_id                     integer         YES
                   auctions         auction_end                      bigint          NO
                   auctions         current_bid                     integer          NO
                   auctions     has_been_bidded                     boolean          NO
                   auctions       is_historical                     boolean          NO
                   auctions          scraped_at timestamp without time zone         YES
     character_greater_gems                  id                     integer          NO
     character_greater_gems        character_id                     integer         YES
     character_greater_gems     gem_description           character varying          NO
     character_

In [33]:
# Load one row per historical auction that received a bid, joined with the
# sold character's attributes, its skills and its world. The two correlated
# subqueries count the character's rare achievements and imbuement rows.
schema_query = """
SELECT 
    a.id AS auction_id,
    a.current_bid,
    a.auction_end,
    c.id AS character_id, 
    c.vocation_id, 
    c.level, 
    c.sex,
    c.achievement_points,
    c.boss_points,
    c.tc_invested,
    c.charm_total,
    c.charm_expansion,
    c.prey_slot,
    c.hunting_slot,
    c.transfer,
    c.gems_greater,
    c.outfits_count,
    c.mounts_count,
    c.store_mounts_count,
    c.store_outfits_count,
    c.hirelings_count,
    s.magic, s.axe, s.sword, s.club, s.distance, s.shielding, s.fist,
    w.pvp_type,
    w.battleye,
    w.location,
    (SELECT COUNT(*) FROM character_rare_achievements cra WHERE cra.character_id = c.id) as rare_achievements_count,
    (SELECT COUNT(*) FROM character_imbuements ci WHERE ci.character_id = c.id) as imbuements_known_count
FROM auctions a 
JOIN characters c ON a.character_id = c.id
LEFT JOIN skills s ON c.skills_id = s.id
LEFT JOIN worlds w ON c.world_id = w.id
WHERE a.has_been_bidded = true
    AND a.is_historical = true
"""
auctions_df = pd.read_sql(schema_query, engine)

# auction_end is stored as an integer and interpreted here as Unix epoch
# seconds; derive calendar features from it for the temporal analysis.
auctions_df['auction_end_dt'] = pd.to_datetime(auctions_df['auction_end'], unit='s')
auctions_df['auction_month'] = auctions_df['auction_end_dt'].dt.month
auctions_df['auction_day_of_week'] = auctions_df['auction_end_dt'].dt.dayofweek  # Monday=0 ... Sunday=6

display(auctions_df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None".
auctions_df.info()

Unnamed: 0,auction_id,current_bid,auction_end,character_id,vocation_id,level,sex,achievement_points,boss_points,tc_invested,charm_total,charm_expansion,prey_slot,hunting_slot,transfer,gems_greater,outfits_count,mounts_count,store_mounts_count,store_outfits_count,hirelings_count,magic,axe,sword,club,distance,shielding,fist,pvp_type,battleye,location,rare_achievements_count,imbuements_known_count,auction_end_dt,auction_month,auction_day_of_week
0,1969938,5002,1759500000,1969938,1,508,False,931,5425,-1,6565,True,False,False,False,0,47,52,0,0,0,11.43,120.84,42.3,80.36,13.88,112.91,21.49,Optional,False,BR,0,23,2025-10-03 14:00:00,10,4
1,1969243,801,1759395600,1969243,4,180,False,14,0,-1,425,False,False,False,True,0,12,2,0,0,1,90.39,17.18,14.28,12.75,13.91,30.6,13.01,Open,False,,0,0,2025-10-02 09:00:00,10,3
2,1304164,3901,1690329600,1304164,2,368,False,200,1400,-1,3661,False,True,False,True,0,15,6,0,2,0,30.86,13.48,13.48,13.48,109.11,93.34,21.84,Open,True,BR,0,11,2023-07-26 00:00:00,7,2
3,1958247,502,1758099600,1958247,1,272,False,156,0,0,1155,False,False,False,True,0,18,7,0,0,0,9.78,109.87,68.17,14.49,13.83,103.34,19.82,Open,False,BR,0,0,2025-09-17 09:00:00,9,2
4,1935665,15000,1755421200,1935665,4,566,False,176,2010,-1,4511,True,True,False,True,0,22,5,0,1,0,103.56,12.49,15.63,12.49,17.63,32.4,16.02,Open,True,BR,0,21,2025-08-17 09:00:00,8,6


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 815152 entries, 0 to 815151
Data columns (total 36 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   auction_id               815152 non-null  int64         
 1   current_bid              815152 non-null  int64         
 2   auction_end              815152 non-null  int64         
 3   character_id             815152 non-null  int64         
 4   vocation_id              815152 non-null  int64         
 5   level                    815152 non-null  int64         
 6   sex                      815152 non-null  bool          
 7   achievement_points       815152 non-null  int64         
 8   boss_points              815152 non-null  int64         
 9   tc_invested              815152 non-null  int64         
 10  charm_total              815152 non-null  int64         
 11  charm_expansion          815152 non-null  bool          
 12  prey_slot       

# Skills analysis for each profession

In [37]:
# Mean skill values and number of auctions per vocation.
# The source frame is auctions_df (the auction/character/skills join);
# schema_df only holds information_schema metadata and has neither a
# vocation_id column nor any skill columns.
vocation_skills = auctions_df.groupby('vocation_id').agg(
    # Named aggregation ties each output column to its source column, so the
    # labels cannot drift out of sync with the aggregation order.
    avg_magic=('magic', 'mean'),
    avg_distance=('distance', 'mean'),
    avg_sword=('sword', 'mean'),
    avg_axe=('axe', 'mean'),
    avg_club=('club', 'mean'),
    avg_shielding=('shielding', 'mean'),
    avg_fist=('fist', 'mean'),
    count=('auction_id', 'count'),
).round(3)

print("Average skills for each profession:")
display(vocation_skills)



Average skills for each profession:


Unnamed: 0_level_0,avg_magic,avg_distance,avg_sword,avg_axe,avg_club,avg_shielding,avg_fist,count
vocation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,2.211,14.596,17.963,13.544,13.844,22.293,12.435,1363
1,9.284,19.063,73.92,58.528,53.432,101.059,15.553,249867
2,26.04,111.846,21.298,16.383,18.45,99.713,16.078,220784
3,85.046,15.525,14.76,12.859,13.533,33.452,13.295,149761
4,88.271,16.467,15.639,13.141,14.067,33.742,13.567,186499
5,31.797,13.888,14.048,13.514,14.002,58.141,88.724,6878


In [44]:
# Human-readable names for the numeric vocation ids
vocation_mapping = {
    0: 'None/Rokie',
    1: 'Knight',
    2: 'Paladin',
    3: 'Druid',
    4: 'Sorcerer',
    5: 'Monk'
}

# Label every auction row with its vocation name. The ids live in auctions_df;
# schema_df is the information_schema listing and has no vocation_id column.
auctions_df['vocation_name'] = auctions_df['vocation_id'].map(vocation_mapping)

# Same summary table, indexed by vocation name instead of the numeric id
vocation_skills_named = vocation_skills.copy()
vocation_skills_named.index = vocation_skills_named.index.map(vocation_mapping)
print("\n Average skills for each profession:")
display(vocation_skills_named)



 Average skills for each profession:


Unnamed: 0_level_0,avg_magic,avg_distance,avg_sword,avg_axe,avg_club,avg_shielding,avg_fist,count
vocation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
None/Rokie,2.211,14.596,17.963,13.544,13.844,22.293,12.435,1363
Knight,9.284,19.063,73.92,58.528,53.432,101.059,15.553,249867
Paladin,26.04,111.846,21.298,16.383,18.45,99.713,16.078,220784
Druid,85.046,15.525,14.76,12.859,13.533,33.452,13.295,149761
Sorcerer,88.271,16.467,15.639,13.141,14.067,33.742,13.567,186499
Monk,31.797,13.888,14.048,13.514,14.002,58.141,88.724,6878
