In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Set plotting style
# Matplotlib theme: apply the 'seaborn-v0_8-darkgrid' style sheet to all later figures.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably need a recent Matplotlib
# release — confirm against the environment this notebook runs in.
plt.style.use('seaborn-v0_8-darkgrid')
# Make "husl" the default seaborn colour palette for subsequent plots.
sns.set_palette("husl")
# IPython magic: render figures inline in the cell output.
%matplotlib inline

In [2]:
# Read the tournament listing and parse both boundary columns to datetimes
# in one chained step, so later cells can compare them against daily dates.
df_tournaments = pd.read_csv('csgo_stier_tournaments_last_year_20251004_114827.csv').assign(
    start_date=lambda frame: pd.to_datetime(frame['start_date']),
    end_date=lambda frame: pd.to_datetime(frame['end_date']),
)

# Overall span of the listing: earliest start through latest end
earliest_start = df_tournaments['start_date'].min()
latest_end = df_tournaments['end_date'].max()

print(f" Loaded {len(df_tournaments)} tournaments")
print(f" Date range: {earliest_start} to {latest_end}")
print(f"\n{df_tournaments.head()}")

 Loaded 26 tournaments
 Date range: 2024-10-07 00:00:00 to 2025-12-14 00:00:00

                       tournament_name start_date   end_date  \
0       Intel Extreme Masters Rio 2024 2024-10-07 2024-10-13   
1      BLAST Premier: World Final 2024 2024-10-30 2024-11-03   
2    Perfect World Shanghai Major 2024 2024-11-30 2024-12-15   
3             BLAST Bounty Spring 2025 2025-01-23 2025-01-26   
4  Intel Extreme Masters Katowice 2025 2025-01-29 2025-02-09   

              date_range  prize_pool   location  
0      Oct 07 - 13, 2024    $250,000     Brazil  
1  Oct 30 - Nov 03, 2024  $1,000,000  Singapore  
2  Nov 30 - Dec 15, 2024  $1,250,000      China  
3      Jan 23 - 26, 2025    $482,500    Denmark  
4  Jan 29 - Feb 09, 2025  $1,000,000     Poland  


In [3]:
# Daily calendar for the last year, intended to line up with the
# date range used by the price/player data.
start_date = datetime(2024, 10, 4)  # window start: one year before Oct 4, 2025
end_date = datetime(2025, 10, 3)    # window end: the day before Oct 4, 2025

# One row per calendar day, both endpoints included
date_range = pd.date_range(start_date, end_date, freq='D')
df_daily = date_range.to_frame(index=False, name='Date')

# Report the window that was built, followed by a preview of the frame
summary_lines = [
    "  Created daily time series:",
    f"  From: {start_date.date()}",
    f"  To: {end_date.date()}",
    f"  Total days: {len(df_daily)}",
    f"\n{df_daily.head()}",
]
print("\n".join(summary_lines))

  Created daily time series:
  From: 2024-10-04
  To: 2025-10-03
  Total days: 365

        Date
0 2024-10-04
1 2024-10-05
2 2024-10-06
3 2024-10-07
4 2024-10-08


In [4]:
# Flag each calendar day that falls inside a tournament's date range.
# Every day starts as a non-tournament day with empty metadata, so re-running
# this cell resets the columns before filling them again.
df_daily['has_tournament'] = 0
for text_col in ('tournament_name', 'prize_pool', 'tournament_location'):
    df_daily[text_col] = ''

# Walk the tournaments in file order; when two events cover the same day,
# the later row overwrites the metadata written by the earlier one.
for event in df_tournaments.itertuples(index=False):
    # Inclusive on both ends: the start and end days count as tournament days
    in_event = df_daily['Date'].between(event.start_date, event.end_date)

    df_daily.loc[in_event, 'has_tournament'] = 1
    df_daily.loc[in_event, 'tournament_name'] = event.tournament_name
    df_daily.loc[in_event, 'prize_pool'] = event.prize_pool
    df_daily.loc[in_event, 'tournament_location'] = event.location

# Coverage summary
total_days = len(df_daily)
tournament_days = df_daily['has_tournament'].sum()
non_tournament_days = total_days - tournament_days

print(f" Tournament Coverage:")
print(f"  Days with tournaments: {tournament_days} ({tournament_days/total_days*100:.1f}%)")
print(f"  Days without tournaments: {non_tournament_days} ({non_tournament_days/total_days*100:.1f}%)")
print(f"\n Sample days with tournaments:")

# Preview the first flagged days with the metadata that was attached
flagged_preview = df_daily.loc[
    df_daily['has_tournament'].eq(1),
    ['Date', 'tournament_name', 'prize_pool'],
].head(10)
print(flagged_preview)

 Tournament Coverage:
  Days with tournaments: 178 (48.8%)
  Days without tournaments: 187 (51.2%)

 Sample days with tournaments:
         Date                  tournament_name  prize_pool
3  2024-10-07   Intel Extreme Masters Rio 2024    $250,000
4  2024-10-08   Intel Extreme Masters Rio 2024    $250,000
5  2024-10-09   Intel Extreme Masters Rio 2024    $250,000
6  2024-10-10   Intel Extreme Masters Rio 2024    $250,000
7  2024-10-11   Intel Extreme Masters Rio 2024    $250,000
8  2024-10-12   Intel Extreme Masters Rio 2024    $250,000
9  2024-10-13   Intel Extreme Masters Rio 2024    $250,000
26 2024-10-30  BLAST Premier: World Final 2024  $1,000,000
27 2024-10-31  BLAST Premier: World Final 2024  $1,000,000
28 2024-11-01  BLAST Premier: World Final 2024  $1,000,000


In [5]:
# Reduce the daily frame to the date and the binary tournament flag;
# the descriptive tournament columns are not exported.
df_final = df_daily.loc[:, ['Date', 'has_tournament']].copy()

# Write the result without the row index
output_file = 'csgo_tournament_events_daily_last_year.csv'
df_final.to_csv(output_file, index=False)

# Describe what was written
first_day = df_final['Date'].min().date()
last_day = df_final['Date'].max().date()

print(f" Saved to: {output_file}")
print(f"\n Dataset Info:")
print(f"   Total records: {len(df_final)}")
print(f"   Columns: {list(df_final.columns)}")
print(f"   Date range: {first_day} to {last_day}")
print(f"\n Sample:")
print(df_final.head(10))
print(f"\n Ready to merge with price and player count data!")

 Saved to: csgo_tournament_events_daily_last_year.csv

 Dataset Info:
   Total records: 365
   Columns: ['Date', 'has_tournament']
   Date range: 2024-10-04 to 2025-10-03

 Sample:
        Date  has_tournament
0 2024-10-04               0
1 2024-10-05               0
2 2024-10-06               0
3 2024-10-07               1
4 2024-10-08               1
5 2024-10-09               1
6 2024-10-10               1
7 2024-10-11               1
8 2024-10-12               1
9 2024-10-13               1

 Ready to merge with price and player count data!


In [6]:
# Sanity check: reload the exported CSV from disk and preview its first rows
saved_events = pd.read_csv("csgo_tournament_events_daily_last_year.csv")
saved_events.head()

Unnamed: 0,Date,has_tournament
0,2024-10-04,0
1,2024-10-05,0
2,2024-10-06,0
3,2024-10-07,1
4,2024-10-08,1
