# Data Collection

### Building Main Item List

In [1]:
from src.utils import build_item_list
import os

# Hand build_item_list the <cwd>/src/data directory
# (presumably where the item list is stored; confirm in src.utils).
current_dir = os.getcwd()
data_dir = os.path.join(current_dir, 'src', 'data')
build_item_list(data_dir)

Dark Artistry Cape
Shadow in the Deep Bracers
Sea Rake's Bridle
Sullen Harvest
Golden Basher of Mage Skulls
Shards of Exile
Cauldron of Xahryx
Soul Shredder
Golden Latticean Shards
Stumpy - Nature's Attendant
Sullen Harvest
Splattering Forcipule
Cauldron of Xahryx
Treasure of the Crimson Witness 2016
Faceless Rex
Masque of Awaleb
King Kringle's Tannenkromm
The Hallows Within Tombstone
Vigil Triumph
Razzil's Midas Knuckles
Masque of Awaleb Bundle
Whalehook
Dark Artistry Belt
Venoling
Mania's Mask
Bloodfeather Finery
Whisky the Stout Artifact
Stumpy - Nature's Attendant
Bonkers of Awaleb
Blade of Tears
Sea Rake's Bridle
Golden Greevil
Lockless Luckvase 2015 Autographed by ImbaTV.HT
Geodesic Eidolon
Crystal Dryad
Immortal Reward - Hellborn Grasp
Masque of Awaleb Bundle
Mask of the Demon Trickster
Faceless Rex
Treasure of the Crimson Witness 2021
Dipper the Destroyer Bundle
Golden Offhand Basher of Mage Skulls
Trusty Mountain Yak
Emblems of the Dueling Fates Bundle
The Hallows Within
Alpin

Due to differences in the way each item is displayed, Selenium struggles to find the hero name.

### Getting Price History Data

In [1]:
from src.utils import process_price_history_row
import pandas as pd
import os

# Load the main item table from <cwd>/src/data/main.csv.
current_dir = os.getcwd()
main_csv_path = os.path.join(current_dir, 'src', 'data', 'main.csv')
df = pd.read_csv(main_csv_path)

# Discard every row whose 'Hero ID' is 0.
rows_without_hero = df[df['Hero ID'] == 0].index
df = df.drop(index=rows_without_hero)

# Run process_price_history_row on each remaining row, passing the items
# directory as its second argument, and store the result per row.
dir_path = os.path.join(current_dir, 'src', 'data', 'items')
df['Data Available'] = df.apply(process_price_history_row, axis=1, args=(dir_path,))

# Keep only rows not flagged False, then overwrite main.csv with the subset.
rows_without_data = df[df['Data Available'] == False].index
df = df.drop(index=rows_without_data)
df.to_csv(main_csv_path, index=False)

Error: could not save price history for Taunt: The Cat Dancer!


This creates a subset of usable data. Next, we find hero win rates and popularity!

### Getting Hero Data

Due to a limit on the use of the API, I will use a single item here. 

Personally, I am a huge fan of Phantom Assassin (who isn't?).
Example used below: Dread Requisition (Aug 31, 2020), from Lifestealer, hero ID 54.

In [2]:
from src.utils import get_hero_stats

# Query parameters handed to get_hero_stats.
hero_id = 54
till_date = "2020-08-31"

# The token is taken from the second '\n'-separated line of config.txt.
with open('config.txt', 'r') as config_file:
    config_lines = config_file.read().split('\n')
token = config_lines[1]

results = get_hero_stats(hero_id, till_date, token)


In [9]:
import pandas as pd
import os

# Build a frame from the fetched hero stats and derive the win rate in percent.
df = pd.DataFrame(results)
df['winPercentage'] = (df['winCount'] / df['matchCount']) * 100

# Take a peek. A bare expression is only rendered when it is the last
# statement of a cell, so display() is required here.
display(df.head(5))

# Save as CSV, named after the queried hero ID; create the folder if needed
# so the write does not fail on a fresh checkout.
current_dir = os.getcwd()
heroes_dir = os.path.join(current_dir, 'src', 'data', 'heros')
os.makedirs(heroes_dir, exist_ok=True)
df.to_csv(os.path.join(heroes_dir, f'{hero_id}.csv'), index=False)

Unfortunately, the STRATZ API only keeps long-term history (going back years) at monthly resolution; for daily data, it only stores the past 12 days. Due to the low-frequency nature of this strategy, it is important to use the longer (monthly) timescale.

# The Strategy

Hypothesis:

- A hero's win rate at time $t-1$ impacts that hero's popularity at time $t$.
- A hero's popularity impacts the prices of that hero's items.

### Linear Regression

#### Data Manipulation

In [25]:
import os

import pandas as pd
import matplotlib.pyplot as plt

# Build the paths with os.path.join so the cell runs on any OS; hard-coded
# backslash separators only resolve on Windows.
item_csv_path = os.path.join('src', 'data', 'items', 'Golden%20Dread%20Requisition.csv')
hero_csv_path = os.path.join('src', 'data', 'heros', '54.csv')

# Read Item CSV file and average all observations within each calendar month
# ('MS' labels every bucket with the first day of its month).
df_item = pd.read_csv(item_csv_path)
df_item['Timestamp'] = pd.to_datetime(df_item['Timestamp'])
df_item = df_item.set_index('Timestamp').resample('MS').mean()
display(df_item.head(5))

# Read Hero CSV file, keeping only the win percentage indexed by its 'month'
# column parsed as a timestamp.
df_hero = pd.read_csv(hero_csv_path)
df_hero['Timestamp'] = pd.to_datetime(df_hero['month'])
df_hero = df_hero[['Timestamp', 'winPercentage']].set_index('Timestamp')
display(df_hero.head(5))

# Merge both DFs on their timestamp index; the inner join keeps only the
# timestamps present in both frames.
df = pd.merge(df_item, df_hero, left_index=True, right_index=True, how='inner')
df.head(5)

Unnamed: 0_level_0,Price,Volume
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-09-01,0.264667,2852.1
2020-10-01,0.243516,1105.193548
2020-11-01,0.256833,1226.333333
2020-12-01,0.205419,1013.032258
2021-01-01,0.174871,748.354839


50


Unnamed: 0_level_0,winPercentage
Timestamp,Unnamed: 1_level_1
2024-09-01,49.387486
2024-08-01,50.020162
2024-07-01,49.730506
2024-06-01,52.154103
2024-05-01,52.422699


56


Unnamed: 0_level_0,Price,Volume,winPercentage
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-09-01,0.264667,2852.1,49.187262
2020-10-01,0.243516,1105.193548,49.078864
2020-11-01,0.256833,1226.333333,49.266379
2020-12-01,0.205419,1013.032258,47.534749
2021-01-01,0.174871,748.354839,47.950028


#### Exploratory Plots

In [None]:
# 3-period simple moving average (SMA) of the item price; df_item holds one
# row per month, so this is a 3-month SMA.
df_item['SMA_3'] = df_item['Price'].rolling(window=3).mean()

# Overlay the price series and its 3-month SMA on a single set of axes.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_item['Price'], label='Price')
ax.plot(df_item['SMA_3'], label='3-Month SMA', color='orange')
ax.set_title("Price and 3-Month Simple Moving Average")
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
plt.show()

In [None]:
# Linear Regression and Results

#### Discussion

### Modelling Strategy

In [None]:
import pandas as pd
from backtesting import Backtest, Strategy

class LowFreq(Strategy):
    """Time-of-day strategy: open a long at ``buy_time``, close it at ``sell_time``.

    Both times are 'HH:MM:SS' strings compared against the time component of
    the current bar's index timestamp.

    NOTE(review): the merged frame built earlier in this notebook is
    month-start data, whose timestamps all carry a 00:00:00 time component,
    so neither condition matches unless intraday data is supplied.
    """

    # Define the parameters for your strategy
    buy_time = '09:00:00'  # Time to buy
    sell_time = '15:00:00'  # Time to sell

    def init(self):
        """Parse the configured times once instead of on every bar."""
        self._buy_at = pd.to_datetime(self.buy_time).time()
        self._sell_at = pd.to_datetime(self.sell_time).time()

    def next(self):
        """Act on the current bar according to its time of day."""
        # Inside next(), self.data only exposes bars up to the current one,
        # so the last index entry is the current bar's timestamp.
        current_time = self.data.index[-1].time()

        # Buy condition
        if current_time == self._buy_at and not self.position:
            self.buy()  # Enter a buy order

        # Sell condition
        elif current_time == self._sell_at and self.position:
            # Close the open long; self.sell() would place a fresh sell order
            # instead of simply exiting the position.
            self.position.close()

# Run the backtest
# Backtest requires Open/High/Low/Close columns. Only a single price per
# period is available, so reuse it for all four.
bt_data = df.assign(Open=df['Price'], High=df['Price'], Low=df['Price'], Close=df['Price'])

# The strategy class defined in this cell is LowFreq.
bt = Backtest(bt_data, LowFreq, cash=10000, commission=.002)
stats = bt.run()
bt.plot()
