In [21]:
import sys
import plotly.graph_objects as go
import pandas as pd
import requests
import numpy as np
import os
import re
import random
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import explained_variance_score
from sklearn.metrics import median_absolute_error
from sklearn.preprocessing import StandardScaler
import skopt
from skopt import gp_minimize, forest_minimize
from skopt.space import Real, Categorical, Integer
from skopt.plots import plot_convergence
from skopt.plots import plot_objective, plot_evaluations
from skopt.plots import plot_histogram, plot_objective_2D
from skopt.utils import use_named_args
from keras.callbacks import EarlyStopping
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LayerNormalization
from tensorflow import keras
from keras import layers
from kerastuner import RandomSearch, Hyperband, BayesianOptimization

cwd = os.getcwd()
# print(cwd)

# # for windownoobs
GAME_PRICE_PREDICTION_PATH = os.path.abspath(os.path.join(cwd, '..', '..'))

# # for mac
#GAME_PRICE_PREDICTION_PATH = os.path.abspath(os.path.join(cwd))

sys.path.insert(0, os.path.abspath(GAME_PRICE_PREDICTION_PATH))

random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

### Config

In [22]:
def sanitize_filename(filename):
    # Replace disallowed characters with underscores
    filename = re.sub(r'[\\/*?:"<>|]', '_', filename)
    # Replace spaces with underscores
    filename = re.sub(r'\s+', '_', filename)
    # Convert filename to lowercase
    filename = filename.lower()
    return filename

# input the skibbity item's name here :P  NOTE: ITEMS = [] for mispelled items
ITEM = "M4A1-S | Golden Coil (Factory New)"
ITEM_SANITIZED = sanitize_filename(ITEM)

# For filter_file               |    NOTE: (input: compressed -> output: filtered_data (MS) )
INPUT_COMPRESSED = os.path.join(GAME_PRICE_PREDICTION_PATH, 'data', 'reddit_data', 'compressed_data')
FILTERED_DATA_DIRECTORY = os.path.join(GAME_PRICE_PREDICTION_PATH, 'data', 'reddit_data', 'filtered_data', f'{ITEM_SANITIZED}_filtered')

# for mention_counter           |   NOTE:  (input: filtered_data (MS) -> output: mention_data (MS))
MENTION_DATA_DIRECTORY = os.path.join(GAME_PRICE_PREDICTION_PATH, 'data', 'reddit_data', 'mention_data', f'{ITEM_SANITIZED}_mentions')

# mention_data_combiner         |   NOTE:  (input: mention_data (MS) -> output: mention_ALL)  - directories N/A for this.
ALL_MENTIONS_FILENAME = f'{ITEM_SANITIZED}_all_mentions.csv'
ALL_MENTIONS_DATA = os.path.join(GAME_PRICE_PREDICTION_PATH, 'data', 'reddit_data', 'mention_all', ALL_MENTIONS_FILENAME)

# For vader_polarity            |    NOTE: (input: filtered -> output: polarity_data)
POLARITY_FOLDER_NAME = f'{ITEM_SANITIZED}_polarity'
POLARITY_DATA_DIRECTORY = os.path.join(GAME_PRICE_PREDICTION_PATH, 'data', 'reddit_data', 'polarity_data', POLARITY_FOLDER_NAME)
OUTPUT_POLARITY_FORMAT = "csv"

### Get price history data

In [23]:
def fetch_item_from_api(item, dailyCookie):
    # get historical price data of item from API
    url = "https://steamcommunity.com/market/pricehistory/"
    params = {
        'country': 'US',
        'currency': '1',
        'appid': '730',
        'market_hash_name': item
    }
    cookies = {'steamLoginSecure': dailyCookie}

    response = requests.get(url, params=params, cookies=cookies)
    json_data = response.json()
    
    # print error message if request failed
    if response.status_code != 200:
        print(f"Failed to fetch data for {item}. Status code: {response.status_code}")
        return None 
           
    # convert and clean data to dataframe object
    price_history = json_data['prices']
    price_history_df = pd.DataFrame(price_history, columns=['date', 'price_usd', 'volume'])
    price_history_df['date'] = pd.to_datetime(price_history_df['date'].str[0:-4], format='%b %d %Y %H')
    price_history_df['volume'] = pd.to_numeric(price_history_df['volume'])
    price_history_df.set_index('date', inplace=True)
   
    return price_history_df
def fetch_item_to_df(item, dailyCookie):
    price_history_df = fetch_item_from_api(item, dailyCookie)
    grouped_current_item = price_history_df.groupby(pd.Grouper(freq='D')).agg({
    'price_usd':'median',
    'volume':'sum'
    })
    return grouped_current_item

def get_cookie_from_blob():
    blob_url = "https://steamgraphsstorage.blob.core.windows.net/container-for-blob/cookie.txt?sp=rwd&st=2024-08-06T20:45:18Z&se=2025-09-10T04:45:18Z&spr=https&sv=2022-11-02&sr=c&sig=MKticGz9P9HPI7iXp1a6yuErc5Sv6P9fY%2FfCbxL0PLg%3D"
    response = requests.get(blob_url)
    response.raise_for_status()
    return response.text


def fetch_items():
    items = ["Glove Case Key", "Officer Jacques Beltram | Gendarmerie Nationale", "Kilowatt Case", "AK-47 | Blue Laminate (Factory New)", "Glove Case", "★ StatTrak™ Paracord Knife | Case Hardened (Field-Tested)"]
    return items

dailyCookie = get_cookie_from_blob()
items = fetch_items()
current_item = fetch_item_to_df(items[4], dailyCookie)
df = current_item


### Get mentions data, Preprocessing

In [24]:
mentions_df = pd.read_csv(os.path.join(GAME_PRICE_PREDICTION_PATH, 'data', 'reddit_data', 'mention_all', ALL_MENTIONS_FILENAME))
mentions_df['date'] = pd.to_datetime(mentions_df['date'])

# Ensure df has a 'date' column
if 'date' not in df.columns:
    df = df.reset_index()

# Merge mentions and volume data
merged_df = pd.merge(mentions_df, df[['date', 'volume']], on='date', how='outer').sort_values('date')
merged_df = merged_df.fillna(method='ffill')

# Smooth mentions and volume using a rolling average
merged_df['smoothed_mentions'] = merged_df['num_mentions'].rolling(window=10).mean()
merged_df['smoothed_volume'] = merged_df['volume'].rolling(window=10).mean()
merged_df = merged_df.dropna()

print(merged_df)

           date  num_mentions   volume  smoothed_mentions  smoothed_volume
1213 2016-12-08          29.0  44747.0               25.7          51431.0
1214 2016-12-09          18.0  46648.0               22.7          50727.2
1215 2016-12-10          34.0  53017.0               22.8          51092.9
1216 2016-12-11          20.0  52132.0               23.4          51439.3
1217 2016-12-12          22.0  41960.0               24.1          50187.9
...         ...           ...      ...                ...              ...
4082 2024-10-16           4.0   3071.0                4.0           3106.2
4083 2024-10-17           4.0   2661.0                4.0           3143.2
4084 2024-10-18           4.0   3097.0                4.0           3161.4
4085 2024-10-19           4.0   3141.0                4.0           3198.9
4086 2024-10-20           4.0   3498.0                4.0           3268.5

[2874 rows x 5 columns]


  merged_df = merged_df.fillna(method='ffill')


### Scaling

In [25]:
# Create scalers
mentions_scaler = MinMaxScaler(feature_range=(0, 1))
volume_scaler = MinMaxScaler(feature_range=(0, 1))

# Fit and transform the 'num_mentions' column
merged_df['scaled_mentions'] = mentions_scaler.fit_transform(merged_df[['num_mentions']])

# Fit and transform the 'volume' column
merged_df['scaled_volume'] = volume_scaler.fit_transform(merged_df[['volume']])

# Print the first few rows to verify
print(merged_df[['num_mentions', 'scaled_mentions', 'volume', 'scaled_volume']].head())

      num_mentions  scaled_mentions   volume  scaled_volume
1213          29.0         0.109804  44747.0       0.082008
1214          18.0         0.066667  46648.0       0.085654
1215          34.0         0.129412  53017.0       0.097872
1216          20.0         0.074510  52132.0       0.096174
1217          22.0         0.082353  41960.0       0.076662


### Train-test-validation split

In [26]:
X = merged_df['scaled_mentions'].values.reshape(-1, 1)
y = merged_df['scaled_volume'].values

# Calculate split indices
train_size = int(0.7 * len(X))
val_size = int(0.15 * len(X))

# Split the data
X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = X[train_size:train_size+val_size], y[train_size:train_size+val_size]
X_test, y_test = X[train_size+val_size:], y[train_size+val_size:]

In [27]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from kerastuner.tuners import BayesianOptimization

def build_model(hp):
    model = keras.Sequential()
    
    # Tune the number of LSTM layers and units
    for i in range(hp.Int('num_lstm_layers', 1, 3)):
        model.add(layers.LSTM(
            units=hp.Int(f'lstm_units_{i}', min_value=32, max_value=256, step=32),
            return_sequences=True if i < hp.Int('num_lstm_layers', 1, 3) - 1 else False,
            input_shape=(1, 1) if i == 0 else None
        ))
    
    # Tune the number of dense layers and units
    for i in range(hp.Int('num_dense_layers', 1, 3)):
        model.add(layers.Dense(
            units=hp.Int(f'dense_units_{i}', min_value=32, max_value=256, step=32),
            activation=hp.Choice('dense_activation', ['relu', 'tanh'])
        ))
    
    # Output layer
    model.add(layers.Dense(1))
    
    # Tune learning rate
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model

# Instantiate the tuner
tuner = BayesianOptimization(
    build_model,
    objective='val_loss',
    max_trials=50,
    directory='keras_tuner_dir',
    project_name='lstm_tuning'
)

# Reshape input data for LSTM
X_train_reshaped = X_train.reshape((X_train.shape[0], 1, 1))
X_val_reshaped = X_val.reshape((X_val.shape[0], 1, 1))
X_test_reshaped = X_test.reshape((X_test.shape[0], 1, 1))

# Perform the search
tuner.search(X_train_reshaped, y_train, 
             epochs=50, 
             validation_data=(X_val_reshaped, y_val),
             callbacks=[keras.callbacks.EarlyStopping(patience=5)])

# Get the best model
best_model = tuner.get_best_models(num_models=1)[0]

# Print the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print("Best Hyperparameters:")
for hp in best_hps.values:
    print(f"{hp}: {best_hps.get(hp)}")

# Evaluate the model
test_loss = best_model.evaluate(X_test_reshaped, y_test)
print(f"Test Loss: {test_loss}")

# Make predictions
predictions = best_model.predict(X_test_reshaped)

# Print some sample predictions vs actual values
print("\nSample Predictions vs Actual Values:")
for i in range(10):
    print(f"Prediction: {predictions[i][0]:.2f}, Actual: {y_test[i]:.2f}")

Trial 50 Complete [00h 00m 08s]
val_loss: 0.0005758625920861959

Best val_loss So Far: 0.0005758625920861959
Total elapsed time: 00h 08m 00s


  saveable.load_own_variables(weights_store.get(inner_path))


Best Hyperparameters:
num_lstm_layers: 3
lstm_units_0: 32
num_dense_layers: 3
dense_units_0: 96
dense_activation: relu
learning_rate: 0.01
lstm_units_1: 96
lstm_units_2: 32
dense_units_1: 256
dense_units_2: 256
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 9.2547e-04 
Test Loss: 0.0010431850096210837
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step

Sample Predictions vs Actual Values:
Prediction: 0.04, Actual: 0.01
Prediction: 0.04, Actual: 0.01
Prediction: 0.04, Actual: 0.01
Prediction: 0.04, Actual: 0.01
Prediction: 0.04, Actual: 0.01
Prediction: 0.04, Actual: 0.01
Prediction: 0.04, Actual: 0.00
Prediction: 0.04, Actual: 0.00
Prediction: 0.04, Actual: 0.01
Prediction: 0.04, Actual: 0.01
