In [1]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import sys
import os
from sklearn.preprocessing import MinMaxScaler
from scipy import stats
from scipy.stats import spearmanr, pearsonr
from statsmodels.tsa.stattools import adfuller
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
import plotly as plt

# Root of the game_price_prediction checkout, read from the environment.
# When the variable is unset this is '' and os.path.abspath('') resolves to the
# current working directory, so every os.path.join(GAME_PRICE_PREDICTION_PATH, ...)
# in this notebook then becomes relative to wherever the kernel was started.
GAME_PRICE_PREDICTION_PATH = os.environ.get('GAME_PRICE_PREDICTION_PATH', '')
# Put the project root first on sys.path so the `python_scripts` imports below resolve.
sys.path.insert(0, os.path.abspath(GAME_PRICE_PREDICTION_PATH))

cwd = GAME_PRICE_PREDICTION_PATH

from python_scripts.utilities.api_calls import get_cookie_from_blob, fetch_item_to_df, fetch_items
from python_scripts.sentiment_analysis.config import ALL_MENTIONS_FILENAME, ITEM, POLARITY_FOLDER_NAME, ITEM_SANITIZED

# -----------------------------------------------------------------------------
# Notebook overview
# - Takes data from mention_data
# - and a range of others; polarity_data, or fetches price history
# - Plots it
#
# Kept as comments on purpose: the previous quote-banner was a real string
# expression at the end of the cell, so Jupyter displayed its repr as the
# cell's output.
# -----------------------------------------------------------------------------

c:\Users\Nukul\Desktop\Code\game_price_prediction\python_scripts\sentiment_analysis


'"" \n- Takes data from mention_data \n- and a range of others; polarity_data, or fetches price history\n- Plots it sexily\n'

## prerequisites (data loading + verification)


In [2]:
# Sanity check: echo the ITEM imported from config.py so the reader can confirm
# which item the fetches and plots below are run for.

print(f"ITEM = {ITEM}")

ITEM = M4A1-S | Golden Coil (Factory New)


In [3]:
# Disabled pipeline steps. Each pair below builds the path to one script under
# python_scripts/sentiment_analysis and runs it with the IPython `!` shell escape.
# Everything is commented out, so running this cell currently does nothing.
# NOTE(review): presumably these scripts regenerate the mention / polarity files
# read later in the notebook - confirm the required order before re-enabling.

# filter_file_path = os.path.join(GAME_PRICE_PREDICTION_PATH, 'python_scripts', 'sentiment_analysis', 'filter_file.py')
# !python "{filter_file_path}"

# mention_counter_path = os.path.join(GAME_PRICE_PREDICTION_PATH, 'python_scripts', 'sentiment_analysis', 'mention_counter.py')
# !python "{mention_counter_path}"

# mention_data_combiner_path = os.path.join(GAME_PRICE_PREDICTION_PATH, 'python_scripts', 'sentiment_analysis', 'mention_data_combiner.py')
# !python "{mention_data_combiner_path}"

# vader_polarity_path = os.path.join(GAME_PRICE_PREDICTION_PATH, 'python_scripts', 'sentiment_analysis', 'vader_polarity.py')
# !python "{vader_polarity_path}"

# polarity_score_combiner_path = os.path.join(GAME_PRICE_PREDICTION_PATH, 'python_scripts', 'sentiment_analysis', 'polarity_score_combiner.py')
# !python "{polarity_score_combiner_path}"


## steam_api fetch price + volume data

In [4]:
### FETCHING ITEM

# Fetch price and volume data FOR ITEM CHOSEN
# The cookie returned by get_cookie_from_blob() is passed straight to
# fetch_item_to_df below. NOTE(review): presumably a Steam session cookie that
# expires regularly (hence "daily") - confirm in python_scripts.utilities.api_calls.
dailyCookie = get_cookie_from_blob()
# `items` is not read again in the cells visible here.
items = fetch_items()
# Later cells read the 'date', 'volume' and 'price_usd' columns of this frame
# (falling back to reset_index() when 'date' is not a column).
df = fetch_item_to_df(ITEM, dailyCookie)
print(f"Data for {ITEM} fetched")


Data for M4A1-S | Golden Coil (Factory New) fetched


## timeseries - mentions against volume plot

In [5]:
# Plot smoothed mention counts against smoothed traded volume for ITEM on a
# shared time axis, then save the figure as PNG and interactive HTML.

# Load the mention counts and parse their dates so they can be joined on 'date'
mentions_df = pd.read_csv(os.path.join(GAME_PRICE_PREDICTION_PATH, 'data', 'reddit_data', 'mention_all', ALL_MENTIONS_FILENAME))
mentions_df['date'] = pd.to_datetime(mentions_df['date'])

# Ensure df has a 'date' column
if 'date' not in df.columns:
    df = df.reset_index()

# Coerce the item dates to datetime as well, so both merge keys share a dtype:
# pandas refuses to merge a datetime64 key with an object (string) key.
# The market-volume cell further down applies the same conversion.
df['date'] = pd.to_datetime(df['date'])

# Merge mentions and volume data.
# The outer join keeps dates present in only one source; sorting before the
# forward fill makes each gap take the most recent earlier value.
# NOTE(review): the fill also carries num_mentions forward onto dates that only
# exist in the price data - confirm that is intended rather than filling with 0.
merged_df = pd.merge(mentions_df, df[['date', 'volume', 'price_usd']], on='date', how='outer').sort_values('date')
merged_df = merged_df.ffill()

# Smooth mentions and volume using a rolling-mean window (in rows);
# the first window-1 rows of each smoothed column are NaN.
window = 5

merged_df['smoothed_mentions'] = merged_df['num_mentions'].rolling(window=window).mean()
merged_df['smoothed_volume'] = merged_df['volume'].rolling(window=window).mean()
merged_df['smoothed_price'] = merged_df['price_usd'].rolling(window=window).mean()

# Create figure with secondary y-axis (mentions and volume each get their own axis)
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces: mentions on the primary axis, volume on the secondary axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['smoothed_mentions'], name="Smoothed Mentions", line=dict(color='blue')),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['smoothed_volume'], name="Smoothed Volume", line=dict(color='red')),
    secondary_y=True,
)

# Update layout for better readability
fig.update_layout(
    title_text=f"Smoothed Mentions and Volume Over Time for {ITEM}",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin=dict(l=50, r=50, t=80, b=50),
)

# x and y axis labels
fig.update_yaxes(title_text="Smoothed Number of Mentions", secondary_y=False, gridcolor='lightgrey')
fig.update_yaxes(title_text="Smoothed Volume Traded", secondary_y=True, gridcolor='lightgrey')
fig.update_xaxes(title_text="Date", gridcolor='lightgrey')

# Show the figure
fig.show()


# Saving: one folder per item under data/figures
save_dir = os.path.join(GAME_PRICE_PREDICTION_PATH, 'data', 'figures', ITEM_SANITIZED)
os.makedirs(save_dir, exist_ok=True)

# Save static PNG (overwrites any previous export of this plot)
fig.write_image(
    os.path.join(save_dir, 'timeseries_men_vol.png'),
    width=1920,
    height=1080,
    scale=2
)

# Save interactive HTML
fig.write_html(os.path.join(save_dir, 'timeseries_men_vol.html'))

print(f"Saved to ./data/figures/{ITEM_SANITIZED}")

Saved to ./data/figures/m4a1-s___golden_coil_(factory_new)


## Windowed scatterplot - mentions & volume plot

In [6]:
# Scatter of smoothed mentions against smoothed volume inside a fixed date
# range, with a least-squares trend line, saved as a numbered PNG.
# (go and np come from the import cell at the top of the notebook; they are
# not re-imported here.)

# Define your date range
# YYYY-MM-DD
start_date = pd.to_datetime('2019-01-01')
end_date = pd.to_datetime('2019-06-01')

# Rolling-mean window (in rows) used for this plot only
window = 7

# Ensure date column is datetime
merged_df['date'] = pd.to_datetime(merged_df['date'])

# Filter dataframe for date range (both ends inclusive) and create a copy so the
# smoothed columns written below do not touch merged_df
date_filtered_df = merged_df[(merged_df['date'] >= start_date) &
                            (merged_df['date'] <= end_date)].copy()

# Calculate smoothed values on filtered data using .loc.
# Smoothing happens after filtering, so the first window-1 rows of the range are NaN.
date_filtered_df.loc[:, 'smoothed_mentions'] = date_filtered_df['num_mentions'].rolling(window=window).mean()
date_filtered_df.loc[:, 'smoothed_volume'] = date_filtered_df['volume'].rolling(window=window).mean()
date_filtered_df.loc[:, 'smoothed_price'] = date_filtered_df['price_usd'].rolling(window=window).mean()

# Remove any NaN values before fitting
clean_df = date_filtered_df.dropna(subset=['smoothed_volume', 'smoothed_mentions'])

# Create the scatter plot
fig = go.Figure()

# Add scatter points; the date is attached as hover text
fig.add_trace(
    go.Scatter(
        x=clean_df['smoothed_volume'],
        y=clean_df['smoothed_mentions'],
        mode='markers',
        marker=dict(size=8),
        name='Data Points',
        text=clean_df['date'],
        hovertemplate='Smoothed Volume: %{x}<br>Smoothed Mentions: %{y}<br>Date: %{text}<extra></extra>'
    )
)

# Add regression line only if we have valid data
if len(clean_df) > 1:  # Need at least 2 points for a line
    try:
        # Degree-1 fit: z[0] is the slope, z[1] the intercept
        z = np.polyfit(clean_df['smoothed_volume'], clean_df['smoothed_mentions'], 1)
        fig.add_trace(
            go.Scatter(
                x=clean_df['smoothed_volume'],
                y=z[0] * clean_df['smoothed_volume'] + z[1],
                mode='lines',
                name='Trend line',
                line=dict(color='red')
            )
        )
    except np.linalg.LinAlgError:
        # Keep the scatter without a trend line instead of failing the cell
        print("Could not calculate regression line due to data issues")

# Update layout for better readability
fig.update_layout(
    title=f"Smoothed Number of Mentions vs. Smoothed Volume for {ITEM}<br>Date Range: {start_date.date()} to {end_date.date()}<br>Smoothing Window: {window} days",
    xaxis_title="Smoothed Volume Traded",
    yaxis_title="Smoothed Number of Mentions",
    height=600,
    width=800,
)

# Show the figure
fig.show()

# saving
save_dir = os.path.join(GAME_PRICE_PREDICTION_PATH, 'data', 'figures', ITEM_SANITIZED)
os.makedirs(save_dir, exist_ok=True)

# Pick the first unused index so earlier exports (other date ranges / windows)
# are never overwritten
i = 1
while os.path.exists(os.path.join(save_dir, f'scatter_men_vol_{i}.png')):
    i += 1

# Save static PNG
fig.write_image(
    os.path.join(save_dir, f'scatter_men_vol_{i}.png'),
    width=1920,
    height=1080,
    scale=2
)

print(f"Saved to ./data/figures/{ITEM_SANITIZED}/scatter_men_vol_{i}.png")

Saved to ./data/figures/m4a1-s___golden_coil_(factory_new)/scatter_men_vol_4.png


## Correlation stats

In [7]:
# Correlation and stationarity statistics between mention counts and traded volume.

# Ensure no missing values.
# Work on a separately named NaN-free frame instead of overwriting merged_df:
# the plotting cells above read merged_df, and silently replacing it here would
# make their result depend on whether this cell had already been run.
corr_df = merged_df.dropna(subset=['num_mentions', 'volume'])

print("Item name: " + ITEM)

# 1. Spearman's Correlation (rank-based)
spearman_corr, spearman_p = spearmanr(corr_df['num_mentions'], corr_df['volume'])
print(f"Spearman's correlation: {spearman_corr}, p-value: {spearman_p}")

# 2. Pearson's Correlation (linear)
pearson_corr, pearson_p = pearsonr(corr_df['num_mentions'], corr_df['volume'])
print(f"Pearson's correlation: {pearson_corr}, p-value: {pearson_p}")

# 3. Stationarity Test (ADF)
# adf_result[0] is the test statistic, adf_result[1] its p-value. A large
# p-value means a unit root cannot be rejected (the volume series looks
# non-stationary), in which case the level correlations above should be read
# with caution.
adf_result = adfuller(corr_df['volume'])
print(f"ADF Statistic: {adf_result[0]}, p-value: {adf_result[1]}")

Item name: M4A1-S | Golden Coil (Factory New)
Spearman's correlation: 0.3494443827944116, p-value: 2.3244530753391633e-96
Pearson's correlation: 0.6907618558436726, p-value: 0.0
ADF Statistic: -1.5607347685758544, p-value: 0.5032817823615146


## Market vol against item vol

In [8]:
# Compare total market volume with this item's volume over time: load the market
# history, align it with the item data by date, smooth both series, plot them on
# twin y-axes and export a PNG.

# Read the market-wide history CSV
market_history_path = os.path.join(
    GAME_PRICE_PREDICTION_PATH, 'data', 'market_history', 'total_market_history.csv'
)
market_df = pd.read_csv(market_history_path)

# The item frame may carry its dates in the index; make them a column
if 'date' not in df.columns:
    df = df.reset_index()

# Both join keys must be datetimes
market_df['date'] = pd.to_datetime(market_df['date'])
df['date'] = pd.to_datetime(df['date'])

# Outer-join on date (overlapping 'volume' columns get _market / _item suffixes),
# order chronologically, then forward-fill gaps left by dates missing on one side
merged_df = (
    market_df[['date', 'volume']]
    .merge(df[['date', 'volume']], on='date', how='outer', suffixes=('_market', '_item'))
    .sort_values('date')
    .ffill()
)

# Rolling-mean smoothing, same window for both series
window = 5
for smoothed_col, raw_col in (
    ('smoothed_volume_market', 'volume_market'),
    ('smoothed_volume_item', 'volume_item'),
):
    merged_df[smoothed_col] = merged_df[raw_col].rolling(window=window).mean()

# Twin-axis figure: market volume on the primary axis, item volume on the secondary
fig = make_subplots(specs=[[{"secondary_y": True}]])

for trace_name, y_col, line_color, on_secondary in (
    ("Market Volume", 'smoothed_volume_market', 'blue', False),
    ("Item Volume", 'smoothed_volume_item', 'red', True),
):
    fig.add_trace(
        go.Scatter(
            x=merged_df['date'],
            y=merged_df[y_col],
            name=trace_name,
            line=dict(color=line_color),
        ),
        secondary_y=on_secondary,
    )

# Title, horizontal legend above the plot area, and margins
fig.update_layout(
    title_text=f"Smoothed Market Volume vs Item Volume Over Time for {ITEM}",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin=dict(l=50, r=50, t=80, b=50),
)

# Axis titles and grid colour
for axis_title, on_secondary in (
    ("Smoothed Market Volume", False),
    ("Smoothed Item Volume", True),
):
    fig.update_yaxes(title_text=axis_title, secondary_y=on_secondary, gridcolor='lightgrey')
fig.update_xaxes(title_text="Date", gridcolor='lightgrey')

# Render inline
fig.show()

# Export a static PNG into the per-item figures folder
save_dir = os.path.join(GAME_PRICE_PREDICTION_PATH, 'data', 'figures', ITEM_SANITIZED)
os.makedirs(save_dir, exist_ok=True)

png_path = os.path.join(save_dir, 'timeseries_market_item_vol.png')
fig.write_image(png_path, width=1920, height=1080, scale=2)

print(f"Saved to ./data/figures/{ITEM_SANITIZED}/timeseries_market_item_vol.png")

Saved to ./data/figures/m4a1-s___golden_coil_(factory_new)/timeseries_market_item_vol.png
