In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import sys
import os
from sklearn.preprocessing import MinMaxScaler
from scipy import stats
from scipy.stats import spearmanr, pearsonr
from statsmodels.tsa.stattools import adfuller
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
import plotly as plt

# print(cwd)

# Project root, read from the GAME_PRICE_PREDICTION_PATH environment variable.
# When the variable is unset this is the empty string, which os.path.abspath
# resolves to the current working directory.
GAME_PRICE_PREDICTION_PATH = os.environ.get('GAME_PRICE_PREDICTION_PATH', '')
# Put the project root first on the import path; presumably this is what lets the
# `python_scripts` imports below resolve, so it must run before them.
sys.path.insert(0, os.path.abspath(GAME_PRICE_PREDICTION_PATH))

# Alias of the raw (non-absolute) project path.
cwd = GAME_PRICE_PREDICTION_PATH

# Project-local helpers and configuration (resolved through the sys.path entry added above).
from python_scripts.utilities.api_calls import get_cookie_from_blob, fetch_item_to_df, fetch_items
from python_scripts.sentiment_analysis.config import ITEM, POLARITY_FOLDER_NAME, ITEM_SANITIZED

""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" 
- Takes data from mention_data
- and from a range of other sources: polarity_data, or fetched price history
- Plots the results
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" 

## Prerequisites (data loading + verification)


In [None]:
# Sanity check: show which item config.py is currently pointing at
# before any of the analysis cells below are run.
print("ITEM = {}".format(ITEM))

In [4]:
# filter_file_path = os.path.join(GAME_PRICE_PREDICTION_PATH, 'python_scripts', 'sentiment_analysis', 'filter_file.py')
# !python "{filter_file_path}"

# mention_counter_path = os.path.join(GAME_PRICE_PREDICTION_PATH, 'python_scripts', 'sentiment_analysis', 'mention_counter.py')
# !python "{mention_counter_path}"

# mention_data_combiner_path = os.path.join(GAME_PRICE_PREDICTION_PATH, 'python_scripts', 'sentiment_analysis', 'mention_data_combiner.py')
# !python "{mention_data_combiner_path}"

# vader_polarity_path = os.path.join(GAME_PRICE_PREDICTION_PATH, 'python_scripts', 'sentiment_analysis', 'vader_polarity.py')
# !python "{vader_polarity_path}"

# polarity_score_combiner_path = os.path.join(GAME_PRICE_PREDICTION_PATH, 'python_scripts', 'sentiment_analysis', 'polarity_score_combiner.py')
# !python "{polarity_score_combiner_path}"


## Market volume against market price

In [None]:
# Plot smoothed whole-market volume against smoothed market price on a shared
# time axis (volume on the left y-axis, price on the right), show the figure,
# and save it as a PNG under <project root>/data/figures.

# Load market history data
market_df = pd.read_csv(os.path.join(GAME_PRICE_PREDICTION_PATH, 'data', 'market_history', 'total_market_history.csv'))

# Convert dates to datetime
market_df['date'] = pd.to_datetime(market_df['date'])

# Apply smoothing: trailing rolling mean over `window` rows; leading rows that
# do not yet have a full window come out as NaN and are simply not drawn.
window = 5
market_df['smoothed_volume'] = market_df['volume'].rolling(window=window).mean()
market_df['smoothed_price'] = market_df['price_usd'].rolling(window=window).mean()

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=market_df['date'],
               y=market_df['smoothed_volume'],
               name="Market Volume",
               line=dict(color='blue')),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=market_df['date'],
               y=market_df['smoothed_price'],
               name="Market Price",
               line=dict(color='red')),
    secondary_y=True,
)

# Update layout
fig.update_layout(
    title_text="CS:GO (Entire) Market Volume vs Market Price Over Time",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin=dict(l=50, r=50, t=80, b=50),
)

# Update axes labels
fig.update_yaxes(title_text="Smoothed Market Volume", secondary_y=False, gridcolor='lightgrey')
fig.update_yaxes(title_text="Smoothed Market Price", secondary_y=True, gridcolor='lightgrey')
fig.update_xaxes(title_text="Date", gridcolor='lightgrey')

# Show figure
fig.show()

# Save plots
save_dir = os.path.join(GAME_PRICE_PREDICTION_PATH, 'data', 'figures')
os.makedirs(save_dir, exist_ok=True)

# The file name is defined once so the save call and the confirmation message
# cannot drift apart (the message used to report a file that was never written).
figure_filename = 'csgo_market_vol_price.png'

# Save static PNG
fig.write_image(
    os.path.join(save_dir, figure_filename),
    width=1920,
    height=1080,
    scale=2
)

print(f"Saved to ./data/figures/{figure_filename}")


## Market volume against overall sentiment

In [None]:
# Overlay smoothed whole-market volume (left axis) with smoothed Reddit
# compound sentiment (right axis), display the chart and export it as a PNG.

# --- Inputs -----------------------------------------------------------------
market_history_csv = os.path.join(
    GAME_PRICE_PREDICTION_PATH, 'data', 'market_history', 'total_market_history.csv'
)
polarity_csv = os.path.join(
    GAME_PRICE_PREDICTION_PATH, 'data', 'reddit_data', 'polarity_all', 'all_data_all_polarity.csv'
)
market_df = pd.read_csv(market_history_csv)
polarity_df = pd.read_csv(polarity_csv)

# Parse the date column of both tables
for frame in (market_df, polarity_df):
    frame['date'] = pd.to_datetime(frame['date'])

# --- Smoothing --------------------------------------------------------------
# Rolling mean over `window` consecutive rows of each table
window = 10
market_df['smoothed_volume'] = market_df['volume'].rolling(window=window).mean()
polarity_df['smoothed_compound'] = polarity_df['compound'].rolling(window=window).mean()

# --- Figure -----------------------------------------------------------------
volume_trace = go.Scatter(
    x=market_df['date'],
    y=market_df['smoothed_volume'],
    name="Market Volume",
    line={'color': 'blue'},
)
sentiment_trace = go.Scatter(
    x=polarity_df['date'],
    y=polarity_df['smoothed_compound'],
    name="Sentiment Polarity",
    line={'color': 'green'},
)

# Volume goes on the primary y-axis, sentiment on the secondary one
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(volume_trace, secondary_y=False)
fig.add_trace(sentiment_trace, secondary_y=True)

fig.update_layout(
    title_text="CS:GO Market Volume and Reddit Sentiment Over Time",
    legend={'orientation': "h", 'yanchor': "bottom", 'y': 1.02, 'xanchor': "right", 'x': 1},
    margin={'l': 50, 'r': 50, 't': 80, 'b': 50},
)

# Axis titles and grid styling
fig.update_yaxes(title_text="Smoothed Market Volume", secondary_y=False, gridcolor='lightgrey')
fig.update_yaxes(title_text="Sentiment Polarity (Compound)", secondary_y=True, gridcolor='lightgrey')
fig.update_xaxes(title_text="Date", gridcolor='lightgrey')

fig.show()

# --- Export -----------------------------------------------------------------
save_dir = os.path.join(GAME_PRICE_PREDICTION_PATH, 'data', 'figures')
os.makedirs(save_dir, exist_ok=True)

fig.write_image(
    os.path.join(save_dir, 'csgo_market_vol_sentiment.png'),
    width=1920,
    height=1080,
    scale=2,
)

print("Saved to ./data/figures/csgo_market_vol_sentiment.png")

## Market price against overall sentiment

In [None]:
# Overlay smoothed market price (left axis) with smoothed Reddit compound
# sentiment (right axis), display the chart and export it as a PNG.

# Read the market history and the combined polarity scores
data_root = os.path.join(GAME_PRICE_PREDICTION_PATH, 'data')
market_df = pd.read_csv(os.path.join(data_root, 'market_history', 'total_market_history.csv'))
polarity_df = pd.read_csv(os.path.join(data_root, 'reddit_data', 'polarity_all', 'all_data_all_polarity.csv'))

# Dates arrive as text; turn them into timestamps
market_df['date'] = pd.to_datetime(market_df['date'])
polarity_df['date'] = pd.to_datetime(polarity_df['date'])

# Rolling mean over `window` consecutive rows of each table
window = 5
price_roll = market_df['price_usd'].rolling(window=window)
compound_roll = polarity_df['compound'].rolling(window=window)
market_df['smoothed_price'] = price_roll.mean()
polarity_df['smoothed_compound'] = compound_roll.mean()

# Two y-axes: price on the primary, sentiment on the secondary
fig = make_subplots(specs=[[{"secondary_y": True}]])

price_trace = go.Scatter(
    x=market_df['date'],
    y=market_df['smoothed_price'],
    name="Market Price",
    line={'color': 'red'},
)
fig.add_trace(price_trace, secondary_y=False)

sentiment_trace = go.Scatter(
    x=polarity_df['date'],
    y=polarity_df['smoothed_compound'],
    name="Sentiment Polarity",
    line={'color': 'green'},
)
fig.add_trace(sentiment_trace, secondary_y=True)

# Title, horizontal legend above the plot area, and margins
fig.update_layout(
    title_text="CS:GO Market Price and Reddit Sentiment Over Time",
    legend={'orientation': "h", 'yanchor': "bottom", 'y': 1.02, 'xanchor': "right", 'x': 1},
    margin={'l': 50, 'r': 50, 't': 80, 'b': 50},
)

# Axis titles and grid styling
fig.update_yaxes(title_text="Smoothed Market Price (USD)", secondary_y=False, gridcolor='lightgrey')
fig.update_yaxes(title_text="Sentiment Polarity (Compound)", secondary_y=True, gridcolor='lightgrey')
fig.update_xaxes(title_text="Date", gridcolor='lightgrey')

fig.show()

# Make sure the figures folder exists, then write the static PNG
save_dir = os.path.join(data_root, 'figures')
os.makedirs(save_dir, exist_ok=True)

fig.write_image(
    os.path.join(save_dir, 'csgo_market_price_sentiment.png'),
    width=1920,
    height=1080,
    scale=2,
)

print("Saved to ./data/figures/csgo_market_price_sentiment.png")

## Windowed scatterplot: market price vs compound sentiment

In [None]:
# Scatter of smoothed market price against smoothed compound sentiment within
# one date range, plus a first-degree polynomial trend line.
# NOTE: reads `market_df` and `polarity_df` as left behind by an earlier cell.

# Date range to analyse and smoothing width (in rows)
window = 7
start_date = pd.to_datetime('2023-05-01')
end_date = pd.to_datetime('2023-06-01')

# Both date columns must be timestamps before joining on them
market_df['date'] = pd.to_datetime(market_df['date'])
polarity_df['date'] = pd.to_datetime(polarity_df['date'])

# Keep only the dates present in both tables
merged_df = pd.merge(market_df, polarity_df, on='date', how='inner')

# Restrict to the requested range (both ends inclusive); copy so the new
# columns are written to an independent frame
in_range = (merged_df['date'] >= start_date) & (merged_df['date'] <= end_date)
date_filtered_df = merged_df[in_range].copy()

# Smooth inside the filtered range only
price_smoothed = date_filtered_df['price_usd'].rolling(window=window).mean()
compound_smoothed = date_filtered_df['compound'].rolling(window=window).mean()
date_filtered_df.loc[:, 'smoothed_price'] = price_smoothed
date_filtered_df.loc[:, 'smoothed_compound'] = compound_smoothed

# Rows without a full smoothing window are NaN; drop them before fitting
clean_df = date_filtered_df.dropna(subset=['smoothed_price', 'smoothed_compound'])

# One marker per remaining row, with the date shown on hover
points_trace = go.Scatter(
    x=clean_df['smoothed_price'],
    y=clean_df['smoothed_compound'],
    mode='markers',
    marker={'size': 8},
    name='Data Points',
    text=clean_df['date'],
    hovertemplate='Smoothed Price: %{x}<br>Smoothed Sentiment: %{y}<br>Date: %{text}<extra></extra>',
)

fig = go.Figure()
fig.add_trace(points_trace)

# A line needs at least two points; skip the fit otherwise
if len(clean_df) > 1:
    try:
        z = np.polyfit(clean_df['smoothed_price'], clean_df['smoothed_compound'], 1)
    except np.linalg.LinAlgError:
        print("Could not calculate regression line due to data issues")
    else:
        slope, intercept = z[0], z[1]
        fig.add_trace(
            go.Scatter(
                x=clean_df['smoothed_price'],
                y=slope * clean_df['smoothed_price'] + intercept,
                mode='lines',
                name='Trend line',
                line={'color': 'red'},
            )
        )

# Titles and on-screen size
fig.update_layout(
    title=(
        f"Smoothed Market Price vs. Smoothed Sentiment"
        f"<br>Date Range: {start_date.date()} to {end_date.date()}"
        f"<br>Smoothing Window: {window} days"
    ),
    xaxis_title="Smoothed Market Price (USD)",
    yaxis_title="Smoothed Sentiment (Compound)",
    height=600,
    width=800,
)

fig.show()

# Export: pick the first unused numbered file name so earlier exports are kept
save_dir = os.path.join(GAME_PRICE_PREDICTION_PATH, 'data', 'figures')
os.makedirs(save_dir, exist_ok=True)

i = 1
png_path = os.path.join(save_dir, f'scatter_price_sentiment_{i}.png')
while os.path.exists(png_path):
    i += 1
    png_path = os.path.join(save_dir, f'scatter_price_sentiment_{i}.png')

fig.write_image(png_path, width=1920, height=1080, scale=2)

print(f"Saved to ./data/figures/scatter_price_sentiment_{i}.png")

## Windowed scatterplot: market volume vs compound sentiment

In [None]:
# Scatter of smoothed market volume against smoothed compound sentiment within
# one date range, plus a first-degree polynomial trend line.
# NOTE: reads `market_df` and `polarity_df` as left behind by an earlier cell.

# Date range to analyse and smoothing width (in rows)
window = 7
start_date = pd.to_datetime('2023-01-01')
end_date = pd.to_datetime('2023-06-01')

# Both date columns must be timestamps before joining on them
for frame in (market_df, polarity_df):
    frame['date'] = pd.to_datetime(frame['date'])

# Keep only the dates present in both tables
merged_df = pd.merge(market_df, polarity_df, on='date', how='inner')

# Restrict to the requested range (both ends inclusive); copy so the new
# columns are written to an independent frame
not_too_early = merged_df['date'] >= start_date
not_too_late = merged_df['date'] <= end_date
date_filtered_df = merged_df[not_too_early & not_too_late].copy()

# Smooth inside the filtered range only
volume_smoothed = date_filtered_df['volume'].rolling(window=window).mean()
compound_smoothed = date_filtered_df['compound'].rolling(window=window).mean()
date_filtered_df.loc[:, 'smoothed_volume'] = volume_smoothed
date_filtered_df.loc[:, 'smoothed_compound'] = compound_smoothed

# Rows without a full smoothing window are NaN; drop them before fitting
clean_df = date_filtered_df.dropna(subset=['smoothed_volume', 'smoothed_compound'])

# One marker per remaining row, with the date shown on hover
points_trace = go.Scatter(
    x=clean_df['smoothed_volume'],
    y=clean_df['smoothed_compound'],
    mode='markers',
    marker={'size': 8},
    name='Data Points',
    text=clean_df['date'],
    hovertemplate='Smoothed Volume: %{x}<br>Smoothed Sentiment: %{y}<br>Date: %{text}<extra></extra>',
)

fig = go.Figure()
fig.add_trace(points_trace)

# A line needs at least two points; skip the fit otherwise
if len(clean_df) > 1:
    try:
        z = np.polyfit(clean_df['smoothed_volume'], clean_df['smoothed_compound'], 1)
    except np.linalg.LinAlgError:
        print("Could not calculate regression line due to data issues")
    else:
        slope, intercept = z[0], z[1]
        fig.add_trace(
            go.Scatter(
                x=clean_df['smoothed_volume'],
                y=slope * clean_df['smoothed_volume'] + intercept,
                mode='lines',
                name='Trend line',
                line={'color': 'red'},
            )
        )

# Titles and on-screen size
fig.update_layout(
    title=(
        f"Smoothed Market Volume vs. Smoothed Sentiment"
        f"<br>Date Range: {start_date.date()} to {end_date.date()}"
        f"<br>Smoothing Window: {window} days"
    ),
    xaxis_title="Smoothed Market Volume",
    yaxis_title="Smoothed Sentiment (Compound)",
    height=600,
    width=800,
)

fig.show()

# Export: pick the first unused numbered file name so earlier exports are kept
save_dir = os.path.join(GAME_PRICE_PREDICTION_PATH, 'data', 'figures')
os.makedirs(save_dir, exist_ok=True)

i = 1
png_path = os.path.join(save_dir, f'scatter_volume_sentiment_{i}.png')
while os.path.exists(png_path):
    i += 1
    png_path = os.path.join(save_dir, f'scatter_volume_sentiment_{i}.png')

fig.write_image(png_path, width=1920, height=1080, scale=2)

print(f"Saved to ./data/figures/scatter_volume_sentiment_{i}.png")