In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import sys
import os
from sklearn.preprocessing import MinMaxScaler
from scipy import stats
from scipy.stats import spearmanr, pearsonr
from statsmodels.tsa.stattools import adfuller
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from pmdarima import auto_arima
from statsmodels.tsa.arima.model import ARIMA

# ---------------------------------------------------------------------------
# Project path setup
#
# The notebook may be started from different working directories (the project
# root itself, or a folder a couple of levels below it). Instead of switching
# between hard-coded per-OS variants by commenting lines in and out, walk
# upwards from the current working directory until the folder that contains
# the `python_scripts` package is found.
# ---------------------------------------------------------------------------
cwd = os.getcwd()


def _find_project_root(start_dir, marker='python_scripts'):
    """Return the nearest ancestor of ``start_dir`` (inclusive) that contains ``marker``.

    Args:
        start_dir: Directory from which the upward search starts.
        marker: Name of a sub-directory that identifies the project root.

    Returns:
        The absolute path of the first matching directory, or ``None`` when
        the filesystem root is reached without finding ``marker``.
    """
    current = os.path.abspath(start_dir)
    while True:
        if os.path.isdir(os.path.join(current, marker)):
            return current
        parent = os.path.dirname(current)
        if parent == current:  # dirname() of the filesystem root is itself
            return None
        current = parent


game_price_prediction_path = _find_project_root(cwd)
if game_price_prediction_path is None:
    # Fall back to the previous default: two levels above the working directory.
    game_price_prediction_path = os.path.abspath(os.path.join(cwd, '..', '..'))

# Put the project root first on sys.path so `python_scripts` is importable.
# The guard keeps re-running this cell from stacking duplicate entries.
if not sys.path or sys.path[0] != game_price_prediction_path:
    sys.path.insert(0, game_price_prediction_path)

from python_scripts.utilities.api_calls import get_cookie_from_blob, fetch_item_to_df, fetch_items
from python_scripts.sentiment_analysis.config import ITEM, ALL_POLARITY_FILENAME

# ---------------------------------------------------------------------------
# Notebook overview
# - Takes data from mention_data
# - and a range of others: polarity_data, or fetched price history
# - Plots it as interactive Plotly figures
# ---------------------------------------------------------------------------

### Prerequisites


In [None]:
# Sanity check before running anything else: show which item config.py
# currently selects, so the plots below are not produced for the wrong item.
selected_item_message = f"ITEM = {ITEM}"
print(selected_item_message)

In [4]:
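# Optional preprocessing steps, currently disabled. Uncomment a pair of lines
# to run the corresponding script from python_scripts/sentiment_analysis
# (presumably to regenerate the input data used below - confirm before running).
# filter_file_path = os.path.join(game_price_prediction_path, 'python_scripts', 'sentiment_analysis', 'filter_file.py')
# !python "{filter_file_path}"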

# mention_counter_path = os.path.join(game_price_prediction_path, 'python_scripts', 'sentiment_analysis', 'mention_counter.py')
# !python "{mention_counter_path}"

# mention_data_combiner_path = os.path.join(game_price_prediction_path, 'python_scripts', 'sentiment_analysis', 'mention_data_combiner.py')
# !python "{mention_data_combiner_path}"

# vader_polarity_path = os.path.join(game_price_prediction_path, 'python_scripts', 'sentiment_analysis', 'vader_polarity.py')
# !python "{vader_polarity_path}"


### Fetch price and volume history

In [None]:
### FETCHING ITEM

# Fetch the data for the item selected in config.py (ITEM).
# The plotting cells below read the 'volume' and 'price_usd' columns of `df`
# and need a 'date' column (they call reset_index() when it is missing).
dailyCookie = get_cookie_from_blob()  # presumably an auth cookie for the API - confirm in api_calls
items = fetch_items()  # NOTE(review): `items` is not used in the visible cells - confirm whether this call is still needed
df = fetch_item_to_df(ITEM, dailyCookie)
print(f"Data for {ITEM} fetched")


### Plotting compound polarity and volume

In [None]:
# Plot compound sentiment (left axis) and traded volume (right axis) over time.

# Load the polarity (sentiment) data; it is used here instead of mention counts
polarity_df = pd.read_csv(os.path.join(game_price_prediction_path, 'data', 'reddit_data', 'polarity_all', ALL_POLARITY_FILENAME))
polarity_df['date'] = pd.to_datetime(polarity_df['date'])

# The merge needs 'date' as a regular column; when it is missing it is
# presumably held in the index, so move the index back into the columns.
if 'date' not in df.columns:
    df = df.reset_index()

# Outer merge keeps dates that appear in only one of the two sources; after
# sorting by date, gaps are filled with the last known value.
merged_df = pd.merge(polarity_df, df[['date', 'volume']], on='date', how='outer').sort_values('date')
# `.ffill()` replaces the deprecated `fillna(method='ffill')` (same result)
merged_df = merged_df.ffill()

# One plot with two independent y-axes (sentiment and volume have different scales)
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Sentiment on the primary (left) axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['compound'], name="Sentiment", line=dict(color='blue')),
    secondary_y=False,
)

# Volume on the secondary (right) axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['volume'], name="Volume", line=dict(color='red')),
    secondary_y=True,
)

# Title, horizontal legend above the plot area, and margins
fig.update_layout(
    title_text=f"Sentiment and Volume Over Time for {ITEM}",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin=dict(l=50, r=50, t=80, b=50),
)

# Axis titles and grid lines
fig.update_yaxes(title_text="Sentiment Score (Compound)", secondary_y=False, gridcolor='lightgrey')
fig.update_yaxes(title_text="Volume Traded", secondary_y=True, gridcolor='lightgrey')
fig.update_xaxes(title_text="Date", gridcolor='lightgrey')

# Show the figure
fig.show()

### Plotting compound polarity and price

In [None]:
# Plot compound sentiment (left axis) and price in USD (right axis) over time.

# Load the polarity (sentiment) data; it is used here instead of mention counts
polarity_df = pd.read_csv(os.path.join(game_price_prediction_path, 'data', 'reddit_data', 'polarity_all', ALL_POLARITY_FILENAME))
polarity_df['date'] = pd.to_datetime(polarity_df['date'])

# The merge needs 'date' as a regular column; when it is missing it is
# presumably held in the index, so move the index back into the columns.
if 'date' not in df.columns:
    df = df.reset_index()

# Merge polarity and price data. The outer merge keeps dates that appear in
# only one of the two sources; gaps are filled with the last known value.
merged_df = pd.merge(polarity_df, df[['date', 'price_usd']], on='date', how='outer').sort_values('date')
# `.ffill()` replaces the deprecated `fillna(method='ffill')` (same result)
merged_df = merged_df.ffill()

# One plot with two independent y-axes (sentiment and price have different scales)
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Sentiment on the primary (left) axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['compound'], name="Sentiment", line=dict(color='blue')),
    secondary_y=False,
)

# Price on the secondary (right) axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['price_usd'], name="Price", line=dict(color='red')),
    secondary_y=True,
)

# Title, horizontal legend above the plot area, and margins.
# The title and the secondary axis previously said "Volume" although this
# figure shows the price series; both now describe what is actually plotted.
fig.update_layout(
    title_text=f"Sentiment and Price Over Time for {ITEM}",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin=dict(l=50, r=50, t=80, b=50),
)

# Axis titles and grid lines
fig.update_yaxes(title_text="Sentiment Score (Compound)", secondary_y=False, gridcolor='lightgrey')
fig.update_yaxes(title_text="Price (USD)", secondary_y=True, gridcolor='lightgrey')
fig.update_xaxes(title_text="Date", gridcolor='lightgrey')

# Show the figure
fig.show()

### Plotting smoothed compound polarity and smoothed volume

In [None]:
# Plot rolling-mean sentiment (left axis) and rolling-mean volume (right axis) over time.

# Load the polarity (sentiment) data; it is used here instead of mention counts
polarity_df = pd.read_csv(os.path.join(game_price_prediction_path, 'data', 'reddit_data', 'polarity_all', ALL_POLARITY_FILENAME))
polarity_df['date'] = pd.to_datetime(polarity_df['date'])

# The merge needs 'date' as a regular column; when it is missing it is
# presumably held in the index, so move the index back into the columns.
if 'date' not in df.columns:
    df = df.reset_index()

# Merge polarity and volume data. The outer merge keeps dates that appear in
# only one of the two sources; gaps are filled with the last known value.
merged_df = pd.merge(polarity_df, df[['date', 'volume']], on='date', how='outer').sort_values('date')
# `.ffill()` replaces the deprecated `fillna(method='ffill')` (same result)
merged_df = merged_df.ffill()

# Smooth both series with a rolling mean over `window_size` rows.
# NOTE(review): the window counts rows, which equals days only if the merged
# frame has exactly one row per calendar day - confirm.
# The first `window_size - 1` smoothed values are NaN (rolling() needs a full window).
window_size = 20
merged_df['smooth_compound'] = merged_df['compound'].rolling(window=window_size).mean()
merged_df['smooth_volume'] = merged_df['volume'].rolling(window=window_size).mean()

# One plot with two independent y-axes
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Smoothed sentiment on the primary (left) axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['smooth_compound'],
               name="Smoothed Sentiment", line=dict(color='blue', width=2.5)),
    secondary_y=False,
)

# Smoothed volume on the secondary (right) axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['smooth_volume'],
               name="Smoothed Volume", line=dict(color='red', width=2.5)),
    secondary_y=True,
)

# Title (includes the window size), horizontal legend above the plot, margins
fig.update_layout(
    title_text=f"Smoothed Sentiment and Volume Over Time for {ITEM} ({window_size}-day moving average)",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin=dict(l=50, r=50, t=80, b=50),
)

# Axis titles and grid lines
fig.update_yaxes(title_text="Sentiment Score (Compound)", secondary_y=False, gridcolor='lightgrey')
fig.update_yaxes(title_text="Volume Traded", secondary_y=True, gridcolor='lightgrey')
fig.update_xaxes(title_text="Date", gridcolor='lightgrey')

# Show the figure
fig.show()

### Plotting smoothed compound polarity and smoothed price

In [None]:
# Plot rolling-mean sentiment (left axis) and rolling-mean price (right axis) over time.

# Load the polarity (sentiment) data; it is used here instead of mention counts
polarity_df = pd.read_csv(os.path.join(game_price_prediction_path, 'data', 'reddit_data', 'polarity_all', ALL_POLARITY_FILENAME))
polarity_df['date'] = pd.to_datetime(polarity_df['date'])

# The merge needs 'date' as a regular column; when it is missing it is
# presumably held in the index, so move the index back into the columns.
if 'date' not in df.columns:
    df = df.reset_index()

# Merge polarity and price data. The outer merge keeps dates that appear in
# only one of the two sources; gaps are filled with the last known value.
merged_df = pd.merge(polarity_df, df[['date', 'price_usd']], on='date', how='outer').sort_values('date')
# `.ffill()` replaces the deprecated `fillna(method='ffill')` (same result)
merged_df = merged_df.ffill()

# Smooth both series with a rolling mean over `window_size` rows.
# NOTE(review): the window counts rows, which equals days only if the merged
# frame has exactly one row per calendar day - confirm.
# The first `window_size - 1` smoothed values are NaN (rolling() needs a full window).
window_size = 30
merged_df['smooth_compound'] = merged_df['compound'].rolling(window=window_size).mean()
merged_df['smooth_price'] = merged_df['price_usd'].rolling(window=window_size).mean()

# One plot with two independent y-axes
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Smoothed sentiment on the primary (left) axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['smooth_compound'],
               name="Smoothed Sentiment", line=dict(color='blue', width=2.5)),
    secondary_y=False,
)

# Smoothed price on the secondary (right) axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['smooth_price'],
               name="Smoothed Price", line=dict(color='red', width=2.5)),
    secondary_y=True,
)

# Title (includes the window size), horizontal legend above the plot, margins
fig.update_layout(
    title_text=f"Smoothed Sentiment and Price Over Time for {ITEM} ({window_size}-day moving average)",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin=dict(l=50, r=50, t=80, b=50),
)

# Axis titles and grid lines
fig.update_yaxes(title_text="Sentiment Score (Compound)", secondary_y=False, gridcolor='lightgrey')
fig.update_yaxes(title_text="Price (USD)", secondary_y=True, gridcolor='lightgrey')
fig.update_xaxes(title_text="Date", gridcolor='lightgrey')

# Show the figure
fig.show()

### Plotting compound polarity against volume

In [18]:
# Scatter plot of compound sentiment against traded volume, plus their correlation.

# Load the polarity (sentiment) data
polarity_df = pd.read_csv(os.path.join(game_price_prediction_path, 'data', 'reddit_data', 'polarity_all', ALL_POLARITY_FILENAME))
polarity_df['date'] = pd.to_datetime(polarity_df['date'])

# The merge needs 'date' as a regular column; when it is missing it is
# presumably held in the index, so move the index back into the columns.
if 'date' not in df.columns:
    df = df.reset_index()

# Outer merge keeps dates that appear in only one of the two sources; after
# sorting by date, gaps are filled with the last known value.
merged_df = pd.merge(polarity_df, df[['date', 'volume']], on='date', how='outer').sort_values('date')
# `.ffill()` replaces the deprecated `fillna(method='ffill')` (same result)
merged_df = merged_df.ffill()

# One marker per merged row: x = volume, y = sentiment
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=merged_df['volume'],
        y=merged_df['compound'],
        mode='markers',
        marker=dict(
            color='blue',
            size=8,
            opacity=0.6  # semi-transparent so overlapping points stay visible
        ),
        name="Sentiment vs Volume"
    )
)

# Titles, horizontal legend above the plot area, and margins
fig.update_layout(
    title_text=f"Sentiment vs Volume Correlation for {ITEM}",
    xaxis_title="Volume Traded",
    yaxis_title="Sentiment Score (Compound)",
    showlegend=True,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin=dict(l=50, r=50, t=80, b=50),
)

# Add gridlines
fig.update_xaxes(gridcolor='lightgrey')
fig.update_yaxes(gridcolor='lightgrey')

# Show the figure
fig.show()

# Correlation coefficient (Series.corr defaults to Pearson).
# NOTE(review): it is computed on the forward-filled outer merge, so repeated
# filled values contribute to the result - confirm this is intended.
correlation = merged_df['compound'].corr(merged_df['volume'])
print(f"Correlation coefficient between sentiment and volume: {correlation:.3f}")


DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.



Correlation coefficient between sentiment and volume: -0.156


### Plotting compound polarity against price

In [20]:
# Scatter plot of compound sentiment against price (USD), plus their correlation.

# Load the polarity (sentiment) data
polarity_df = pd.read_csv(os.path.join(game_price_prediction_path, 'data', 'reddit_data', 'polarity_all', ALL_POLARITY_FILENAME))
polarity_df['date'] = pd.to_datetime(polarity_df['date'])

# The merge needs 'date' as a regular column; when it is missing it is
# presumably held in the index, so move the index back into the columns.
if 'date' not in df.columns:
    df = df.reset_index()

# Outer merge keeps dates that appear in only one of the two sources; after
# sorting by date, gaps are filled with the last known value.
merged_df = pd.merge(polarity_df, df[['date', 'price_usd']], on='date', how='outer').sort_values('date')
# `.ffill()` replaces the deprecated `fillna(method='ffill')` (same result)
merged_df = merged_df.ffill()

# One marker per merged row: x = price, y = sentiment
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=merged_df['price_usd'],
        y=merged_df['compound'],
        mode='markers',
        marker=dict(
            color='blue',
            size=8,
            opacity=0.6  # semi-transparent so overlapping points stay visible
        ),
        name="Sentiment vs Price"
    )
)

# Titles, horizontal legend above the plot area, and margins
fig.update_layout(
    title_text=f"Sentiment vs Price Correlation for {ITEM}",
    xaxis_title="Price (USD)",
    yaxis_title="Sentiment Score (Compound)",
    showlegend=True,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin=dict(l=50, r=50, t=80, b=50),
)

# Add gridlines
fig.update_xaxes(gridcolor='lightgrey')
fig.update_yaxes(gridcolor='lightgrey')

# Show the figure
fig.show()

# Correlation coefficient (Series.corr defaults to Pearson).
# NOTE(review): it is computed on the forward-filled outer merge, so repeated
# filled values contribute to the result - confirm this is intended.
correlation = merged_df['compound'].corr(merged_df['price_usd'])
print(f"Correlation coefficient between sentiment and price: {correlation:.3f}")


DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.



Correlation coefficient between sentiment and price: 0.221
