In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import sys
import os
from sklearn.preprocessing import MinMaxScaler
from scipy import stats
from scipy.stats import spearmanr, pearsonr
from statsmodels.tsa.stattools import adfuller
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from pmdarima import auto_arima
from statsmodels.tsa.arima.model import ARIMA

cwd = os.getcwd()
# print(cwd)


def _find_project_root(start_dir, marker_dir='python_scripts'):
    """Return the closest ancestor of ``start_dir`` (itself included) that contains ``marker_dir``.

    Args:
        start_dir: Directory from which the upward search begins.
        marker_dir: Name of a sub-directory that identifies the project root.

    Returns:
        Absolute path of the matching directory, or ``None`` when the
        filesystem root is reached without finding ``marker_dir``.
    """
    candidate = os.path.abspath(start_dir)
    while True:
        if os.path.isdir(os.path.join(candidate, marker_dir)):
            return candidate
        parent = os.path.dirname(candidate)
        if parent == candidate:  # reached the filesystem root
            return None
        candidate = parent


# Locate the project root (the directory holding the `python_scripts` package
# imported below) regardless of whether the kernel was started in the project
# root or in a nested notebook folder. If the search fails, fall back to the
# previous behaviour of assuming the root is two levels above the cwd.
game_price_prediction_path = (
    _find_project_root(cwd)
    or os.path.abspath(os.path.join(cwd, '..', '..'))
)

# Keep the project root first on sys.path without stacking duplicates when the
# cell is re-run.
if not sys.path or sys.path[0] != game_price_prediction_path:
    sys.path.insert(0, game_price_prediction_path)

from python_scripts.utilities.api_calls import get_cookie_from_blob, fetch_item_to_df, fetch_items
from python_scripts.sentiment_analysis.config import ALL_MENTIONS_FILENAME, ITEM, POLARITY_FOLDER_NAME

""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" 
- Takes data from mention_data 
- and a range of others; polarity_data, or fetches price history
- Plots it sexily
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" 

### Prerequisites


In [None]:
# Sanity check: display the item currently selected in config.py before
# running any of the analysis cells below.

print("ITEM = {}".format(ITEM))

In [3]:
# Optional: run the scripts under python_scripts/sentiment_analysis from this notebook.
# Each pair of lines below builds the path to one script and runs it with `!python`.
# Everything is commented out, so this cell does nothing as written; uncomment a pair to run it.
# NOTE(review): presumably these scripts regenerate the CSV files read by the cells below — confirm.

# filter_file_path = os.path.join(game_price_prediction_path, 'python_scripts', 'sentiment_analysis', 'filter_file.py')
# !python "{filter_file_path}"

# mention_counter_path = os.path.join(game_price_prediction_path, 'python_scripts', 'sentiment_analysis', 'mention_counter.py')
# !python "{mention_counter_path}"

# mention_data_combiner_path = os.path.join(game_price_prediction_path, 'python_scripts', 'sentiment_analysis', 'mention_data_combiner.py')
# !python "{mention_data_combiner_path}"

# vader_polarity_path = os.path.join(game_price_prediction_path, 'python_scripts', 'sentiment_analysis', 'vader_polarity.py')
# !python "{vader_polarity_path}"


### Plotting polarity & mentions

In [None]:
# Load the mention counts and the per-comment polarity scores
mentions_df = pd.read_csv(os.path.join(game_price_prediction_path, 'data', 'reddit_data', 'mention_all', ALL_MENTIONS_FILENAME))
polarity_df = pd.read_csv(os.path.join(game_price_prediction_path, 'data', 'reddit_data', 'polarity_data', POLARITY_FOLDER_NAME, 'csgo_comments.csv'))

# Parse the date columns so both frames can be merged and sorted chronologically
mentions_df['date'] = pd.to_datetime(mentions_df['date'])
polarity_df['date'] = pd.to_datetime(polarity_df['date'])

# Average the compound polarity score per distinct 'date' value
# NOTE(review): this groups by the exact 'date' value; it assumes the CSV stores
# day-level dates (no time component) — confirm.
polarity_df['compound'] = polarity_df['compound'].astype(float)
daily_polarity = polarity_df.groupby('date')['compound'].mean().reset_index()

# Outer-merge so dates present in only one source are kept, then forward-fill the gaps.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill').
merged_df = pd.merge(mentions_df, daily_polarity, on='date', how='outer').sort_values('date')
merged_df = merged_df.ffill()

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Mentions on the primary axis. The column is 'num_mentions' (as used by every
# other cell reading this CSV); the previous 'num_volume' does not match it.
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['num_mentions'], name="Mentions", line=dict(color='blue')),
    secondary_y=False,
)

# Average polarity on the secondary axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['compound'], name="Average Polarity", line=dict(color='red')),
    secondary_y=True,
)

# Update layout for better readability; the title uses the configured ITEM
# like the other plots in this notebook instead of a hard-coded item name.
fig.update_layout(
    title_text=f"Mentions and Average Polarity Over Time for '{ITEM}' in CS:GO Comments",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin=dict(l=50, r=50, t=80, b=50),
)

# Update y-axes
fig.update_yaxes(title_text="Number of Mentions", secondary_y=False, gridcolor='lightgrey')
fig.update_yaxes(title_text="Average Polarity (Compound Score)", secondary_y=True, gridcolor='lightgrey')

# Update x-axis
fig.update_xaxes(title_text="Date", gridcolor='lightgrey')

# Show the figure
fig.show()

### Fetch price and volume history

In [None]:
### FETCHING ITEM

# Fetch price and volume data FOR ITEM CHOSEN
# The value returned by get_cookie_from_blob() is passed to fetch_item_to_df() below.
dailyCookie = get_cookie_from_blob()
# NOTE(review): `items` is not referenced anywhere else in the visible part of
# this notebook — confirm whether this call is still needed.
items = fetch_items()
# `df` is reused by the plotting cells below, which read its 'volume' and
# 'price_usd' columns and expect 'date' either as a column or as the index.
df = fetch_item_to_df(ITEM, dailyCookie)
print(f"Data for {ITEM} fetched")


### Plotting mentions and volume

In [None]:
# Load the mention counts and parse their dates
mentions_df = pd.read_csv(os.path.join(game_price_prediction_path, 'data', 'reddit_data', 'mention_all', ALL_MENTIONS_FILENAME))

mentions_df['date'] = pd.to_datetime(mentions_df['date'])

# Ensure df has a 'date' column (move the index into a column if it is missing)
if 'date' not in df.columns:
    df = df.reset_index()

# Outer-merge mentions and traded volume so dates present in only one source
# are kept, then forward-fill the gaps.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill').
merged_df = pd.merge(mentions_df, df[['date', 'volume']], on='date', how='outer').sort_values('date')
merged_df = merged_df.ffill()

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Mentions on the primary axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['num_mentions'], name="Mentions", line=dict(color='blue')),
    secondary_y=False,
)

# Traded volume on the secondary axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['volume'], name="Volume", line=dict(color='red')),
    secondary_y=True,
)

# Update layout for better readability
fig.update_layout(
    title_text=f"Mentions and Volume Over Time for {ITEM}",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin=dict(l=50, r=50, t=80, b=50),
)

# Update y-axes
fig.update_yaxes(title_text="Number of Mentions", secondary_y=False, gridcolor='lightgrey')
fig.update_yaxes(title_text="Volume Traded", secondary_y=True, gridcolor='lightgrey')

# Update x-axis
fig.update_xaxes(title_text="Date", gridcolor='lightgrey')

# Show the figure
fig.show()


### Plotting normalized mentions and normalized price

In [None]:
# Read the mention counts and parse their dates
mentions_csv = os.path.join(game_price_prediction_path, 'data', 'reddit_data', 'mention_all', ALL_MENTIONS_FILENAME)
mentions_df = pd.read_csv(mentions_csv)
mentions_df['date'] = pd.to_datetime(mentions_df['date'])

# If the price frame carries 'date' as its index rather than as a column,
# move it into a column so it can serve as the merge key
if df.index.name == 'date' and 'date' not in df.columns:
    df = df.reset_index()

# Outer-join on date, order chronologically, then forward-fill the gaps
merged_df = pd.merge(mentions_df, df[['date', 'price_usd']], on='date', how='outer')
merged_df = merged_df.sort_values('date').ffill()

# Min-max scale each series independently; the same scaler object is refit per column
scaler = MinMaxScaler()
for source_col, target_col in (
    ('num_mentions', 'normalized_mentions'),
    ('price_usd', 'normalized_price'),
):
    merged_df[target_col] = scaler.fit_transform(merged_df[[source_col]])

# Two-axis figure: mentions on the primary axis, price on the secondary one
fig = make_subplots(specs=[[{"secondary_y": True}]])

mentions_trace = go.Scatter(x=merged_df['date'], y=merged_df['normalized_mentions'], name="Mentions")
price_trace = go.Scatter(x=merged_df['date'], y=merged_df['normalized_price'], name="Price (USD)")
fig.add_trace(mentions_trace, secondary_y=False)
fig.add_trace(price_trace, secondary_y=True)

# Both y-axes are pinned to [0, 1]
fig.update_layout(
    title_text=f"Normalized Mentions and Price Over Time for '{ITEM}'",
    xaxis={"title": "Date"},
    yaxis={"title": "Normalized Mentions", "range": [0, 1]},
    yaxis2={"title": "Normalized Price", "range": [0, 1]},
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1},
)

fig.show()

### Plotting smoothed mentions and smoothed volume

In [None]:
# Number of consecutive rows averaged by the rolling mean below
SMOOTHING_WINDOW = 10

# Load the mention counts and parse their dates
mentions_df = pd.read_csv(os.path.join(game_price_prediction_path, 'data', 'reddit_data', 'mention_all', ALL_MENTIONS_FILENAME))
mentions_df['date'] = pd.to_datetime(mentions_df['date'])

# Ensure df has a 'date' column (move the index into a column if it is missing)
if 'date' not in df.columns:
    df = df.reset_index()

# Outer-merge mentions with volume and price so dates present in only one
# source are kept, then forward-fill the gaps.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill').
merged_df = pd.merge(mentions_df, df[['date', 'volume', 'price_usd']], on='date', how='outer').sort_values('date')
merged_df = merged_df.ffill()

# Smooth each series with a rolling mean; rows before a full window is
# available come out as NaN and are simply not drawn.
merged_df['smoothed_mentions'] = merged_df['num_mentions'].rolling(window=SMOOTHING_WINDOW).mean()
merged_df['smoothed_volume'] = merged_df['volume'].rolling(window=SMOOTHING_WINDOW).mean()
merged_df['smoothed_price'] = merged_df['price_usd'].rolling(window=SMOOTHING_WINDOW).mean()

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Smoothed mentions on the primary axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['smoothed_mentions'], name="Smoothed Mentions", line=dict(color='blue')),
    secondary_y=False,
)

# Smoothed volume on the secondary axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['smoothed_volume'], name="Smoothed Volume", line=dict(color='red')),
    secondary_y=True,
)

# Update layout for better readability
fig.update_layout(
    title_text=f"Smoothed Mentions and Volume Over Time for {ITEM}",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin=dict(l=50, r=50, t=80, b=50),
)

# x and y axis labels
fig.update_yaxes(title_text="Smoothed Number of Mentions", secondary_y=False, gridcolor='lightgrey')
fig.update_yaxes(title_text="Smoothed Volume Traded", secondary_y=True, gridcolor='lightgrey')
fig.update_xaxes(title_text="Date", gridcolor='lightgrey')

# Show the figure
fig.show()


### Plotting smoothed mentions against smoothed volume

In [None]:
# plotly.graph_objects is already imported in the first cell; the import is
# repeated here so `go` is bound even if only this part of the notebook is run.
import plotly.graph_objects as go

# One marker per row of merged_df: smoothed volume on x, smoothed mentions on y.
# The row's date is attached as hover text.
scatter_trace = go.Scatter(
    x=merged_df['smoothed_volume'],
    y=merged_df['smoothed_mentions'],
    mode='markers',
    marker={"size": 8},
    text=merged_df['date'],
    hovertemplate='Smoothed Volume: %{x}<br>Smoothed Mentions: %{y}<br>Date: %{text}<extra></extra>',
)

fig = go.Figure()
fig.add_trace(scatter_trace)

# Title, axis labels and a fixed 800x600 canvas
layout_options = {
    "title": f"Smoothed Number of Mentions vs. Smoothed Volume for {ITEM}",
    "xaxis_title": "Smoothed Volume Traded",
    "yaxis_title": "Smoothed Number of Mentions",
    "height": 600,
    "width": 800,
}
fig.update_layout(**layout_options)

# Render the figure
fig.show()

## Correlation stats

In [None]:
# Drop rows lacking either series before computing the statistics
merged_df = merged_df.dropna(subset=['num_mentions', 'volume'])
mentions_series = merged_df['num_mentions']
volume_series = merged_df['volume']

print("Item name: " + ITEM)

# 1. Spearman's rank correlation between mentions and volume
spearman_corr, spearman_p = spearmanr(mentions_series, volume_series)
print(f"Spearman's correlation: {spearman_corr}, p-value: {spearman_p}")

# 2. Pearson's linear correlation between mentions and volume
pearson_corr, pearson_p = pearsonr(mentions_series, volume_series)
print(f"Pearson's correlation: {pearson_corr}, p-value: {pearson_p}")

# 3. Augmented Dickey-Fuller stationarity test on the volume series;
#    the first two entries of the result are the statistic and its p-value
adf_result = adfuller(volume_series)
adf_statistic, adf_p_value = adf_result[0], adf_result[1]
print(f"ADF Statistic: {adf_statistic}, p-value: {adf_p_value}")