In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import sys
import os
from sklearn.preprocessing import MinMaxScaler
from scipy import stats
def _find_project_root(start_dir, marker='python_scripts'):
    """Locate the project root by walking upwards from ``start_dir``.

    Args:
        start_dir: Directory to start searching from (normally the kernel's
            working directory).
        marker: Name of a directory that must exist directly inside the
            project root. Defaults to the ``python_scripts`` package imported
            further down in this cell.

    Returns:
        Absolute path of the nearest directory at or above ``start_dir`` that
        contains ``marker``. If no such directory exists, falls back to
        ``start_dir/../..``, which is the location this notebook used
        previously.
    """
    candidate = os.path.abspath(start_dir)
    while True:
        if os.path.isdir(os.path.join(candidate, marker)):
            return candidate
        parent = os.path.dirname(candidate)
        if parent == candidate:
            # Reached the filesystem root without finding the marker.
            return os.path.abspath(os.path.join(start_dir, '..', '..'))
        candidate = parent


cwd = os.getcwd()

# The kernel's working directory differs between setups (the notebook's own
# folder on some machines, the repository root on others). Searching upwards
# for the package directory removes the need to comment/uncomment a
# per-platform path by hand.
game_price_prediction_path = _find_project_root(cwd)

# Give the project root import priority, without stacking duplicate entries
# every time this cell is re-run.
if sys.path[0:1] != [game_price_prediction_path]:
    sys.path.insert(0, game_price_prediction_path)

from python_scripts.utilities.api_calls import get_cookie_from_blob, fetch_item_to_df, fetch_items
from python_scripts.sentiment_analysis.config import ALL_MENTIONS_FILENAME, ITEM, POLARITY_FOLDER_NAME

# ---------------------------------------------------------------------------
# Notebook overview
# - Takes data from mention_data
# - and a range of others: polarity_data, or fetched price history
# - Plots the series against each other
# ---------------------------------------------------------------------------

### for plotting polarity & mentions

In [None]:
# Plot daily mention counts against the daily mean polarity (compound score)
# on a shared date axis with two y-axes.

# Load data
mentions_df = pd.read_csv(os.path.join(game_price_prediction_path, 'data', 'Reddit_data', 'mention_all', ALL_MENTIONS_FILENAME))
polarity_df = pd.read_csv(os.path.join(game_price_prediction_path, 'data', 'Reddit_data', 'polarity_data', POLARITY_FOLDER_NAME, 'csgo_comments.csv'))

# Parse the date columns so both frames merge and sort on real datetimes
mentions_df['date'] = pd.to_datetime(mentions_df['date'])
polarity_df['date'] = pd.to_datetime(polarity_df['date'])

# Average the compound score over all rows that share the same 'date' value
polarity_df['compound'] = polarity_df['compound'].astype(float)
daily_polarity = polarity_df.groupby('date')['compound'].mean().reset_index()

# Outer-join so dates present in only one source are kept, order
# chronologically, then carry the last known value forward across gaps.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill').
merged_df = pd.merge(mentions_df, daily_polarity, on='date', how='outer').sort_values('date')
merged_df = merged_df.ffill()

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Mentions on the primary (left) axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['num_mentions'], name="Mentions", line=dict(color='blue')),
    secondary_y=False,
)

# Average polarity on the secondary (right) axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['compound'], name="Average Polarity", line=dict(color='red')),
    secondary_y=True,
)

# Update layout for better readability
# NOTE(review): the title hard-codes 'Key' - confirm it matches the mentions file being loaded.
fig.update_layout(
    title_text="Mentions and Average Polarity Over Time for 'Key' in CS:GO Comments",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin=dict(l=50, r=50, t=80, b=50),
)

# Update y-axes
fig.update_yaxes(title_text="Number of Mentions", secondary_y=False, gridcolor='lightgrey')
fig.update_yaxes(title_text="Average Polarity (Compound Score)", secondary_y=True, gridcolor='lightgrey')

# Update x-axis
fig.update_xaxes(title_text="Date", gridcolor='lightgrey')

# Show the figure
fig.show()

### setting up mention + vol/price plotting

In [None]:
### FETCHING ITEM
# Fetches the data frame for the configured ITEM; later cells read its
# 'date', 'volume' and 'price_usd' columns from `df`.

# Fetch price and volume data for the chosen ITEM.
# The helpers come from python_scripts.utilities.api_calls; presumably the
# cookie authenticates the request - confirm against that module.
dailyCookie = get_cookie_from_blob()
# NOTE(review): `items` is not used by any of the cells shown here - confirm
# whether this call is still needed before removing it.
items = fetch_items()
df = fetch_item_to_df(ITEM, dailyCookie)
print(f"Data for {ITEM} fetched")


### Plotting mentions against volume

In [None]:
# Plot daily mention counts against the traded volume of the fetched ITEM
# on a shared date axis with two y-axes.

# Reload the mention counts so this cell does not depend on earlier plotting cells
mentions_df = pd.read_csv(os.path.join(game_price_prediction_path, 'data', 'Reddit_data', 'mention_all', ALL_MENTIONS_FILENAME))

mentions_df['date'] = pd.to_datetime(mentions_df['date'])

# Ensure df has a 'date' column: if it is missing, move the index into the columns
if 'date' not in df.columns:
    df = df.reset_index()

# Outer-join mentions and volume, order chronologically, then carry the last
# known value forward across gaps.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill').
merged_df = pd.merge(mentions_df, df[['date', 'volume']], on='date', how='outer').sort_values('date')
merged_df = merged_df.ffill()

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Mentions on the primary (left) axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['num_mentions'], name="Mentions", line=dict(color='blue')),
    secondary_y=False,
)

# Volume on the secondary (right) axis
fig.add_trace(
    go.Scatter(x=merged_df['date'], y=merged_df['volume'], name="Volume", line=dict(color='red')),
    secondary_y=True,
)

# Update layout for better readability.
# The title names the item the data was actually fetched for (ITEM) instead
# of a hard-coded item name.
fig.update_layout(
    title_text=f"Mentions and Volume Over Time for '{ITEM}'",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin=dict(l=50, r=50, t=80, b=50),
)

# Update y-axes
fig.update_yaxes(title_text="Number of Mentions", secondary_y=False, gridcolor='lightgrey')
fig.update_yaxes(title_text="Volume Traded", secondary_y=True, gridcolor='lightgrey')

# Update x-axis
fig.update_xaxes(title_text="Date", gridcolor='lightgrey')

# Show the figure
fig.show()


In [None]:
# Plot min-max normalized mention counts against the normalized USD price
# of the fetched ITEM.

# Reload the mention counts and parse their dates
mentions_csv = os.path.join(
    game_price_prediction_path, 'data', 'Reddit_data', 'mention_all', ALL_MENTIONS_FILENAME
)
mentions_df = pd.read_csv(mentions_csv)
mentions_df['date'] = pd.to_datetime(mentions_df['date'])

# If the price frame carries 'date' only as its index, move it into a column
# so it can be used as the merge key
date_is_index_only = df.index.name == 'date' and 'date' not in df.columns
if date_is_index_only:
    df = df.reset_index()

# Outer-join on date, order chronologically, forward-fill the gaps
price_by_date = df[['date', 'price_usd']]
merged_df = pd.merge(mentions_df, price_by_date, on='date', how='outer')
merged_df = merged_df.sort_values('date')
merged_df = merged_df.ffill()

# Rescale each series independently; the same scaler object is re-fitted
# for each column in turn (mentions first, then price)
scaler = MinMaxScaler()
for raw_col, scaled_col in (
    ('num_mentions', 'normalized_mentions'),
    ('price_usd', 'normalized_price'),
):
    merged_df[scaled_col] = scaler.fit_transform(merged_df[[raw_col]])

# Build the dual-axis figure: mentions on the left axis, price on the right
fig = make_subplots(specs=[[{"secondary_y": True}]])

mentions_trace = go.Scatter(x=merged_df['date'], y=merged_df['normalized_mentions'], name="Mentions")
price_trace = go.Scatter(x=merged_df['date'], y=merged_df['normalized_price'], name="Price (USD)")
fig.add_trace(mentions_trace, secondary_y=False)
fig.add_trace(price_trace, secondary_y=True)

# Both axes are pinned to [0, 1] because both series are min-max scaled
fig.update_layout(
    title_text=f"Normalized Mentions and Price Over Time for '{ITEM}'",
    yaxis={"title": "Normalized Mentions", "range": [0, 1]},
    yaxis2={"title": "Normalized Price", "range": [0, 1]},
    xaxis={"title": "Date"},
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1},
)

fig.show()


