In [1]:
# Collection Timeline and Spending Analysis
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import sys
sys.path.append('../src')

from dotenv import load_dotenv
load_dotenv('../.env')

from google.cloud import bigquery
from config.bigquery_config import config

# Obtain the query client from the project's bigquery_config module.
client = config.get_client()

# One row per purchased record joined to its release metadata, oldest purchase
# first, with a running total of purchase_price ordered by purchase_date.
timeline_query = """
SELECT 
    pc.purchase_date,
    pc.purchase_price,
    dr.title,
    dr.artist,
    pc.condition,
    pc.personal_rating,
    SUM(pc.purchase_price) OVER (ORDER BY pc.purchase_date) as cumulative_value
FROM `vinyl_catalog.personal_collection` pc
JOIN `vinyl_catalog.discogs_releases` dr ON pc.release_id = dr.release_id
ORDER BY pc.purchase_date
"""

# Submit the query, then materialise its result as a pandas DataFrame.
timeline_job = client.query(timeline_query)
timeline_data = timeline_job.to_dataframe()

# Plain-text dump of every row (index column suppressed) under a heading line.
print("Collection Timeline:", timeline_data.to_string(index=False), sep="\n")

Collection Timeline:
purchase_date  purchase_price          title              artist condition  personal_rating  cumulative_value
   2020-02-27              28   Kind of Blue         Miles Davis       VG+                9                28
   2020-05-17              45 A Love Supreme       John Coltrane      Mint               10                73
   2020-09-28              35    Giant Steps       John Coltrane        VG                9               108
   2021-02-05              32     Blue Train       John Coltrane Near Mint                8               140
   2021-06-15              25 Somethin' Else Cannonball Adderley     Good+                8               165


In [None]:
# Create collection visualizations
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*';
# older releases only ship 'seaborn'. Use whichever name this installation
# provides and fall back to the default style instead of raising OSError.
style_name = next(
    (name for name in ('seaborn-v0_8', 'seaborn') if name in plt.style.available),
    'default',
)
plt.style.use(style_name)

# 2x2 grid: one axes per chart drawn below (ax1..ax4, row-major order).
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

# 1. Collection value over time
# Convert the date column to datetime before it is used as the x axis.
timeline_data['purchase_date'] = pd.to_datetime(timeline_data['purchase_date'])

growth_line_style = dict(marker='o', linewidth=2, markersize=8)
ax1.plot(
    timeline_data['purchase_date'],
    timeline_data['cumulative_value'],
    **growth_line_style,
)
ax1.set(title='Collection Value Growth Over Time', ylabel='Cumulative Value ($)')
ax1.grid(True, alpha=0.3)

# 2. Price by condition
# Mean purchase price for each condition value, highest average first.
condition_data = (
    timeline_data
    .groupby('condition')['purchase_price']
    .mean()
    .sort_values(ascending=False)
)
ax2.bar(condition_data.index, condition_data.values, color='skyblue')
ax2.set(title='Average Price by Condition', ylabel='Price ($)')
# Tilt the category labels on the x axis.
ax2.tick_params(axis='x', rotation=45)

# 3. Personal rating vs purchase price
# One marker per record: price on the x axis, personal rating on the y axis.
ax3.scatter(
    timeline_data['purchase_price'],
    timeline_data['personal_rating'],
    s=100,
    alpha=0.7,
    color='orange',
)
ax3.set(
    xlabel='Purchase Price ($)',
    ylabel='Personal Rating (1-10)',
    title='Price vs Personal Rating',
)
ax3.grid(True, alpha=0.3)

# 4. Artist frequency
# Number of records per artist, shown as each artist's share of the collection.
artist_counts = timeline_data['artist'].value_counts()
pie_options = dict(labels=artist_counts.index, autopct='%1.1f%%', startangle=90)
ax4.pie(artist_counts.values, **pie_options)
ax4.set_title('Collection by Artist')

plt.tight_layout()
plt.show()

# Summary statistics are computed directly from the price column instead of
# reading the last row of the SQL running total: the result no longer depends
# on the frame still being in query order, and an empty result set prints a
# message rather than raising IndexError from .iloc[-1].
purchase_prices = timeline_data['purchase_price']
if purchase_prices.empty:
    print("No purchases found in the collection.")
else:
    print(f"Total collection value: ${purchase_prices.sum()}")
    print(f"Average price per album: ${purchase_prices.mean():.2f}")
    print(f"Price range: ${purchase_prices.min()} - ${purchase_prices.max()}")