In [18]:
import json
import os
from urllib.parse import quote

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from sqlalchemy import create_engine

def load_credentials(path="aws_rds_credentials.json"):
    """Copy the key/value pairs of a JSON file into ``os.environ``.

    Every top-level key of the JSON object becomes an environment variable
    of the same name.

    Parameters
    ----------
    path : str
        Location of the JSON credentials file.

    Returns
    -------
    None
        The function is called for its side effect on ``os.environ``.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as file:
        config = json.load(file)

    for key, value in config.items():
        if value is None:
            # JSON null has no meaningful environment representation; leaving
            # the variable unset makes a later lookup fail loudly instead of
            # silently using the text "None".
            continue
        # os.environ only accepts strings, but JSON commonly stores values
        # such as the port as a number.
        os.environ[key] = value if isinstance(value, str) else str(value)

    return


# Populate os.environ from the credentials file before the URL is built.
load_credentials()

# User and password are percent-encoded so that characters with a meaning in
# URLs (@, :, /, #, ...) cannot corrupt the connection string. Values made
# only of unreserved characters yield exactly the same URL as before.
aws_rds_url = (
    f"postgresql://{quote(os.environ['user'], safe='')}:{quote(os.environ['password'], safe='')}"
    f"@{os.environ['host']}:{os.environ['port']}/{os.environ['database']}?sslmode=require"
)

# Load the last 7 days of per-catalog item counts from the database
def load_data():
    """Query the average catalog size per catalog and day.

    Connects to the database addressed by the module-level ``aws_rds_url``
    and reads from ``public.products_catalog`` the rows dated within the
    last 7 days, averaged per ``(catalog_id, date)`` and ordered by date.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``catalog_total_items`` and ``catalog_id``.
    """
    engine = create_engine(aws_rds_url)
    sql_query = """SELECT date, AVG(catalog_total_items) as catalog_total_items, catalog_id
                    FROM public.products_catalog
                    WHERE date >= CURRENT_DATE - INTERVAL '7 days'
                    GROUP BY catalog_id, date
                    ORDER BY date ASC
                    """
    try:
        return pd.read_sql(sql_query, engine)
    finally:
        # A fresh engine (and connection pool) is created on every call;
        # dispose of it so pooled connections are not left open.
        engine.dispose()

# Run the query and bind the result to `data`; the following cells reassign this name.
data = load_data()
# Bare expression: the notebook renders the frame as this cell's output.
data

Unnamed: 0,date,catalog_total_items,catalog_id
0,2024-03-12,5.790910e+05,98
1,2024-03-12,9.139950e+04,221
2,2024-03-12,3.038047e+05,246
3,2024-03-12,1.930322e+06,267
4,2024-03-12,5.230180e+05,287
...,...,...,...
79,2024-03-29,2.204415e+05,1811
80,2024-03-29,7.057585e+05,1812
81,2024-03-29,4.640105e+05,1815
82,2024-03-29,2.614450e+05,2320


In [19]:
# Discard every row that has a missing value in any column.
data = data.dropna(axis=0, how="any")
# Per-column count of nulls still present after the cleanup.
data.isnull().sum()

date                   0
catalog_total_items    0
catalog_id             0
dtype: int64

In [20]:
# Keep the first occurrence of each fully identical row and drop the repeats.
data = data.drop_duplicates(subset=None, keep="first")
# Bare expression: render the de-duplicated frame as the cell output.
data

Unnamed: 0,date,catalog_total_items,catalog_id
0,2024-03-12,5.790910e+05,98
1,2024-03-12,9.139950e+04,221
2,2024-03-12,3.038047e+05,246
3,2024-03-12,1.930322e+06,267
4,2024-03-12,5.230180e+05,287
...,...,...,...
79,2024-03-29,2.204415e+05,1811
80,2024-03-29,7.057585e+05,1812
81,2024-03-29,4.640105e+05,1815
82,2024-03-29,2.614450e+05,2320


In [21]:
import plotly.graph_objects as go  # same module path as the notebook's first import cell

# Minimum number of rows a catalog needs in order to be drawn. The previous
# check, `catalog_data.size > 10`, counted cells (rows x 4 columns), which
# amounted to "at least 3 rows" only because of the current column count.
MIN_POINTS_PER_TRACE = 3

# Baseline per catalog = its earliest observation. Sorting by date first makes
# 'first' independent of the incoming row order; the resulting Series is
# aligned back to `data` through the index.
baseline_items = (
    data.sort_values('date', kind='stable')
        .groupby('catalog_id')['catalog_total_items']
        .transform('first')
)

# assign() builds a new frame instead of writing a column into a frame that
# was produced by filtering, which avoids SettingWithCopyWarning.
data = data.assign(items_diff=data['catalog_total_items'] - baseline_items)

# Get unique catalog_ids (in order of first appearance)
unique_catalog_ids = data['catalog_id'].unique()

# Create a line graph
fig = go.Figure()

# Add one trace per catalog that has enough points to form a meaningful line
for catalog_id in unique_catalog_ids:
    catalog_data = data[data['catalog_id'] == catalog_id].sort_values('date', kind='stable')
    if len(catalog_data) >= MIN_POINTS_PER_TRACE:
        fig.add_trace(go.Scatter(x=catalog_data['date'], y=catalog_data['items_diff'], mode='lines', name=f'Catalog {catalog_id}'))

# Update layout. The plotted value is the change relative to each catalog's
# first observation, so the labels say so. This call is the cell's last
# expression, so its return value is what the notebook displays.
fig.update_layout(title='Change in Catalog Total Items Over Time',
                  xaxis_title='Date',
                  yaxis_title='Change in Total Items vs. First Observation')

