In [68]:
# Chunk 1: imports and DB setup
import pandas as pd
from sqlalchemy import create_engine, text

DB_USER = "pamudithasenanayake"
DB_PASS = "123"
DB_HOST = "localhost"
DB_NAME = "fashionsite"

engine = create_engine(f"postgresql+psycopg2://{DB_USER}:{DB_PASS}@{DB_HOST}/{DB_NAME}")


In [69]:
# def fetch_fashion_data(limit=100):
#     query = f"SELECT * FROM synthetic_fashion_trends ORDER BY timestamp DESC LIMIT {limit}"
#     df = pd.read_sql(query, engine)
#     df['hashtags'] = df['hashtags'].apply(lambda x: list(x) if x else [])
#     df['tags'] = df['tags'].apply(lambda x: list(x) if x else [])
#     df = df.drop_duplicates(subset=['trend_name'], keep='first')
#     return df

# Chunk 2 — updated
def fetch_fashion_data(limit=100):
    query = f"SELECT * FROM synthetic_fashion_trends ORDER BY timestamp DESC LIMIT {limit}"
    df = pd.read_sql(query, engine)
    df['hashtags'] = df['hashtags'].apply(lambda x: list(x) if x else [])
    df['tags'] = df['tags'].apply(lambda x: list(x) if x else [])

    # QUICK FIX: add small random noise
    import numpy as np
    df['trend_score'] = df['trend_score'] + np.random.uniform(-0.05, 0.05, size=len(df))
    df['trend_score'] = df['trend_score'].clip(0,1)
    return df



In [70]:
# Chunk 3: predict trend_score if missing
from sklearn.ensemble import RandomForestRegressor

class TrendPredictor:
    def __init__(self):
        self.model = RandomForestRegressor(n_estimators=50, random_state=42)

    def train(self, df):
        df['content_length'] = df['content'].str.len()
        df['num_hashtags'] = df['hashtags'].apply(len)
        df['num_tags'] = df['tags'].apply(len)
        features = ['likes','shares','comments','content_length','num_hashtags','num_tags']
        self.model.fit(df[features], df['trend_score'])
        return self


In [71]:
# Chunk 4: predict missing trend scores
def predict_missing_scores(df, predictor):
    mask = df['trend_score'].isnull()
    if mask.sum() > 0:
        df_missing = df[mask].copy()
        df_missing['content_length'] = df_missing['content'].str.len()
        df_missing['num_hashtags'] = df_missing['hashtags'].apply(len)
        df_missing['num_tags'] = df_missing['tags'].apply(len)
        features = ['likes','shares','comments','content_length','num_hashtags','num_tags']
        df.loc[mask, 'trend_score'] = predictor.model.predict(df_missing[features])
    df['predicted_trend_score'] = df['trend_score']
    return df


In [72]:
# Chunk 5: prepare lag features for forecasting
# from sklearn.ensemble import RandomForestRegressor
#
# class ForecastAgent:
#     def __init__(self):
#         self.model = RandomForestRegressor(n_estimators=50, random_state=42)
#
#     def prepare_features(self, df):
#         df = df.sort_values(by=['trend_name','timestamp'])
#         df['prev_score'] = df.groupby('trend_name')['trend_score'].shift(1)
#         df['prev_score2'] = df.groupby('trend_name')['trend_score'].shift(2)
#         df['prev_score3'] = df.groupby('trend_name')['trend_score'].shift(3)
#         df = df.fillna(0)
#         return df
#

    # Chunk 5 — updated
from sklearn.ensemble import RandomForestRegressor


class ForecastAgent:
    def __init__(self):
        self.model = RandomForestRegressor(n_estimators=50, random_state=42)

    def prepare_features(self, df):
        df = df.sort_values(by=['trend_name', 'timestamp'])
        df['prev_score'] = df.groupby('trend_name')['trend_score'].shift(1)
        df['prev_score2'] = df.groupby('trend_name')['trend_score'].shift(2)
        df['prev_score3'] = df.groupby('trend_name')['trend_score'].shift(3)
        # Rolling mean/std
        df['rolling_mean'] = df.groupby('trend_name')['trend_score'].transform(
            lambda x: x.rolling(3, min_periods=1).mean())
        df['rolling_std'] = df.groupby('trend_name')['trend_score'].transform(
            lambda x: x.rolling(3, min_periods=1).std().fillna(0))
        df = df.fillna(0)
        return df



In [73]:
# Chunk 6
def train_forecast(df, agent):
    df = agent.prepare_features(df)
    features = ['prev_score','prev_score2','prev_score3','likes','shares','comments','rolling_mean','rolling_std']
    agent.model.fit(df[features], df['trend_score'])
    return agent

# Chunk 7
def forecast_trends(df, agent):
    df = agent.prepare_features(df)
    features = ['prev_score','prev_score2','prev_score3','likes','shares','comments','rolling_mean','rolling_std']
    df['forecasted_trend_score'] = agent.model.predict(df[features])
    return df[['trend_name','forecasted_trend_score']]


In [74]:
# Chunk 8: compute trend direction with smaller thresholds
class TrendDirectionAgent:
    def __init__(self, up_threshold=0.01, down_threshold=-0.01):
        self.up_threshold = up_threshold
        self.down_threshold = down_threshold

    def compute_direction(self, df, score_column='trend_score'):
        df = df.sort_values(by=['trend_name','timestamp'])
        df['prev_score'] = df.groupby('trend_name')[score_column].shift(1)
        df['score_change'] = df[score_column] - df['prev_score']
        df['trend_direction'] = df['score_change'].apply(
            lambda x: 'up' if x > self.up_threshold else ('down' if x < self.down_threshold else 'stable')
        )
        df['trend_direction'] = df['trend_direction'].fillna('stable')
        return df


In [75]:
# Chunk 9: optional random synthetic forecast
import random

class TrendForecaster:
    def forecast(self, predicted_score):
        change = random.uniform(-0.15, 0.15)
        forecast_score = max(0.0, min(1.0, predicted_score + change))
        if forecast_score > predicted_score + 0.05:
            direction = "up"
        elif forecast_score < predicted_score - 0.05:
            direction = "down"
        else:
            direction = "stable"
        return forecast_score, direction


In [76]:
# Chunk 10: full pipeline test
df = fetch_fashion_data(limit=100)
predictor = TrendPredictor().train(df)
df = predict_missing_scores(df, predictor)

forecast_agent = ForecastAgent()
forecast_agent = train_forecast(df, forecast_agent)
df_forecast = forecast_trends(df, forecast_agent)

direction_agent = TrendDirectionAgent()
df = direction_agent.compute_direction(df, score_column='predicted_trend_score')

# show combined results
df_final = df.merge(df_forecast, on='trend_name', how='left')
print(df_final[['trend_name','predicted_trend_score','forecasted_trend_score','trend_direction']].head(20))


    trend_name  predicted_trend_score  forecasted_trend_score trend_direction
0   bucket hat               0.707545                0.672959          stable
1   bucket hat               0.707545                0.595618          stable
2   bucket hat               0.707545                0.629346          stable
3   bucket hat               0.707545                0.481539          stable
4   bucket hat               0.707545                0.644070          stable
5   bucket hat               0.707545                0.034200          stable
6   bucket hat               0.707545                0.113744          stable
7   bucket hat               0.596412                0.672959            down
8   bucket hat               0.596412                0.595618            down
9   bucket hat               0.596412                0.629346            down
10  bucket hat               0.596412                0.481539            down
11  bucket hat               0.596412                0.644070   

  df = df.fillna(0)
  df = df.fillna(0)


In [77]:
# Chunk 11: compute overall trend direction per trend
def compute_overall_direction(df, predicted_col='predicted_trend_score', forecast_col='forecasted_trend_score',
                              up_threshold=0.01, down_threshold=-0.01):
    # Aggregate: mean predicted & forecasted score per trend
    agg = df.groupby('trend_name').agg({
        predicted_col: 'mean',
        forecast_col: 'mean'
    }).reset_index()

    # Compute difference
    agg['score_change'] = agg[forecast_col] - agg[predicted_col]

    # Assign overall trend
    agg['trend_direction'] = agg['score_change'].apply(
        lambda x: 'up' if x > up_threshold else ('down' if x < down_threshold else 'stable')
    )
    return agg[['trend_name', predicted_col, forecast_col, 'trend_direction']]


In [78]:
# Chunk 12: apply overall trend computation
df_overall = compute_overall_direction(df_final)
print(df_overall.sort_values(by='forecasted_trend_score', ascending=False).head(20))


          trend_name  predicted_trend_score  forecasted_trend_score  \
8      sneaker trend               0.538961                0.537876   
2     denim overalls               0.528771                0.521706   
7        plaid skirt               0.457547                0.471404   
9  streetwear hoodie               0.475522                0.465730   
1           crop top               0.458692                0.455801   
5        neon colors               0.461049                0.453536   
0         bucket hat               0.471188                0.453068   
4      leather boots               0.452066                0.449230   
6   oversized jacket               0.428814                0.426518   
3       floral dress               0.380885                0.402906   

  trend_direction  
8          stable  
2          stable  
7              up  
9          stable  
1          stable  
5          stable  
0            down  
4          stable  
6          stable  
3              up 