In [22]:
import pymongo
from datetime import datetime

MONGO_URI = "mongodb+srv://leogym2:NPgwIUneOWWyNuMo@cluster0.mf0xa.mongodb.net/"
client = pymongo.MongoClient(MONGO_URI)
db = client["Raw_data"]

# Define collections
stock_collection = db["Stock"]
Stock_reduced = db["Stock_reduced"]



In [24]:
pipeline = [
    { "$unwind": "$Time Series (Daily)" },  # Flatten daily records
    
    # Lookup for 1 day ahead
    {
        "$lookup": {
            "from": "Stock_reduced", #self lookup with data shifted for each time horizons 
            "let": { 
                "stock_id": "$Meta Data.Symbol", 
                "current_date": "$Time Series (Daily).isodate" 
            },
            "pipeline": [
                { "$match": { "$expr": { "$eq": ["$Meta Data.Symbol", "$$stock_id"] } } },
                { "$unwind": "$Time Series (Daily)" },
                {
                    "$match": {
                        "$expr": {
                            "$eq": [
                                "$Time Series (Daily).isodate",
                                { "$dateAdd": { "startDate": "$$current_date", "unit": "day", "amount": 1 } }
                                
                                                            ]
                                                        }
                                                    }
                                                }
                  ],
                  "as": "next_day"
            }
    },
    
    # Lookup for 5 days ahead
    {
        "$lookup": {
            "from": "Stock_reduced",
            "let": { 
                "stock_id": "$Meta Data.Symbol", 
                "current_date": "$Time Series (Daily).isodate" 
            },
            "pipeline": [
                { "$match": { "$expr": { "$eq": ["$Meta Data.Symbol", "$$stock_id"] } } },
                { "$unwind": "$Time Series (Daily)" },
                {
                    "$match": {
                        "$expr": {
                            "$eq": [
                                "$Time Series (Daily).isodate",
                                { "$dateAdd": { "startDate": "$$current_date", "unit": "day", "amount": 7 } } # Skip weekends, 5 business days ≈ 7 actual days
                            ]
                        }
                    }
                }
            ],
            "as": "next_5_days"
        }
    },

    # Lookup for 15 days ahead
    {
        "$lookup": {
            "from": "Stock_reduced",
            "let": { 
                "stock_id": "$Meta Data.Symbol", 
                "current_date": "$Time Series (Daily).isodate" 
            },
            "pipeline": [
                { "$match": { "$expr": { "$eq": ["$Meta Data.Symbol", "$$stock_id"] } } },
                { "$unwind": "$Time Series (Daily)" },
                {
                    "$match": {
                        "$expr": {
                            "$eq": [
                                "$Time Series (Daily).isodate",
                                { "$dateAdd": { "startDate": "$$current_date", "unit": "day", "amount": 21 } } # Skip weekends, 15 business days ≈ 21 actual days
                            ]
                        }
                    }
                }
            ],
            "as": "next_15_days"
        }
    },

    { "$unwind": { "path": "$next_day", "preserveNullAndEmptyArrays": True } }, 
    { "$unwind": { "path": "$next_5_days", "preserveNullAndEmptyArrays": True } },
    { "$unwind": { "path": "$next_15_days", "preserveNullAndEmptyArrays": True } },

    {  
        "$set": {
            "symbol": "$Meta Data.Symbol",
            "date": "$Time Series (Daily).isodate",
            "close": "$Time Series (Daily).close",
            "open": "$Time Series (Daily).open",
            "high": "$Time Series (Daily).high",
            "low": "$Time Series (Daily).low",
            "volume": "$Time Series (Daily).volume",

            # Change in close price for 1 day
            "change_close_next_1_day": {
                "$cond": {
                    "if": { "$ne": ["$next_day", None] },
                    "then": {
                        "$round": [
                            {
                                "$multiply": [
                                    {
                                        "$divide": [
                                            { "$subtract": ["$next_day.Time Series (Daily).close", "$Time Series (Daily).close"] },
                                            "$Time Series (Daily).close"
                                        ]
                                    },
                                    100
                                ]
                            },
                            2
                        ]
                    },
                    "else": None
                }
            },

            # Change in close price for 5 days
            "change_close_next_5_days": {
                "$cond": {
                    "if": { "$ne": ["$next_5_days", None] },
                    "then": {
                        "$round": [
                            {
                                "$multiply": [
                                    {
                                        "$divide": [
                                            { "$subtract": ["$next_5_days.Time Series (Daily).close", "$Time Series (Daily).close"] },
                                            "$Time Series (Daily).close"
                                        ]
                                    },
                                    100
                                ]
                            },
                            2
                        ]
                    },
                    "else": None
                }
            },

            # Change in close price for 15 days
            "change_close_next_15_days": {
                "$cond": {
                    "if": { "$ne": ["$next_15_days", None] },
                    "then": {
                        "$round": [
                            {
                                "$multiply": [
                                    {
                                        "$divide": [
                                            { "$subtract": ["$next_15_days.Time Series (Daily).close", "$Time Series (Daily).close"] },
                                            "$Time Series (Daily).close"
                                        ]
                                    },
                                    100
                                ]
                            },
                            2
                        ]
                    },
                    "else": None
                }
            },

            # Change in volume for 1 day
            "change_volume_next_1_day": {
                "$cond": {
                    "if": { "$ne": ["$next_day", None] },
                    "then": {
                        "$round": [
                            {
                                "$multiply": [
                                    {
                                        "$divide": [
                                            { "$subtract": ["$next_day.Time Series (Daily).volume", "$Time Series (Daily).volume"] },
                                            "$Time Series (Daily).volume"
                                        ]
                                    },
                                    100
                                ]
                            },
                            2
                        ]
                    },
                    "else": None
                }
            },

            # Change in volume for 5 days
            "change_volume_next_5_days": {
                "$cond": {
                    "if": { "$ne": ["$next_5_days", None] },
                    "then": {
                        "$round": [
                            {
                                "$multiply": [
                                    {
                                        "$divide": [
                                            { "$subtract": ["$next_5_days.Time Series (Daily).volume", "$Time Series (Daily).volume"] },
                                            "$Time Series (Daily).volume"
                                        ]
                                    },
                                    100
                                ]
                            },
                            2
                        ]
                    },
                    "else": None
                }
            },

            # Change in volume for 15 days
            "change_volume_next_15_days": {
                "$cond": {
                    "if": { "$ne": ["$next_15_days", None] },
                    "then": {
                        "$round": [
                            {
                                "$multiply": [
                                    {
                                        "$divide": [
                                            { "$subtract": ["$next_15_days.Time Series (Daily).volume", "$Time Series (Daily).volume"] },
                                            "$Time Series (Daily).volume"
                                        ]
                                    },
                                    100
                                ]
                            },
                            2
                        ]
                    },
                    "else": None
                }
            }
        }
    },

    {
        "$project": {
            "_id": 0,
            "symbol": 1,
            "date": 1,
            "close": 1,"open":1, "low":1, "high":1,
            "volume": 1,
            "change_close_next_1_day": 1,
            "change_close_next_5_days": 1,
            "change_close_next_15_days": 1,
            "change_volume_next_1_day": 1,
            "change_volume_next_5_days": 1,
            "change_volume_next_15_days": 1
        }
    }
]

# Execute the aggregation
result = list(Stock_reduced.aggregate(pipeline))
print(result[900:1300])


[{'symbol': 'AAPL', 'date': datetime.datetime(2021, 7, 16, 0, 0), 'close': 146.39, 'open': 148.46, 'high': 149.76, 'low': 145.88, 'volume': 93251426, 'change_close_next_1_day': None, 'change_close_next_5_days': 1.48, 'change_close_next_15_days': -0.17, 'change_volume_next_1_day': None, 'change_volume_next_5_days': -23.38, 'change_volume_next_15_days': -41.96}, {'symbol': 'AAPL', 'date': datetime.datetime(2021, 7, 15, 0, 0), 'close': 148.48, 'open': 149.24, 'high': 150.0, 'low': 147.09, 'volume': 106820297, 'change_close_next_1_day': -1.41, 'change_close_next_5_days': -1.13, 'change_close_next_15_days': -0.96, 'change_volume_next_1_day': -12.7, 'change_volume_next_5_days': -27.6, 'change_volume_next_15_days': -56.56}, {'symbol': 'AAPL', 'date': datetime.datetime(2021, 7, 14, 0, 0), 'close': 149.15, 'open': 148.1, 'high': 149.57, 'low': 147.68, 'volume': 127050785, 'change_close_next_1_day': -0.45, 'change_close_next_5_days': -2.51, 'change_close_next_15_days': -1.48, 'change_volume_next

In [25]:
db["Stock_performance"].insert_many(result)

InsertManyResult([ObjectId('67b34c5ee1025b242fd24956'), ObjectId('67b34c5ee1025b242fd24957'), ObjectId('67b34c5ee1025b242fd24958'), ObjectId('67b34c5ee1025b242fd24959'), ObjectId('67b34c5ee1025b242fd2495a'), ObjectId('67b34c5ee1025b242fd2495b'), ObjectId('67b34c5ee1025b242fd2495c'), ObjectId('67b34c5ee1025b242fd2495d'), ObjectId('67b34c5ee1025b242fd2495e'), ObjectId('67b34c5ee1025b242fd2495f'), ObjectId('67b34c5ee1025b242fd24960'), ObjectId('67b34c5ee1025b242fd24961'), ObjectId('67b34c5ee1025b242fd24962'), ObjectId('67b34c5ee1025b242fd24963'), ObjectId('67b34c5ee1025b242fd24964'), ObjectId('67b34c5ee1025b242fd24965'), ObjectId('67b34c5ee1025b242fd24966'), ObjectId('67b34c5ee1025b242fd24967'), ObjectId('67b34c5ee1025b242fd24968'), ObjectId('67b34c5ee1025b242fd24969'), ObjectId('67b34c5ee1025b242fd2496a'), ObjectId('67b34c5ee1025b242fd2496b'), ObjectId('67b34c5ee1025b242fd2496c'), ObjectId('67b34c5ee1025b242fd2496d'), ObjectId('67b34c5ee1025b242fd2496e'), ObjectId('67b34c5ee1025b242fd249

In [None]:
db["Stock_performance"].create_index([("date", 1), ("symbol", 1)])