<a href="https://colab.research.google.com/github/rishita-bisht/Cris-Ai/blob/main/CRIS_Full_Pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Check versions (TensorFlow is pre-installed)
import pandas as pd
import numpy as np
import sklearn
import tensorflow as tf
import cv2

from google.colab import drive
drive.mount('/content/drive')

print("Pandas:", pd.__version__)
print("NumPy:", np.__version__)
print("Scikit-learn:", sklearn.__version__)
print("TensorFlow:", tf.__version__)
print("OpenCV:", cv2.__version__)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Pandas: 2.2.2
NumPy: 2.0.2
Scikit-learn: 1.6.1
TensorFlow: 2.19.0
OpenCV: 4.13.0


In [3]:
def generate_soldier_data(num_soldiers=50, num_days=10, seed=42):
    np.random.seed(seed)

    data = []

    for soldier in range(1, num_soldiers + 1):

        # Personal baseline
        base_hr = np.random.uniform(65, 75)
        base_hrv = np.random.uniform(50, 70)
        base_sleep = np.random.uniform(6, 8)
        base_stress = np.random.uniform(2, 4)

        for day in range(1, num_days + 1):

            activity = np.random.randint(3, 8)

            heart_rate = base_hr + np.random.normal(0, 3)
            hrv = base_hrv + np.random.normal(0, 4)
            sleep = base_sleep + np.random.normal(0, 0.5)
            stress = base_stress + np.random.normal(0, 0.7)

            # Inject anomalies
            if soldier == 2 and day >= 7:
                heart_rate += 20
                sleep -= 1.5
                stress += 3

            if soldier == 4 and day >= 6:
                heart_rate += 25
                sleep -= 2
                hrv -= 15

            data.append([
                soldier,
                day,
                round(heart_rate, 1),
                round(hrv, 1),
                round(sleep, 2),
                round(stress, 2),
                activity
            ])

    columns = [
        "soldier_id",
        "day",
        "heart_rate",
        "hrv",
        "sleep_hours",
        "stress_score",
        "activity_level"
    ]

    df = pd.DataFrame(data, columns=columns)

    return df

In [4]:
df = generate_soldier_data()

print("Shape:", df.shape)
print(df.head())

Shape: (500, 7)
   soldier_id  day  heart_rate   hrv  sleep_hours  stress_score  \
0           1    1        69.6  73.1         7.17          2.83   
1           1    2        73.1  75.2         6.24          3.62   
2           1    3        69.4  65.9         7.54          3.17   
3           1    4        69.1  67.0         7.16          3.86   
4           1    5        66.5  65.9         7.94          4.30   

   activity_level  
0               4  
1               7  
2               6  
3               3  
4               7  


In [5]:
print("Soldier 2 (Day 7-10)")
print(df[(df.soldier_id == 2) & (df.day >= 7)])

print("\nSoldier 4 (Day 6-10)")
print(df[(df.soldier_id == 4) & (df.day >= 6)])

Soldier 2 (Day 7-10)
    soldier_id  day  heart_rate   hrv  sleep_hours  stress_score  \
16           2    7        86.6  70.9         6.04          6.68   
17           2    8        87.9  65.5         5.53          6.21   
18           2    9        82.9  65.0         5.72          5.43   
19           2   10        81.0  62.2         5.98          5.58   

    activity_level  
16               5  
17               6  
18               3  
19               6  

Soldier 4 (Day 6-10)
    soldier_id  day  heart_rate   hrv  sleep_hours  stress_score  \
35           4    6        98.0  39.7         3.22          3.20   
36           4    7        94.5  50.7         4.50          3.19   
37           4    8        99.7  41.4         3.29          3.62   
38           4    9        97.1  32.1         4.35          3.83   
39           4   10        92.8  33.8         3.96          4.39   

    activity_level  
35               7  
36               3  
37               5  
38               5

In [6]:
for soldier in df["soldier_id"].unique():
    print(f"\n--- Soldier {soldier} ---")
    print(df[df["soldier_id"] == soldier])


--- Soldier 1 ---
   soldier_id  day  heart_rate   hrv  sleep_hours  stress_score  \
0           1    1        69.6  73.1         7.17          2.83   
1           1    2        73.1  75.2         6.24          3.62   
2           1    3        69.4  65.9         7.54          3.17   
3           1    4        69.1  67.0         7.16          3.86   
4           1    5        66.5  65.9         7.94          4.30   
5           1    6        68.1  70.6         6.83          3.96   
6           1    7        67.4  69.3         7.38          4.04   
7           1    8        69.4  72.5         6.96          2.09   
8           1    9        75.8  73.3         7.51          3.49   
9           1   10        69.7  67.5         7.13          3.63   

   activity_level  
0               4  
1               7  
2               6  
3               3  
4               7  
5               5  
6               4  
7               4  
8               4  
9               7  

--- Soldier 2 ---
    

In [7]:
def compute_zscore_anomalies(df, threshold=2):
    df = df.copy()

    df["anomaly_flag"] = False
    df["anomaly_reason"] = ""

    for soldier in df["soldier_id"].unique():

        soldier_data = df[df["soldier_id"] == soldier]

        # Baseline = first 5 days
        baseline = soldier_data[soldier_data["day"] <= 5]

        means = baseline[["heart_rate", "hrv", "sleep_hours", "stress_score"]].mean()
        stds = baseline[["heart_rate", "hrv", "sleep_hours", "stress_score"]].std()

        # Avoid division by zero
        stds = stds.replace(0, 0.01)

        # Evaluate days 6-10
        for idx, row in soldier_data[soldier_data["day"] >= 6].iterrows():

            reasons = []

            for metric in ["heart_rate", "hrv", "sleep_hours", "stress_score"]:
                z = (row[metric] - means[metric]) / stds[metric]

                if abs(z) > threshold:
                    reasons.append(f"{metric} abnormal (Z={round(z,2)})")

            if reasons:
                df.loc[idx, "anomaly_flag"] = True
                df.loc[idx, "anomaly_reason"] = ", ".join(reasons)

    return df

In [8]:
df = compute_zscore_anomalies(df)

print("Total anomalies detected:", df["anomaly_flag"].sum())

df[(df["soldier_id"] == 2) & (df["day"] >= 6)]
df[(df["soldier_id"] == 4) & (df["day"] >= 6)]

Total anomalies detected: 114


Unnamed: 0,soldier_id,day,heart_rate,hrv,sleep_hours,stress_score,activity_level,anomaly_flag,anomaly_reason
35,4,6,98.0,39.7,3.22,3.2,7,True,"heart_rate abnormal (Z=11.9), hrv abnormal (Z=..."
36,4,7,94.5,50.7,4.5,3.19,3,True,"heart_rate abnormal (Z=10.42), sleep_hours abn..."
37,4,8,99.7,41.4,3.29,3.62,5,True,"heart_rate abnormal (Z=12.61), hrv abnormal (Z..."
38,4,9,97.1,32.1,4.35,3.83,5,True,"heart_rate abnormal (Z=11.52), hrv abnormal (Z..."
39,4,10,92.8,33.8,3.96,4.39,6,True,"heart_rate abnormal (Z=9.7), hrv abnormal (Z=-..."
