In [None]:
!pip install dash
!pip install pyngrok
!pip install scikit-optimize
!pip install pytz
import pytz
from flask import Flask, request, jsonify
import pandas as pd
import numpy as np
from scipy.stats import kurtosis, skew
import pytz
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from scipy.stats import wasserstein_distance
from skopt import BayesSearchCV
import plotly.graph_objs as go
from dash import Dash, dcc, html
from dash.dependencies import Input, Output, State
import os
import matplotlib.pyplot as plt
import io

Collecting dash
  Downloading dash-2.17.1-py3-none-any.whl (7.5 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/7.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.2/7.5 MB[0m [31m5.6 MB/s[0m eta [36m0:00:02[0m[2K     [91m━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/7.5 MB[0m [31m30.6 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━[0m [32m7.0/7.5 MB[0m [31m66.8 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m7.5/7.5 MB[0m [31m68.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m47.6 MB/s[0m eta [36m0:00:00[0m
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Collecting dash-core-components==2.0.0 (from

In [None]:
# Define the column list
colo = ['a_res', 'ax', 'ay', 'az', 'g_res', 'gx', 'gy', 'gz', 'm_res', 'mx', 'my', 'mz']

# Helper functions for data preparation
def create_df(file_name, device_id):
    df = pd.read_excel(file_name, sheet_name=device_id)
    return df

def df_splitter(df):
    df_split = df['sensor_data'].str.split(',', expand=True)
    df_split.columns = [f"col{i}" for i in range(506)]
    df_split.drop(columns=[f"col{i}" for i in range(11, 38)], inplace=True)
    return df_split

def break_sensor_data(begin, df):
    data = df
    cols0 = ['ax', 'ay', 'az', 'gx', 'gy', 'gz', 'mx', 'my', 'mz']
    for x in cols0:
        data[x] = data.iloc[:, [*range(begin, data.shape[1], 9)]].values.astype(float).tolist()
        begin += 1
    return data

def calc_resultant(x, y, z):
    # Convert lists to numpy arrays for vectorized operations
    x = np.array(x)
    y = np.array(y)
    z = np.array(z)
    return (x**2 + y**2 + z**2)**0.5

def break_cols(data, numcol, cols):
    result = pd.DataFrame(index=data.index) # Use data.index for the index
    for c in cols:
        temp = pd.DataFrame(data[c].tolist())
        temp.columns = [c + '_' + str(r) for r in range(numcol)]
        result = pd.concat([result, temp], axis=1)
    return result

def deg(x, y):
    angle_deg = []
    for i in range(len(x)):
        angle_rad = np.arctan2(x[i], y[i])
        angle_deg_single = np.degrees(angle_rad)
        angle_deg.append(np.round(angle_deg_single, 2))
    return angle_deg

def transform(data):
    assert isinstance(data, pd.DataFrame), "Data passed to transform is not a DataFrame"  # Debug step
    mx_cols = [col for col in data.columns if 'mx' in col]
    my_cols = [col for col in data.columns if 'my' in col]
    mz_cols = [col for col in data.columns if 'mz' in col]

    for mx_col, my_col, mz_col in zip(mx_cols, my_cols, mz_cols):
        data[f'{mx_col}_1'] = deg(data[my_col], data[mz_col])
        data[f'{my_col}_1'] = deg(data[mz_col], data[mx_col])
        data[f'{mz_col}_1'] = deg(data[mx_col], data[my_col])

    return data

def get_X(dat):
    data_sensor = dat.copy()
    data_sensor = data_sensor.applymap(lambda x: np.array(x))
    data_sensor["a_res"] = calc_resultant(data_sensor['ax'], data_sensor['ay'], data_sensor['az'])
    data_sensor["g_res"] = calc_resultant(data_sensor['gx'], data_sensor['gy'], data_sensor['gz'])
    data_sensor["m_res"] = calc_resultant(data_sensor['mx'], data_sensor['my'], data_sensor['mz'])
    X = break_cols(data_sensor, 52, ['a_res', 'ax', 'ay', 'az', 'g_res', 'gx', 'gy', 'gz', 'm_res', 'mx', 'my', 'mz'])
    assert isinstance(X, pd.DataFrame), "X is not a DataFrame"  # Debug step
    return X

def create_res_df(prefix, df):
    cols = [col for col in df.columns if col.startswith(prefix)]
    return pd.DataFrame(df[cols].values.reshape(-1, 1), columns=[prefix])

def preprocess1(df):
    df_split = df_splitter(df)
    df_split = break_sensor_data(11, df_split)
    df_split['utc_time'] = pd.to_datetime(pd.to_numeric(df_split.col5), unit='s')
    local_timezone = pytz.timezone('Asia/Kolkata')

    def convert_utc_to_local(utc_time):
        utc_time = utc_time.replace(tzinfo=pytz.utc)
        local_time = utc_time.astimezone(local_timezone)
        return local_time

    df_split['local_time'] = df_split['utc_time'].apply(convert_utc_to_local)
    X = get_X(df_split)
    assert isinstance(X, pd.DataFrame), "X returned to preprocess1 is not a DataFrame"  # Debug step
    X1 = transform(X)
    X1['time'] = df_split['local_time']
    return X1, X.shape[0]

def preprocess2(X1):
    dfs = []
    for col in colo:
        dfs.append(create_res_df(col, X1))
    merged_df = pd.concat(dfs, axis=1)
    merged_df.dropna(inplace=True)
    return merged_df

def preprocess3(merged_df):
    new_df = merged_df.copy()
    new_df = first_diff(new_df)
    new_df = ffts(new_df)
    return new_df

def first_diff(new_df):
    new_df_diff = pd.DataFrame()
    for col in new_df.columns:
        diff_list = []
        for i in range(0, len(new_df), 52):
            segment_diff = new_df[col].iloc[i:i+52].diff().fillna(0)
            diff_list.extend(segment_diff.tolist())
        new_df_diff[col] = diff_list
    new_df_diff.columns = [col + '_diff' for col in new_df_diff.columns]
    new_df = pd.concat([new_df, new_df_diff], axis=1)
    return new_df

def compute_fft_log(segment):
    fft_result = np.fft.fft(segment)
    magnitudes = np.abs(fft_result)
    log_magnitudes = np.log(magnitudes + 1e-8)
    return log_magnitudes

def ffts(new_df):
    new_df_fft_log = pd.DataFrame()
    for col in new_df.columns:
        fft_log_list = []
        for i in range(0, len(new_df), 52):
            segment = new_df[col].iloc[i:i+52].fillna(0)
            log_magnitudes = compute_fft_log(segment)
            fft_log_list.extend(log_magnitudes)
        new_df_fft_log[col] = fft_log_list
    new_df_fft_log.columns = [col + '_fft' for col in new_df_fft_log.columns]
    new_df = pd.concat([new_df, new_df_fft_log], axis=1)
    return new_df

def process_with_time(new_df, X1):
    # Extract 'time' column from X1
    X1 = X1[['time']]

    # Categorize time of day if needed
    X1["time_of_day"] = X1["time"].apply(categorize_time_of_day)  # Assuming 'categorize_time_of_day' is defined elsewhere
    times = []
    for i in range(0, len(X1)):
      for j in range(3, 55):
        times.append(pd.to_datetime(X1['time'].iloc[i]) + pd.to_timedelta(j, unit='s'))


    # Assign times to new_df if lengths match
    if len(times) == new_df.shape[0]:
        new_df['time'] = times
    else:
        print("Error: Length of 'times' does not match the number of rows in 'new_df'")

    return new_df, X1

def filter_time_window(new_df, start_date, end_date, start_time, end_time):
    # Create start and end datetime objects, subtracting 5 hours 30 minutes for timezone adjustment
    start_datetime = pd.to_datetime(f"{start_date} {start_time}", utc=True).tz_convert('Asia/Kolkata')- pd.Timedelta(hours=5, minutes=30)
    end_datetime = pd.to_datetime(f"{end_date} {end_time}", utc=True).tz_convert('Asia/Kolkata')- pd.Timedelta(hours=5, minutes=30)

    # Filter the DataFrame based on the time window
    filtered_new_df = new_df[(new_df['time'] >= (start_datetime)+pd.Timedelta(minutes=10)) & (new_df['time'] <= (end_datetime))]
    return filtered_new_df

def process_metric_streams(new_df, func, col):
    new_df = new_df.drop('time', axis=1)
    metric_series = []
    if func == count_percentile_anomaly:
        col_data = new_df[col].fillna(0)
        top_5_percentile = np.percentile(col_data, 95)
        bottom_5_percentile = np.percentile(col_data, 5)
    for i in range(0, len(new_df), 52):
        segment = new_df[col].iloc[i:i+52].fillna(0)
        if func == count_percentile_anomaly:
            metric_value = func(segment, top_5_percentile, bottom_5_percentile)
        else:
            metric_value = func(segment)
        metric_series.append(metric_value)
    return metric_series

def make_X1(X1, metric_series):
    X1['metric_values'] = metric_series
    return X1

def make_X1_dfs(X1, n):
    # Reverse the DataFrame
    X1d = X1.iloc[::-1]

    # Check if n is in permissible values
    permissible_values = [1, 2, 3, 4, 6]
    if n not in permissible_values:
        raise ValueError(f"n must be one of {permissible_values}")

    # Initialize an empty DataFrame for results
    X2 = pd.DataFrame()

    # Aggregate rows in chunks of n * 6, taking the mean excluding 'time_of_day'
    for i in range(0, len(X1d), n * 6):
        segment = X1d.iloc[i:i + (n * 6)]
        segment['time'] = pd.to_datetime(segment['time'])
        # Take the initial 'time' value
        initial_time = segment['time'].iloc[0]
        # Exclude 'time_of_day' from mean calculation
        segment_avg = segment.drop(columns=['time_of_day']).mean(axis=0)
        segment_avg['time'] = initial_time
        X2 = pd.concat([X2, pd.DataFrame([segment_avg])], ignore_index=True)

    # Round 'time' values and categorize 'time_of_day'
    X2['time'] = pd.to_datetime(X2['time']).dt.round("2s")
    X2["time_of_day"] = X2["time"].apply(categorize_time_of_day)

    # Calculate the number of segments per day and total days
    num = 24 / n
    num_rows = X2.shape[0]
    num_days = num_rows / num

    # Initialize list of DataFrames for each periodic segment
    dfs = [pd.DataFrame() for _ in range(int(num))]

    # Populate each DataFrame with periodic elements
    for i in range(int(num_days)):
        for j in range(int(num)):
            dfs[j] = pd.concat([dfs[j], X2.iloc[[i * int(num) + j]]], ignore_index=True)

    return dfs

def select_df(dfs, slot):
    return dfs[slot]

def anomaly_basic_lvl(dl, n):
    streams = dl['metric_values']
    last_element = streams[-1] if streams else None
    ratios = []
    anomalies = []
    lower_bound = 1 - (n / 100)
    upper_bound = 1 + (n / 100)
    for element in streams:
        if element != 0:
            ratio = last_element / element
            is_anomaly = not (lower_bound <= ratio <= upper_bound)
            ratios.append(ratio)
            anomalies.append(is_anomaly)
        else:
            ratios.append(None)
            anomalies.append(True)
    dl['ratios'] = ratios
    dl['anomalies'] = anomalies
    return dl

def detect_anomalies(dl, percentile_thresholds=(5, 95)):
    streams = dl['metric_values']
    anomalies_p = []
    for stream in streams:
        if not isinstance(stream, (list, np.ndarray)):
            print(f"Warning: Skipping non-iterable stream: {stream}")
            continue
        top_5 = np.percentile(stream, percentile_thresholds[1])
        bottom_5 = np.percentile(stream, percentile_thresholds[0])
        for value in stream:
            if value > top_5 or value < bottom_5:
                anomalies_p.append((value, "percentile-based-Anomaly"))
            else:
                anomalies_p.append((value, "regular"))
    dl['percentile_based_anomaly'] = anomalies_p
    return dl

def optimize_contamination(dl, estimator=IsolationForest(), optimizer='grid', scoring='f1', **kwargs):
    data_stream = dl['metric_values']
    data_stream = np.array(data_stream).reshape(-1, 1)
    train_data, test_data = train_test_split(data_stream, test_size=0.2, random_state=42)
    contamination_range = np.linspace(0.01, 0.2, 20)
    wasserstein_distances = []
    models = []
    if optimizer == 'grid':
        for contamination in contamination_range:
            params = estimator.get_params()
            params['contamination'] = contamination
            model = IsolationForest(**params)
            model.fit(train_data)
            train_scores = model.decision_function(train_data)
            test_scores = model.decision_function(test_data)
            wasserstein_distances.append(wasserstein_distance(train_scores, test_scores))
            models.append(model)
        best_model_idx = np.argmin(wasserstein_distances)
        best_model = models[best_model_idx]
    elif optimizer == 'random':
        param_distributions = {'contamination': contamination_range}
        search = RandomizedSearchCV(estimator, param_distributions, n_iter=10, scoring=scoring, random_state=42)
        search.fit(train_data, test_data)
        best_model = search.best_estimator_
    elif optimizer == 'bayesian':
        param_space = {'contamination': (0.01, 0.2)}
        search = BayesSearchCV(estimator, param_space, n_iter=10, scoring=scoring, random_state=42)
        search.fit(train_data, test_data)
        best_model = search.best_estimator_
    else:
        raise ValueError("Optimizer must be 'grid', 'random', or 'bayesian'.")
    return best_model

def count_percentile_anomaly(segment, top_5_percentile, bottom_5_percentile):
    count_above = np.sum(segment > top_5_percentile)
    count_below = np.sum(segment < bottom_5_percentile)
    return (count_above+count_below)

def compute_spectral_centroid(segment):
    length = len(segment)
    freqs = np.fft.fftfreq(length)
    centroid = np.sum(segment * np.abs(freqs)) / np.sum(segment)
    return centroid

def compute_mean(segment):
    return segment.mean()

def compute_max(segment):
    return segment.max()

def compute_min(segment):
    return segment.min()

def compute_median(segment):
    return segment.median()

def compute_energy(segment):
    return np.sum(segment**2)

def compute_kurtosis(segment):
    return kurtosis(segment)

def compute_skewness(segment):
    return skew(segment)

def compute_mean_abs_dev(segment):
    mean = segment.mean()
    return np.mean(np.abs(segment - mean))

def compute_positive_counts(segment):
    return np.sum(segment > 0)

def compute_negative_counts(segment):
    return np.sum(segment < 0)

def compute_iqr(segment):
    return np.percentile(segment, 75) - np.percentile(segment, 25)

def compute_std_dev(segment):
    return segment.std()

def compute_count_above_mean(segment):
    mean = segment.mean()
    return np.sum(segment > mean)

def compute_range(segment):
    return segment.max() - segment.min()

def compute_peak_count(segment):
    peaks = (np.diff(np.sign(np.diff(segment))) < 0).sum()
    return peaks

def compute_median_abs_dev(segment):
    median = segment.median()
    return np.median(np.abs(segment - median))

def compute_zcr(segment):
    zcr = ((segment[:-1] * segment[1:]) < 0).sum()
    return zcr

def compute_top_5_percentile(segment):
    return np.percentile(segment, 95)

def compute_bottom_5_percentile(segment):
    return np.percentile(segment, 5)

def categorize_time_of_day(time):
    hour = time.hour
    if 6 <= hour < 12:
        return "Morning"
    elif 12 <= hour < 18:
        return "Afternoon"
    elif 18 <= hour < 24:
        return "Evening"
    elif 0 <= hour < 6:
        return "Night"
    else:
        raise ValueError("Invalid hour value.")

aggregation_functions = {
    "Mean": compute_mean,
    "Max": compute_max,
    "Min": compute_min,
    "Median": compute_median,
    "Energy": compute_energy,
    "Kurtosis": compute_kurtosis,
    "Skewness": compute_skewness,
    "Mean Absolute Deviation": compute_mean_abs_dev,
    "Positive Counts": compute_positive_counts,
    "Negative Counts": compute_negative_counts,
    "Interquartile Range": compute_iqr,
    "Standard Deviation": compute_std_dev,
    "Count Above Mean": compute_count_above_mean,
    "Range": compute_range,
    "Peak Count": compute_peak_count,
    "Median Absolute Deviation": compute_median_abs_dev,
    "Zero Crossing Rate": compute_zcr,
    "Count Percentile Anomaly": count_percentile_anomaly
}

In [None]:
df=create_df('Sensordata.xlsx', '1700')

In [None]:
df

Unnamed: 0,sr,gatewayid,deviceid,evt,sensor_data,dbcreatetime
0,8458175,GA00000069,S1I1A1700,SAMPLES,"00000,S1I1A1700,00:00:00:00:00:00,0,11444,1693...",2023-09-01 00:02:44.899 +0530
1,8459217,GA00000069,S1I1A1700,SAMPLES,"00001,S1I1A1700,00:00:00:00:00:00,0,11445,1693...",2023-09-01 00:15:31.361 +0530
2,8459654,GA00000069,S1I1A1700,SAMPLES,"00001,S1I1A1700,00:00:00:00:00:00,0,11446,1693...",2023-09-01 00:23:41.524 +0530
3,8460259,GA00000069,S1I1A1700,SAMPLES,"00001,S1I1A1700,00:00:00:00:00:00,0,11447,1693...",2023-09-01 00:32:43.730 +0530
4,8461005,GA00000069,S1I1A1700,SAMPLES,"00000,S1I1A1700,00:00:00:00:00:00,0,11448,1693...",2023-09-01 00:42:39.166 +0530
...,...,...,...,...,...,...
8384,13826069,GA00000069,S1I1A1700,SAMPLES,"00000,S1I1A1700,00:00:00:00:00:00,0,20223,1698...",2023-10-31 23:12:37.521 +0530
8385,13826628,GA00000069,S1I1A1700,SAMPLES,"00001,S1I1A1700,00:00:00:00:00:00,0,20224,1698...",2023-10-31 23:22:39.718 +0530
8386,13827176,GA00000069,S1I1A1700,SAMPLES,"00000,S1I1A1700,00:00:00:00:00:00,0,20225,1698...",2023-10-31 23:32:44.330 +0530
8387,13827673,GA00000069,S1I1A1700,SAMPLES,"00000,S1I1A1700,00:00:00:00:00:00,0,20226,1698...",2023-10-31 23:42:37.250 +0530


In [None]:
X1,num_rows=preprocess1(df)

  data[f'{my_col}_1'] = deg(data[mz_col], data[mx_col])
  data[f'{mz_col}_1'] = deg(data[mx_col], data[my_col])
  data[f'{mx_col}_1'] = deg(data[my_col], data[mz_col])
  data[f'{my_col}_1'] = deg(data[mz_col], data[mx_col])
  data[f'{mz_col}_1'] = deg(data[mx_col], data[my_col])
  data[f'{mx_col}_1'] = deg(data[my_col], data[mz_col])
  data[f'{my_col}_1'] = deg(data[mz_col], data[mx_col])
  data[f'{mz_col}_1'] = deg(data[mx_col], data[my_col])
  data[f'{mx_col}_1'] = deg(data[my_col], data[mz_col])
  data[f'{my_col}_1'] = deg(data[mz_col], data[mx_col])
  data[f'{mz_col}_1'] = deg(data[mx_col], data[my_col])
  data[f'{mx_col}_1'] = deg(data[my_col], data[mz_col])
  data[f'{my_col}_1'] = deg(data[mz_col], data[mx_col])
  data[f'{mz_col}_1'] = deg(data[mx_col], data[my_col])
  data[f'{mx_col}_1'] = deg(data[my_col], data[mz_col])
  data[f'{my_col}_1'] = deg(data[mz_col], data[mx_col])
  data[f'{mz_col}_1'] = deg(data[mx_col], data[my_col])
  data[f'{mx_col}_1'] = deg(data[my_col], data[m

In [None]:
X1

Unnamed: 0,a_res_0,a_res_1,a_res_2,a_res_3,a_res_4,a_res_5,a_res_6,a_res_7,a_res_8,a_res_9,...,mx_49_1,my_49_1,mz_49_1,mx_50_1,my_50_1,mz_50_1,mx_51_1,my_51_1,mz_51_1,time
0,71.874891,71.874891,71.874891,71.874891,71.874891,71.874891,71.874891,71.874891,71.874891,71.874891,...,-78.77,115.68,-174.55,-78.90,119.98,-173.54,-79.30,122.91,-173.03,2023-09-01 00:00:00+05:30
1,71.533209,71.533209,71.533209,71.533209,71.533209,71.533209,71.533209,71.358251,71.533209,71.358251,...,-65.82,48.11,158.06,-65.84,48.48,158.34,-64.08,52.35,159.44,2023-09-01 00:10:00+05:30
2,72.180330,72.642962,72.463784,72.180330,72.180330,72.180330,72.180330,72.180330,72.642962,72.642962,...,-67.56,101.48,-175.21,-77.20,113.75,-174.29,-71.15,105.47,-174.60,2023-09-01 00:20:00+05:30
3,71.477269,71.477269,71.477269,71.477269,71.477269,71.477269,71.477269,71.477269,71.288148,71.288148,...,-68.84,43.03,157.47,-69.47,44.31,159.02,-70.64,43.57,159.73,2023-09-01 00:30:00+05:30
4,71.930522,71.637979,71.840100,71.637979,71.637979,71.728655,70.753092,71.728655,71.728655,71.930522,...,-66.99,47.91,159.02,-66.43,49.84,159.79,-66.99,47.57,158.79,2023-09-01 00:40:00+05:30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8384,71.568149,72.013888,72.013888,71.091490,72.698005,71.686819,71.686819,71.624018,71.895758,71.895758,...,-133.81,-61.05,152.05,-134.40,-60.74,151.25,-135.00,-60.34,150.34,2023-10-31 23:10:00+05:30
8385,71.637979,71.372264,71.196910,71.449283,71.274119,71.449283,71.449283,71.449283,70.971825,71.630999,...,-120.80,-40.73,145.30,-123.17,-42.53,144.53,-122.91,-41.73,144.04,2023-10-31 23:20:00+05:30
8386,71.239034,71.651936,71.175839,71.175839,70.724819,71.147734,71.617037,70.738957,71.651936,71.182863,...,-129.61,-124.02,-150.81,-130.04,-123.80,-150.64,-127.29,-123.46,-153.28,2023-10-31 23:30:00+05:30
8387,71.470274,71.540198,71.540198,71.644958,71.644958,71.644958,71.540198,71.644958,71.644958,71.644958,...,-131.26,-31.13,124.54,-130.51,-29.43,123.44,-132.99,-32.45,124.30,2023-10-31 23:40:00+05:30


In [None]:
# prompt: for X1 ensure that time in consecutive rows differ by 10 minutes, not add a row duplicating the other column values of the previous row. and create X1_new

import pandas as pd

def adjust_time_difference(X1):
  """
  Adjusts the time in X1 so that consecutive rows differ by 10 minutes.

  Args:
    X1: The original DataFrame.

  Returns:
    A new DataFrame with adjusted time values.
  """

  X1_new = X1.copy()
  for i in range(1, len(X1_new)):
    previous_time = X1_new.loc[i - 1, 'time']
    current_time = X1_new.loc[i, 'time']
    time_difference = current_time - previous_time
    if time_difference != pd.Timedelta(minutes=10):
      X1_new.loc[i, 'time'] = previous_time + pd.Timedelta(minutes=10)

  return X1_new

X1_new = adjust_time_difference(X1)
X1_new


Unnamed: 0,a_res_0,a_res_1,a_res_2,a_res_3,a_res_4,a_res_5,a_res_6,a_res_7,a_res_8,a_res_9,...,mx_49_1,my_49_1,mz_49_1,mx_50_1,my_50_1,mz_50_1,mx_51_1,my_51_1,mz_51_1,time
0,71.874891,71.874891,71.874891,71.874891,71.874891,71.874891,71.874891,71.874891,71.874891,71.874891,...,-78.77,115.68,-174.55,-78.90,119.98,-173.54,-79.30,122.91,-173.03,2023-09-01 00:00:00+05:30
1,71.533209,71.533209,71.533209,71.533209,71.533209,71.533209,71.533209,71.358251,71.533209,71.358251,...,-65.82,48.11,158.06,-65.84,48.48,158.34,-64.08,52.35,159.44,2023-09-01 00:10:00+05:30
2,72.180330,72.642962,72.463784,72.180330,72.180330,72.180330,72.180330,72.180330,72.642962,72.642962,...,-67.56,101.48,-175.21,-77.20,113.75,-174.29,-71.15,105.47,-174.60,2023-09-01 00:20:00+05:30
3,71.477269,71.477269,71.477269,71.477269,71.477269,71.477269,71.477269,71.477269,71.288148,71.288148,...,-68.84,43.03,157.47,-69.47,44.31,159.02,-70.64,43.57,159.73,2023-09-01 00:30:00+05:30
4,71.930522,71.637979,71.840100,71.637979,71.637979,71.728655,70.753092,71.728655,71.728655,71.930522,...,-66.99,47.91,159.02,-66.43,49.84,159.79,-66.99,47.57,158.79,2023-09-01 00:40:00+05:30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8384,71.568149,72.013888,72.013888,71.091490,72.698005,71.686819,71.686819,71.624018,71.895758,71.895758,...,-133.81,-61.05,152.05,-134.40,-60.74,151.25,-135.00,-60.34,150.34,2023-10-29 05:20:00+05:30
8385,71.637979,71.372264,71.196910,71.449283,71.274119,71.449283,71.449283,71.449283,70.971825,71.630999,...,-120.80,-40.73,145.30,-123.17,-42.53,144.53,-122.91,-41.73,144.04,2023-10-29 05:30:00+05:30
8386,71.239034,71.651936,71.175839,71.175839,70.724819,71.147734,71.617037,70.738957,71.651936,71.182863,...,-129.61,-124.02,-150.81,-130.04,-123.80,-150.64,-127.29,-123.46,-153.28,2023-10-29 05:40:00+05:30
8387,71.470274,71.540198,71.540198,71.644958,71.644958,71.644958,71.540198,71.644958,71.644958,71.644958,...,-131.26,-31.13,124.54,-130.51,-29.43,123.44,-132.99,-32.45,124.30,2023-10-29 05:50:00+05:30


In [None]:
merged_df= preprocess2(X1_new)

In [None]:
merged_df

Unnamed: 0,a_res,ax,ay,az,g_res,gx,gy,gz,m_res,mx,my,mz
0,71.874891,-29.0,-47.0,46.0,1054.656342,770.0,630.0,-350.0,272.899249,-28.0,-267.0,49.0
1,71.874891,-29.0,-47.0,46.0,1158.706175,490.0,1050.0,0.0,277.023465,-30.0,-271.0,49.0
2,71.874891,-29.0,-47.0,46.0,865.852181,560.0,560.0,-350.0,273.777282,-27.0,-268.0,49.0
3,71.874891,-29.0,-47.0,46.0,785.748051,350.0,700.0,-70.0,275.457801,-24.0,-270.0,49.0
4,71.874891,-29.0,-47.0,46.0,871.492972,350.0,770.0,-210.0,276.799566,-24.0,-271.0,51.0
...,...,...,...,...,...,...,...,...,...,...,...,...
436223,71.112587,26.0,-66.0,5.0,1254.153101,1120.0,-560.0,-70.0,278.951609,-64.0,-180.0,-141.0
436224,71.112587,26.0,-66.0,5.0,1167.133240,770.0,420.0,-770.0,281.172545,-63.0,-178.0,-148.0
436225,71.112587,26.0,-66.0,5.0,618.223261,140.0,490.0,-350.0,282.575654,-63.0,-175.0,-148.0
436226,71.112587,26.0,-66.0,5.0,944.351629,770.0,420.0,-350.0,282.187881,-42.0,-168.0,-159.0


In [None]:
new_df= preprocess3(merged_df)

In [None]:
new_df

Unnamed: 0,a_res,ax,ay,az,g_res,gx,gy,gz,m_res,mx,...,ay_diff_fft,az_diff_fft,g_res_diff_fft,gx_diff_fft,gy_diff_fft,gz_diff_fft,m_res_diff_fft,mx_diff_fft,my_diff_fft,mz_diff_fft
0,71.874891,-29.0,-47.0,46.0,1054.656342,770.0,630.0,-350.0,272.899249,-28.0,...,-18.420681,-18.420681,5.409198,4.248495,5.857933,-18.420681,1.348004,1.609438,1.098612,0.693147
1,71.874891,-29.0,-47.0,46.0,1158.706175,490.0,1050.0,0.0,277.023465,-30.0,...,-1.049722,-18.420681,6.339172,5.538302,6.385039,5.400403,1.720535,1.126299,1.468446,1.884503
2,71.874891,-29.0,-47.0,46.0,865.852181,560.0,560.0,-350.0,273.777282,-27.0,...,-0.465133,-18.420681,7.057635,4.735616,6.807573,6.613071,1.731974,2.142199,1.799705,1.806188
3,71.874891,-29.0,-47.0,46.0,785.748051,350.0,700.0,-70.0,275.457801,-24.0,...,-0.253690,-18.420681,6.962566,6.062288,7.385776,6.920658,1.817315,1.108702,1.874948,1.193113
4,71.874891,-29.0,-47.0,46.0,871.492972,350.0,770.0,-210.0,276.799566,-24.0,...,-0.265908,-18.420681,8.002691,7.335445,7.479135,6.725517,2.843410,2.409666,2.927068,2.449256
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
436223,71.112587,26.0,-66.0,5.0,1254.153101,1120.0,-560.0,-70.0,278.951609,-64.0,...,0.961857,0.724272,6.349657,7.665145,7.842435,7.115408,2.682473,5.791799,4.838131,4.937509
436224,71.112587,26.0,-66.0,5.0,1167.133240,770.0,420.0,-770.0,281.172545,-63.0,...,-0.044728,0.473311,7.980802,7.559561,6.163395,7.294506,2.777293,5.546644,4.686244,4.881458
436225,71.112587,26.0,-66.0,5.0,618.223261,140.0,490.0,-350.0,282.575654,-63.0,...,0.084620,0.900881,7.322701,7.397339,6.987828,6.501591,2.666697,5.362011,4.767558,4.242803
436226,71.112587,26.0,-66.0,5.0,944.351629,770.0,420.0,-350.0,282.187881,-42.0,...,0.434251,0.798873,7.095917,6.914342,6.797360,6.150460,2.298711,4.253013,3.245296,4.671260


In [None]:
new_df, X1_new = process_with_time(new_df,X1_new)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X1["time_of_day"] = X1["time"].apply(categorize_time_of_day)  # Assuming 'categorize_time_of_day' is defined elsewhere


In [None]:
new_df

In [None]:
X1_new

In [None]:
filtered_new_df= filter_time_window(new_df,'2023-09-02','2023-09-23','00:00:00','00:00:00')

In [None]:
filtered_new_df

Unnamed: 0,a_res,ax,ay,az,g_res,gx,gy,gz,m_res,mx,...,az_diff_fft,g_res_diff_fft,gx_diff_fft,gy_diff_fft,gz_diff_fft,m_res_diff_fft,mx_diff_fft,my_diff_fft,mz_diff_fft,time
7540,71.182863,23.0,-67.0,7.0,1429.440450,980.0,700.0,-770.0,188.767052,-122.56,...,1.000000e-08,6.184332,5.634790,4.248495,6.551080,2.413942,0.371564,1.650580,1.247032,2023-09-02 00:10:03+05:30
7541,71.288148,23.0,-67.0,8.0,795.047168,350.0,700.0,-140.0,192.317446,-121.22,...,-2.948708e-02,6.721922,6.563047,4.828327,6.318249,2.350396,0.944314,1.193975,0.835890,2023-09-02 00:10:04+05:30
7542,71.288148,23.0,-67.0,8.0,785.748051,420.0,210.0,-630.0,195.920902,-123.44,...,-1.215291e-01,6.792271,7.320835,5.907791,6.224076,2.947281,0.372216,1.369867,0.993642,2023-09-02 00:10:05+05:30
7543,71.182863,23.0,-67.0,7.0,1238.426421,840.0,840.0,-350.0,194.553335,-120.19,...,-2.877777e-01,7.216260,7.676173,6.219544,6.844415,2.597869,0.493013,2.002293,1.575176,2023-09-02 00:10:06+05:30
7544,71.288148,23.0,-67.0,8.0,842.911621,560.0,630.0,0.0,192.356440,-122.04,...,-5.482595e-01,7.488584,7.242107,7.010970,7.365240,2.676705,0.993424,1.241531,0.762418,2023-09-02 00:10:07+05:30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
164731,67.572184,-49.0,-46.0,-7.0,32186.416327,3570.0,-31936.0,-1820.0,282.200283,-162.44,...,3.533471e+00,11.472973,11.104599,11.198259,10.138062,5.136089,6.702159,4.720516,4.472987,2023-09-22 23:50:50+05:30
164732,66.880490,-48.0,-45.0,-12.0,32966.791048,-29976.0,-11356.0,7700.0,320.184322,-164.71,...,1.714076e+00,10.702555,10.640642,11.780235,9.978132,5.137882,6.533236,4.024888,4.044809,2023-09-22 23:50:51+05:30
164733,66.820655,-48.0,-44.0,-15.0,30040.441075,22540.0,19740.0,2170.0,364.028845,-156.60,...,3.016916e+00,10.222509,8.569489,10.556958,9.994569,4.467341,6.196262,4.340611,3.755206,2023-09-22 23:50:52+05:30
164734,66.558245,-46.0,-45.0,-17.0,40125.878184,-30380.0,-25410.0,-6440.0,355.444792,-164.14,...,3.242860e+00,9.039390,10.099478,10.865708,10.005133,5.190794,5.298582,4.247640,2.799899,2023-09-22 23:50:53+05:30


In [None]:
xf1=filter_time_window(X1_new,'2023-09-02','2023-09-23','00:00:00','00:00:00')

In [None]:
xf1

Unnamed: 0,time,time_of_day
145,2023-09-02 00:10:00+05:30,Night
146,2023-09-02 00:20:00+05:30,Night
147,2023-09-02 00:30:00+05:30,Night
148,2023-09-02 00:40:00+05:30,Night
149,2023-09-02 00:50:00+05:30,Night
...,...,...
3164,2023-09-22 23:20:00+05:30,Evening
3165,2023-09-22 23:30:00+05:30,Evening
3166,2023-09-22 23:40:00+05:30,Evening
3167,2023-09-22 23:50:00+05:30,Evening


In [None]:
# prompt: Using dataframe xf1: drop last element of xf1

xf1 = xf1[:-1] # Drops the last element of the DataFrame


In [None]:
metric_series= process_metric_streams(filtered_new_df, compute_mean, 'g_res')

In [None]:
metric_series

[965.7675839489808,
 2663.043921911677,
 1078.9432017022389,
 3358.1413216154115,
 3003.9241085262943,
 3676.085910583207,
 3674.2879532605166,
 1226.9953489895322,
 1033.0572618578078,
 1119.853200407442,
 4223.9016457022235,
 2921.8535052316533,
 1200.3600493974368,
 4388.538007742662,
 2948.7881021231588,
 4454.561134217098,
 1034.3478116963108,
 1051.3411549082894,
 1027.32660455303,
 3209.550168193934,
 871.4392711917696,
 1341.1678985579472,
 4329.775665789611,
 3444.355000913919,
 4398.665999573483,
 1083.1634912886764,
 19187.769918232872,
 17182.937528547583,
 16528.307297696407,
 18928.76145070533,
 21013.668682115018,
 18686.35915173928,
 1533.0573543395708,
 3924.2172394677027,
 3382.124192383562,
 933.3030993428739,
 2392.9432255470074,
 1326.9817948233422,
 3118.978880125708,
 2385.761697802065,
 14766.211910935972,
 16264.112588502134,
 17394.140340001588,
 23047.017129454667,
 12349.803213406933,
 16071.6240418177,
 10911.049340080024,
 7715.140637085042,
 3593.00092554

In [None]:
len(metric_series)

3023

In [None]:
xf1=make_X1(xf1, metric_series)
xf1



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,time,time_of_day,metric_values
145,2023-09-02 00:10:00+05:30,Night,965.767584
146,2023-09-02 00:20:00+05:30,Night,2663.043922
147,2023-09-02 00:30:00+05:30,Night,1078.943202
148,2023-09-02 00:40:00+05:30,Night,3358.141322
149,2023-09-02 00:50:00+05:30,Night,3003.924109
...,...,...,...
3163,2023-09-22 23:10:00+05:30,Evening,16583.338198
3164,2023-09-22 23:20:00+05:30,Evening,15377.166515
3165,2023-09-22 23:30:00+05:30,Evening,14237.869302
3166,2023-09-22 23:40:00+05:30,Evening,14651.275886


In [None]:
# prompt: Using dataframe xf1: make 4 dataframes out of X1 such that 1 consisting of hr0-6, hr6-12, hr12-18, hr 18-0 for all days

# Create time intervals
intervals = [
    (0, 6),
    (6, 12),
    (12, 18),
    (18, 24)
]

# Create a list to store the dataframes
dfs = []

# Iterate over the intervals and create dataframes
for start, end in intervals:
    # Filter the dataframe based on the time interval
    df = xf1[(xf1['time'].dt.hour >= start) & (xf1['time'].dt.hour < end)]
    dfs.append(df)

# Print the first 5 rows of each dataframe
for df in dfs:
    print(df.head())


                         time time_of_day  metric_values
145 2023-09-02 00:10:00+05:30       Night     965.767584
146 2023-09-02 00:20:00+05:30       Night    2663.043922
147 2023-09-02 00:30:00+05:30       Night    1078.943202
148 2023-09-02 00:40:00+05:30       Night    3358.141322
149 2023-09-02 00:50:00+05:30       Night    3003.924109
                         time time_of_day  metric_values
180 2023-09-02 06:00:00+05:30     Morning     933.303099
181 2023-09-02 06:10:00+05:30     Morning    2392.943226
182 2023-09-02 06:20:00+05:30     Morning    1326.981795
183 2023-09-02 06:30:00+05:30     Morning    3118.978880
184 2023-09-02 06:40:00+05:30     Morning    2385.761698
                         time time_of_day  metric_values
216 2023-09-02 12:00:00+05:30   Afternoon    1225.947693
217 2023-09-02 12:10:00+05:30   Afternoon    2369.807261
218 2023-09-02 12:20:00+05:30   Afternoon    1174.049530
219 2023-09-02 12:30:00+05:30   Afternoon    2797.889368
220 2023-09-02 12:40:00+05:30  

In [None]:
dfs[0]

Unnamed: 0,time,time_of_day,metric_values
145,2023-09-02 00:10:00+05:30,Night,965.767584
146,2023-09-02 00:20:00+05:30,Night,2663.043922
147,2023-09-02 00:30:00+05:30,Night,1078.943202
148,2023-09-02 00:40:00+05:30,Night,3358.141322
149,2023-09-02 00:50:00+05:30,Night,3003.924109
...,...,...,...
3055,2023-09-22 05:10:00+05:30,Night,8498.212035
3056,2023-09-22 05:20:00+05:30,Night,16768.932334
3057,2023-09-22 05:30:00+05:30,Night,5267.293748
3058,2023-09-22 05:40:00+05:30,Night,11816.328106


In [None]:
import pandas as pd
df_new_0 = pd.DataFrame(dfs[0])
df_new_0

Unnamed: 0,time,time_of_day,metric_values
145,2023-09-02 00:10:00+05:30,Night,965.767584
146,2023-09-02 00:20:00+05:30,Night,2663.043922
147,2023-09-02 00:30:00+05:30,Night,1078.943202
148,2023-09-02 00:40:00+05:30,Night,3358.141322
149,2023-09-02 00:50:00+05:30,Night,3003.924109
...,...,...,...
3055,2023-09-22 05:10:00+05:30,Night,8498.212035
3056,2023-09-22 05:20:00+05:30,Night,16768.932334
3057,2023-09-22 05:30:00+05:30,Night,5267.293748
3058,2023-09-22 05:40:00+05:30,Night,11816.328106


In [None]:
best_model = optimize_contamination(df_new_0, optimizer='grid')

In [None]:
df_new_0['iso_anomaly'] = best_model.predict(df_new_0[['metric_values']]) == -1


X has feature names, but IsolationForest was fitted without feature names



In [None]:
df_new_0

Unnamed: 0,time,time_of_day,metric_values,iso_anomaly
145,2023-09-02 00:10:00+05:30,Night,965.767584,False
146,2023-09-02 00:20:00+05:30,Night,2663.043922,False
147,2023-09-02 00:30:00+05:30,Night,1078.943202,False
148,2023-09-02 00:40:00+05:30,Night,3358.141322,False
149,2023-09-02 00:50:00+05:30,Night,3003.924109,False
...,...,...,...,...
3055,2023-09-22 05:10:00+05:30,Night,8498.212035,False
3056,2023-09-22 05:20:00+05:30,Night,16768.932334,False
3057,2023-09-22 05:30:00+05:30,Night,5267.293748,False
3058,2023-09-22 05:40:00+05:30,Night,11816.328106,False


In [None]:
len(df_new_0[df_new_0['iso_anomaly'] == True])

10

In [None]:
import plotly.graph_objects as go

# Create an interactive scatter plot
fig = go.Figure()

# Add scatter trace for normal data points
fig.add_trace(go.Scatter(
    x=df_new_0[df_new_0['iso_anomaly'] == False]['time'],
    y=df_new_0[df_new_0['iso_anomaly'] == False]['metric_values'],
    mode='markers',
    name='Normal'
))

# Add scatter trace for anomaly data points
fig.add_trace(go.Scatter(
    x=df_new_0[df_new_0['iso_anomaly'] == True]['time'],
    y=df_new_0[df_new_0['iso_anomaly'] == True]['metric_values'],
    mode='markers',
    name='Anomaly',
    marker=dict(color='red')
))

# Set plot title and labels
fig.update_layout(
    title='Anomaly Detection',
    xaxis_title='Time',
    yaxis_title='Metric Values'
)

# Display the plot
fig.show()

In [None]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
df_new_0['iso_anomaly_encoded'] = le.fit_transform(df_new_0['iso_anomaly'])
df_new_0

Unnamed: 0,time,time_of_day,metric_values,iso_anomaly,iso_anomaly_encoded
145,2023-09-02 00:10:00+05:30,Night,965.767584,False,0
146,2023-09-02 00:20:00+05:30,Night,2663.043922,False,0
147,2023-09-02 00:30:00+05:30,Night,1078.943202,False,0
148,2023-09-02 00:40:00+05:30,Night,3358.141322,False,0
149,2023-09-02 00:50:00+05:30,Night,3003.924109,False,0
...,...,...,...,...,...
3055,2023-09-22 05:10:00+05:30,Night,8498.212035,False,0
3056,2023-09-22 05:20:00+05:30,Night,16768.932334,False,0
3057,2023-09-22 05:30:00+05:30,Night,5267.293748,False,0
3058,2023-09-22 05:40:00+05:30,Night,11816.328106,False,0


In [None]:
df_new_00 = df_new_0.drop(['time_of_day', 'iso_anomaly'], axis=1) # Drop specified columns
df_new_00.head()

Unnamed: 0,time,metric_values,iso_anomaly_encoded
145,2023-09-02 00:10:00+05:30,965.767584,0
146,2023-09-02 00:20:00+05:30,2663.043922,0
147,2023-09-02 00:30:00+05:30,1078.943202,0
148,2023-09-02 00:40:00+05:30,3358.141322,0
149,2023-09-02 00:50:00+05:30,3003.924109,0


In [None]:
df_new_00['rolling_mean'] = df_new_00['metric_values'].rolling(window=36).mean() # Calculate rolling mean with window of 36
df_new_00['rolling_std'] = df_new_00['metric_values'].rolling(window=36).std()  # Calculate rolling standard deviation with window of 36

In [None]:
df_new_00

Unnamed: 0,time,metric_values,iso_anomaly_encoded,rolling_mean,rolling_std
145,2023-09-02 00:10:00+05:30,965.767584,0,,
146,2023-09-02 00:20:00+05:30,2663.043922,0,,
147,2023-09-02 00:30:00+05:30,1078.943202,0,,
148,2023-09-02 00:40:00+05:30,3358.141322,0,,
149,2023-09-02 00:50:00+05:30,3003.924109,0,,
...,...,...,...,...,...
3055,2023-09-22 05:10:00+05:30,8498.212035,0,8642.665734,5747.932760
3056,2023-09-22 05:20:00+05:30,16768.932334,0,8731.262945,5849.878009
3057,2023-09-22 05:30:00+05:30,5267.293748,0,8768.625104,5822.517439
3058,2023-09-22 05:40:00+05:30,11816.328106,0,8790.846910,5832.858419


In [None]:
# Create six empty DataFrames
dfs = [pd.DataFrame() for _ in range(6)]

# Group the data into chunks of 6 and assign to the respective DataFrame
for i in range(0, len(df), 6):
    chunk = df.iloc[i:i+6]
    dfs[(i // 6) % 6] = pd.concat([dfs[(i // 6) % 6], chunk], ignore_index=True)

# Output the results
for idx, group in enumerate(dfs, start=1):
    print(f"DataFrame {idx}:")
    print(group)
    print("\n")

DataFrame 1:
                         time time_of_day  metric_values
0   2023-09-02 18:00:00+05:30     Evening   14687.024904
1   2023-09-02 18:10:00+05:30     Evening   12571.989266
2   2023-09-02 18:20:00+05:30     Evening   10866.333867
3   2023-09-02 18:30:00+05:30     Evening   14425.060594
4   2023-09-02 18:40:00+05:30     Evening   14219.014342
..                        ...         ...            ...
121 2023-09-22 18:10:00+05:30     Evening    1090.393697
122 2023-09-22 18:20:00+05:30     Evening    1298.890713
123 2023-09-22 18:30:00+05:30     Evening    1008.710753
124 2023-09-22 18:40:00+05:30     Evening     990.629910
125 2023-09-22 18:50:00+05:30     Evening    3536.510790

[126 rows x 3 columns]


DataFrame 2:
                         time time_of_day  metric_values
0   2023-09-02 19:00:00+05:30     Evening   10021.258627
1   2023-09-02 19:10:00+05:30     Evening   14542.034055
2   2023-09-02 19:20:00+05:30     Evening   15353.424080
3   2023-09-02 19:30:00+05:30     Ev

In [None]:
df4=dfs[3]

In [None]:
# Define the starting index
start_index = 36  # Example starting index
end_index = start_index + 36

# Extract 36 consecutive values from the specified starting index in df4
df4_consecutive_36_1 = df4.iloc[start_index:end_index]

In [None]:
# Define the starting index
start_index = 72 # Example starting index
end_index = start_index + 36

# Extract 36 consecutive values from the specified starting index in df4
df4_consecutive_36_2 = df4.iloc[start_index:end_index]

In [None]:
# Define the starting index
start_index = 72 # Example starting index
end_index = start_index + 36

# Extract 36 consecutive values from the specified starting index in df4
df4_consecutive_36_3 = df4.iloc[start_index:end_index]

In [None]:
df_new_00 = df_new_00.drop(['rolling_mean', 'rolling_std'], axis=1) # Drop specified columns
df_new_00

Unnamed: 0,time,metric_values,iso_anomaly_encoded
145,2023-09-02 00:10:00+05:30,965.767584,0
146,2023-09-02 00:20:00+05:30,2663.043922,0
147,2023-09-02 00:30:00+05:30,1078.943202,0
148,2023-09-02 00:40:00+05:30,3358.141322,0
149,2023-09-02 00:50:00+05:30,3003.924109,0
...,...,...,...
3055,2023-09-22 05:10:00+05:30,8498.212035,0
3056,2023-09-22 05:20:00+05:30,16768.932334,0
3057,2023-09-22 05:30:00+05:30,5267.293748,0
3058,2023-09-22 05:40:00+05:30,11816.328106,0


In [None]:
grouped = df_new_00.groupby(df_new_00.index // 6)

# Aggregate the groups, taking the first 'time' value and summing the 'metric_values'
result = grouped.agg({'time': 'first', 'metric_values': 'mean','iso_anomaly_encoded':'sum'}) # Use 'mean' to calculate the average

result

Unnamed: 0,time,metric_values,iso_anomaly_encoded
24,2023-09-02 00:10:00+05:30,2213.964028,0
25,2023-09-02 01:00:00+05:30,2492.363553,0
26,2023-09-02 02:00:00+05:30,2824.741435,0
27,2023-09-02 03:00:00+05:30,1971.766794,0
28,2023-09-02 04:00:00+05:30,10304.199873,0
...,...,...,...
505,2023-09-22 01:00:00+05:30,5221.943834,0
506,2023-09-22 02:00:00+05:30,7180.418332,0
507,2023-09-22 03:00:00+05:30,5365.522941,0
508,2023-09-22 04:00:00+05:30,10265.825774,0


In [None]:
result_copy = result.copy()  # Create a copy of the DataFrame
# Create a list of lists containing metric values in groups of 6
interiors_list = [df_new_00['metric_values'].tolist()[i:i+6] for i in range(0, len(df_new_00['metric_values']), 6)]  # Adjust the range to iterate over the entire 'metric_values' column
result_copy['interiors'] = interiors_list

In [None]:
result_copy

Unnamed: 0,time,metric_values,iso_anomaly_encoded,interiors
24,2023-09-02 00:10:00+05:30,2213.964028,0,"[965.7675839489808, 2663.043921911677, 1078.94..."
25,2023-09-02 01:00:00+05:30,2492.363553,0,"[3674.2879532605166, 1226.9953489895322, 1033...."
26,2023-09-02 02:00:00+05:30,2824.741435,0,"[1200.3600493974368, 4388.538007742662, 2948.7..."
27,2023-09-02 03:00:00+05:30,1971.766794,0,"[1027.32660455303, 3209.550168193934, 871.4392..."
28,2023-09-02 04:00:00+05:30,10304.199873,0,"[4398.665999573483, 1083.1634912886764, 19187...."
...,...,...,...,...
505,2023-09-22 01:00:00+05:30,5221.943834,0,"[6710.344584671185, 2618.52242102351, 3647.152..."
506,2023-09-22 02:00:00+05:30,7180.418332,0,"[4206.0068760315025, 7681.5233730848895, 8539...."
507,2023-09-22 03:00:00+05:30,5365.522941,0,"[6866.612181745661, 3556.920270402872, 3457.61..."
508,2023-09-22 04:00:00+05:30,10265.825774,0,"[10933.040701194874, 5924.666330495486, 4621.8..."


In [None]:
# prompt: Using dataframe result_copy: make a new dataframe, named metdat such that (1st,7th,..6 values), (2nd,8th,...6values), till end are aggregated such that for time:first value, metric_values:mean, iso_anomaly_encoded: sum, interiors: combining the lists is done, strictly 120 rows expected in metdat

import pandas as pd

# Create a list to store the aggregated data
metdat_data = []

# Iterate over the result_copy dataframe in steps of 6
for i in range(0, len(result_copy)-6):
    # Extract the relevant data for the current group
    group = result_copy.iloc[i: i + 6]

    # Calculate the aggregated values
    time = group['time'].iloc[0]  # First value of 'time'
    metric_values_mean = group['metric_values'].mean()
    iso_anomaly_sum = group['iso_anomaly_encoded'].sum()
    interiors_combined = sum(group['interiors'].tolist(), [])  # Combine the lists

    # Append the aggregated data to the list
    metdat_data.append({
        'time': time,
        'metric_values': metric_values_mean,
        'iso_anomaly_encoded': iso_anomaly_sum,
        'interiors': interiors_combined
    })

# Create the new dataframe 'metdat'
metdat = pd.DataFrame(metdat_data)

# Check if the number of rows is 120
assert len(metdat) == 120, "The number of rows in 'metdat' is not 120."

# Display the first few rows of 'metdat'
print(metdat.head())


                       time  metric_values  iso_anomaly_encoded  \
0 2023-09-02 00:10:00+05:30    5175.288949                    0   
1 2023-09-02 01:00:00+05:30    6096.718420                    0   
2 2023-09-02 02:00:00+05:30    6234.536167                    0   
3 2023-09-02 03:00:00+05:30    6209.370091                    0   
4 2023-09-02 04:00:00+05:30    6292.325794                    0   

                                           interiors  
0  [965.7675839489808, 2663.043921911677, 1078.94...  
1  [3674.2879532605166, 1226.9953489895322, 1033....  
2  [1200.3600493974368, 4388.538007742662, 2948.7...  
3  [1027.32660455303, 3209.550168193934, 871.4392...  
4  [4398.665999573483, 1083.1634912886764, 19187....  


In [None]:
len(metdat)


120

In [None]:
metdat

Unnamed: 0,time,metric_values,iso_anomaly_encoded,interiors
0,2023-09-02 00:10:00+05:30,5175.288949,0,"[965.7675839489808, 2663.043921911677, 1078.94..."
1,2023-09-02 01:00:00+05:30,6096.718420,0,"[3674.2879532605166, 1226.9953489895322, 1033...."
2,2023-09-02 02:00:00+05:30,6234.536167,0,"[1200.3600493974368, 4388.538007742662, 2948.7..."
3,2023-09-02 03:00:00+05:30,6209.370091,0,"[1027.32660455303, 3209.550168193934, 871.4392..."
4,2023-09-02 04:00:00+05:30,6292.325794,0,"[4398.665999573483, 1083.1634912886764, 19187...."
...,...,...,...,...
115,2023-09-21 01:00:00+05:30,8908.109007,0,"[9168.46275350252, 7887.792647201232, 3662.607..."
116,2023-09-21 02:00:00+05:30,8331.318505,0,"[6923.218885750261, 8310.253500366887, 7284.76..."
117,2023-09-21 03:00:00+05:30,7991.818292,0,"[4228.732742537166, 4170.148141928699, 3665.64..."
118,2023-09-21 04:00:00+05:30,7953.716310,0,"[1031.0099690421916, 1515.591660945862, 5824.7..."


In [None]:
sum(metdat['interiors'].apply(lambda x: len(x) if isinstance(x, list) else 0) == 36)

120

In [None]:
result_copy

Unnamed: 0,time,metric_values,iso_anomaly_encoded,interiors
24,2023-09-02 00:10:00+05:30,2213.964028,0,"[965.7675839489808, 2663.043921911677, 1078.94..."
25,2023-09-02 01:00:00+05:30,2492.363553,0,"[3674.2879532605166, 1226.9953489895322, 1033...."
26,2023-09-02 02:00:00+05:30,2824.741435,0,"[1200.3600493974368, 4388.538007742662, 2948.7..."
27,2023-09-02 03:00:00+05:30,1971.766794,0,"[1027.32660455303, 3209.550168193934, 871.4392..."
28,2023-09-02 04:00:00+05:30,10304.199873,0,"[4398.665999573483, 1083.1634912886764, 19187...."
...,...,...,...,...
505,2023-09-22 01:00:00+05:30,5221.943834,0,"[6710.344584671185, 2618.52242102351, 3647.152..."
506,2023-09-22 02:00:00+05:30,7180.418332,0,"[4206.0068760315025, 7681.5233730848895, 8539...."
507,2023-09-22 03:00:00+05:30,5365.522941,0,"[6866.612181745661, 3556.920270402872, 3457.61..."
508,2023-09-22 04:00:00+05:30,10265.825774,0,"[10933.040701194874, 5924.666330495486, 4621.8..."


In [None]:
result_copy1=result_copy.copy()

In [None]:
result_copy1

Unnamed: 0,time,metric_values,iso_anomaly_encoded,interiors
24,2023-09-02 00:10:00+05:30,2213.964028,0,"[965.7675839489808, 2663.043921911677, 1078.94..."
25,2023-09-02 01:00:00+05:30,2492.363553,0,"[3674.2879532605166, 1226.9953489895322, 1033...."
26,2023-09-02 02:00:00+05:30,2824.741435,0,"[1200.3600493974368, 4388.538007742662, 2948.7..."
27,2023-09-02 03:00:00+05:30,1971.766794,0,"[1027.32660455303, 3209.550168193934, 871.4392..."
28,2023-09-02 04:00:00+05:30,10304.199873,0,"[4398.665999573483, 1083.1634912886764, 19187...."
...,...,...,...,...
505,2023-09-22 01:00:00+05:30,5221.943834,0,"[6710.344584671185, 2618.52242102351, 3647.152..."
506,2023-09-22 02:00:00+05:30,7180.418332,0,"[4206.0068760315025, 7681.5233730848895, 8539...."
507,2023-09-22 03:00:00+05:30,5365.522941,0,"[6866.612181745661, 3556.920270402872, 3457.61..."
508,2023-09-22 04:00:00+05:30,10265.825774,0,"[10933.040701194874, 5924.666330495486, 4621.8..."


In [None]:
# prompt: Using dataframe result_copy1:  make new column name big_interior having the column interior of metdat dataframe, for last 6 rows of result_copy1, copy correspondingly from interiors of result_copy1

import numpy as np
# Create a new column 'big_interior' and fill it with NaN values
result_copy1['big_interior'] = np.nan

# Copy the 'interiors' values for the last 6 rows
result_copy1.loc[result_copy1.tail(6).index, 'big_interior'] = result_copy1.tail(6)['interiors'].values


In [None]:
result_copy1

Unnamed: 0,time,metric_values,iso_anomaly_encoded,interiors,big_interior
24,2023-09-02 00:10:00+05:30,2213.964028,0,"[965.7675839489808, 2663.043921911677, 1078.94...",
25,2023-09-02 01:00:00+05:30,2492.363553,0,"[3674.2879532605166, 1226.9953489895322, 1033....",
26,2023-09-02 02:00:00+05:30,2824.741435,0,"[1200.3600493974368, 4388.538007742662, 2948.7...",
27,2023-09-02 03:00:00+05:30,1971.766794,0,"[1027.32660455303, 3209.550168193934, 871.4392...",
28,2023-09-02 04:00:00+05:30,10304.199873,0,"[4398.665999573483, 1083.1634912886764, 19187....",
...,...,...,...,...,...
505,2023-09-22 01:00:00+05:30,5221.943834,0,"[6710.344584671185, 2618.52242102351, 3647.152...","[6710.344584671185, 2618.52242102351, 3647.152..."
506,2023-09-22 02:00:00+05:30,7180.418332,0,"[4206.0068760315025, 7681.5233730848895, 8539....","[4206.0068760315025, 7681.5233730848895, 8539...."
507,2023-09-22 03:00:00+05:30,5365.522941,0,"[6866.612181745661, 3556.920270402872, 3457.61...","[6866.612181745661, 3556.920270402872, 3457.61..."
508,2023-09-22 04:00:00+05:30,10265.825774,0,"[10933.040701194874, 5924.666330495486, 4621.8...","[10933.040701194874, 5924.666330495486, 4621.8..."


In [None]:
result_copy1.loc[result_copy1.head(120).index, 'big_interior'] = metdat.head(120)['interiors'].values

In [None]:
result_copy1

Unnamed: 0,time,metric_values,iso_anomaly_encoded,interiors,big_interior
24,2023-09-02 00:10:00+05:30,2213.964028,0,"[965.7675839489808, 2663.043921911677, 1078.94...","[965.7675839489808, 2663.043921911677, 1078.94..."
25,2023-09-02 01:00:00+05:30,2492.363553,0,"[3674.2879532605166, 1226.9953489895322, 1033....","[3674.2879532605166, 1226.9953489895322, 1033...."
26,2023-09-02 02:00:00+05:30,2824.741435,0,"[1200.3600493974368, 4388.538007742662, 2948.7...","[1200.3600493974368, 4388.538007742662, 2948.7..."
27,2023-09-02 03:00:00+05:30,1971.766794,0,"[1027.32660455303, 3209.550168193934, 871.4392...","[1027.32660455303, 3209.550168193934, 871.4392..."
28,2023-09-02 04:00:00+05:30,10304.199873,0,"[4398.665999573483, 1083.1634912886764, 19187....","[4398.665999573483, 1083.1634912886764, 19187...."
...,...,...,...,...,...
505,2023-09-22 01:00:00+05:30,5221.943834,0,"[6710.344584671185, 2618.52242102351, 3647.152...","[6710.344584671185, 2618.52242102351, 3647.152..."
506,2023-09-22 02:00:00+05:30,7180.418332,0,"[4206.0068760315025, 7681.5233730848895, 8539....","[4206.0068760315025, 7681.5233730848895, 8539...."
507,2023-09-22 03:00:00+05:30,5365.522941,0,"[6866.612181745661, 3556.920270402872, 3457.61...","[6866.612181745661, 3556.920270402872, 3457.61..."
508,2023-09-22 04:00:00+05:30,10265.825774,0,"[10933.040701194874, 5924.666330495486, 4621.8...","[10933.040701194874, 5924.666330495486, 4621.8..."


In [None]:
# prompt: Using dataframe result_copy1: print 121st row

# Print the 121st row (index 120)
print(result_copy1.iloc[120])


time                                           2023-09-22 00:00:00+05:30
metric_values                                               11946.902298
iso_anomaly_encoded                                                    0
interiors              [19176.101517141327, 19146.06144755415, 8724.2...
big_interior           [19176.101517141327, 19146.06144755415, 8724.2...
Name: 504, dtype: object


In [None]:
print(result_copy1.iloc[0]['big_interior'])

[965.7675839489808, 2663.043921911677, 1078.9432017022389, 3358.1413216154115, 3003.9241085262943, 3676.085910583207, 3674.2879532605166, 1226.9953489895322, 1033.0572618578078, 1119.853200407442, 4223.9016457022235, 2921.8535052316533, 1200.3600493974368, 4388.538007742662, 2948.7881021231588, 4454.561134217098, 1034.3478116963108, 1051.3411549082894, 1027.32660455303, 3209.550168193934, 871.4392711917696, 1341.1678985579472, 4329.775665789611, 3444.355000913919, 4398.665999573483, 1083.1634912886764, 19187.769918232872, 17182.937528547583, 16528.307297696407, 18928.76145070533, 21013.668682115018, 18686.35915173928, 1533.0573543395708, 3924.2172394677027, 3382.124192383562, 3101.056883848809]


In [None]:
import pandas as pd
import altair as alt

# Assuming 'result_copy1' is your DataFrame
# Extract the data for the first cell of big_interior
# Ensure you are selecting the data correctly to get a list or array-like object
first_cell_data = result_copy1.iloc[0]['big_interior']

# Create a DataFrame for plotting
plot_df = pd.DataFrame({
    'time': result_copy1['time'][:len(first_cell_data)],  # Match time length with data
    'value': first_cell_data
})

# Create the line chart
chart = alt.Chart(plot_df).mark_line().encode(
    x='time',
    y='value'
).properties(
    title='Value of First Cell in big_interior Over Time'
)

chart

In [None]:
import pandas as pd
import altair as alt

# Assuming 'result_copy1' is your DataFrame
# Extract the data for the first cell of big_interior
first_cell_data = result_copy1.iloc[0]['big_interior']

# Scale the data by dividing by 6
scaled_data = [x / 6 for x in first_cell_data]  # Change here: divide by 6

# Create a DataFrame for plotting with scaled data
plot_df = pd.DataFrame({
    'time': result_copy1['time'][:len(first_cell_data)],  # Match time length with data
    'value': scaled_data
})

# Create the line chart
chart = alt.Chart(plot_df).mark_line().encode(
    x='time',
    y='value'
).properties(
    title='Value of First Cell in big_interior Over Time (Scaled)'
)

chart

In [None]:
fin_met_dat= result_copy1.copy()

In [None]:
fin_met_dat

Unnamed: 0,time,metric_values,iso_anomaly_encoded,interiors,big_interior
24,2023-09-02 00:10:00+05:30,2213.964028,0,"[965.7675839489808, 2663.043921911677, 1078.94...","[965.7675839489808, 2663.043921911677, 1078.94..."
25,2023-09-02 01:00:00+05:30,2492.363553,0,"[3674.2879532605166, 1226.9953489895322, 1033....","[3674.2879532605166, 1226.9953489895322, 1033...."
26,2023-09-02 02:00:00+05:30,2824.741435,0,"[1200.3600493974368, 4388.538007742662, 2948.7...","[1200.3600493974368, 4388.538007742662, 2948.7..."
27,2023-09-02 03:00:00+05:30,1971.766794,0,"[1027.32660455303, 3209.550168193934, 871.4392...","[1027.32660455303, 3209.550168193934, 871.4392..."
28,2023-09-02 04:00:00+05:30,10304.199873,0,"[4398.665999573483, 1083.1634912886764, 19187....","[4398.665999573483, 1083.1634912886764, 19187...."
...,...,...,...,...,...
505,2023-09-22 01:00:00+05:30,5221.943834,0,"[6710.344584671185, 2618.52242102351, 3647.152...","[6710.344584671185, 2618.52242102351, 3647.152..."
506,2023-09-22 02:00:00+05:30,7180.418332,0,"[4206.0068760315025, 7681.5233730848895, 8539....","[4206.0068760315025, 7681.5233730848895, 8539...."
507,2023-09-22 03:00:00+05:30,5365.522941,0,"[6866.612181745661, 3556.920270402872, 3457.61...","[6866.612181745661, 3556.920270402872, 3457.61..."
508,2023-09-22 04:00:00+05:30,10265.825774,0,"[10933.040701194874, 5924.666330495486, 4621.8...","[10933.040701194874, 5924.666330495486, 4621.8..."


In [None]:
iso_interiors_list = [df_new_00['iso_anomaly_encoded'].tolist()[i:i+6] for i in range(0, len(df_new_00['iso_anomaly_encoded']), 6)]  # Adjust the range to iterate over the entire 'metric_values' column
fin_met_dat['iso_interiors'] = iso_interiors_list

In [None]:
fin_met_dat

Unnamed: 0,time,metric_values,iso_anomaly_encoded,interiors,big_interior,iso_interiors
24,2023-09-02 00:10:00+05:30,2213.964028,0,"[965.7675839489808, 2663.043921911677, 1078.94...","[965.7675839489808, 2663.043921911677, 1078.94...","[0, 0, 0, 0, 0, 0]"
25,2023-09-02 01:00:00+05:30,2492.363553,0,"[3674.2879532605166, 1226.9953489895322, 1033....","[3674.2879532605166, 1226.9953489895322, 1033....","[0, 0, 0, 0, 0, 0]"
26,2023-09-02 02:00:00+05:30,2824.741435,0,"[1200.3600493974368, 4388.538007742662, 2948.7...","[1200.3600493974368, 4388.538007742662, 2948.7...","[0, 0, 0, 0, 0, 0]"
27,2023-09-02 03:00:00+05:30,1971.766794,0,"[1027.32660455303, 3209.550168193934, 871.4392...","[1027.32660455303, 3209.550168193934, 871.4392...","[0, 0, 0, 0, 0, 0]"
28,2023-09-02 04:00:00+05:30,10304.199873,0,"[4398.665999573483, 1083.1634912886764, 19187....","[4398.665999573483, 1083.1634912886764, 19187....","[0, 0, 0, 0, 0, 0]"
...,...,...,...,...,...,...
505,2023-09-22 01:00:00+05:30,5221.943834,0,"[6710.344584671185, 2618.52242102351, 3647.152...","[6710.344584671185, 2618.52242102351, 3647.152...","[0, 0, 0, 0, 0, 0]"
506,2023-09-22 02:00:00+05:30,7180.418332,0,"[4206.0068760315025, 7681.5233730848895, 8539....","[4206.0068760315025, 7681.5233730848895, 8539....","[0, 0, 0, 0, 0, 0]"
507,2023-09-22 03:00:00+05:30,5365.522941,0,"[6866.612181745661, 3556.920270402872, 3457.61...","[6866.612181745661, 3556.920270402872, 3457.61...","[0, 0, 0, 0, 0, 0]"
508,2023-09-22 04:00:00+05:30,10265.825774,0,"[10933.040701194874, 5924.666330495486, 4621.8...","[10933.040701194874, 5924.666330495486, 4621.8...","[0, 0, 0, 0, 0, 1]"


In [None]:
metdata = []

# Iterate over the result_copy dataframe in steps of 6
for i in range(0, len(result_copy)-6):
    # Extract the relevant data for the current group
    group = fin_met_dat.iloc[i: i + 6]
    interiors_combined = sum(group['iso_interiors'].tolist(), [])  # Combine the lists

    # Append the aggregated data to the list
    metdata.append({
        'big_iso_interiors': interiors_combined
    })

# Create the new dataframe 'metdat'
met = pd.DataFrame(metdata)

In [None]:
met

Unnamed: 0,big_iso_interiors
0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...
115,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
116,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
117,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
118,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [None]:
fin_met_dat['big_iso_interiors'] = np.nan

# Copy the 'interiors' values for the last 6 rows
fin_met_dat.loc[fin_met_dat.tail(6).index, 'big_iso_interiors'] = fin_met_dat.tail(6)['iso_interiors'].values
fin_met_dat.loc[fin_met_dat.head(120).index, 'big_iso_interiors'] = met.head(120)['big_iso_interiors'].values

In [None]:
fin_met_dat

Unnamed: 0,time,metric_values,iso_anomaly_encoded,interiors,big_interior,iso_interiors,big_iso_interiors
24,2023-09-02 00:10:00+05:30,2213.964028,0,"[965.7675839489808, 2663.043921911677, 1078.94...","[965.7675839489808, 2663.043921911677, 1078.94...","[0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
25,2023-09-02 01:00:00+05:30,2492.363553,0,"[3674.2879532605166, 1226.9953489895322, 1033....","[3674.2879532605166, 1226.9953489895322, 1033....","[0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
26,2023-09-02 02:00:00+05:30,2824.741435,0,"[1200.3600493974368, 4388.538007742662, 2948.7...","[1200.3600493974368, 4388.538007742662, 2948.7...","[0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
27,2023-09-02 03:00:00+05:30,1971.766794,0,"[1027.32660455303, 3209.550168193934, 871.4392...","[1027.32660455303, 3209.550168193934, 871.4392...","[0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
28,2023-09-02 04:00:00+05:30,10304.199873,0,"[4398.665999573483, 1083.1634912886764, 19187....","[4398.665999573483, 1083.1634912886764, 19187....","[0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...,...,...,...
505,2023-09-22 01:00:00+05:30,5221.943834,0,"[6710.344584671185, 2618.52242102351, 3647.152...","[6710.344584671185, 2618.52242102351, 3647.152...","[0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0]"
506,2023-09-22 02:00:00+05:30,7180.418332,0,"[4206.0068760315025, 7681.5233730848895, 8539....","[4206.0068760315025, 7681.5233730848895, 8539....","[0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0]"
507,2023-09-22 03:00:00+05:30,5365.522941,0,"[6866.612181745661, 3556.920270402872, 3457.61...","[6866.612181745661, 3556.920270402872, 3457.61...","[0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0]"
508,2023-09-22 04:00:00+05:30,10265.825774,0,"[10933.040701194874, 5924.666330495486, 4621.8...","[10933.040701194874, 5924.666330495486, 4621.8...","[0, 0, 0, 0, 0, 1]","[0, 0, 0, 0, 0, 1]"


In [None]:
import altair as alt

# Create a selection for the cursor
cursor = alt.selection_single(on='mouseover', nearest=True, empty='none')

# Base chart for metric values
base = alt.Chart(result_copy1).mark_line(color='green').encode(
    x='time',
    y='metric_values'
).properties(
    width=400  # Set an explicit width for the base chart
)

# Points for anomalies
points = base.mark_point().encode(
    color=alt.condition(
        alt.datum['iso_anomaly_encoded'] > 0,
        'iso_anomaly_encoded:N',
        alt.value('lightgray')
    ),
    tooltip=['time', 'metric_values', 'iso_anomaly_encoded']
).add_selection(
    cursor
)

# Line for big_interior
interior_line = alt.Chart(result_copy1).transform_filter(
    cursor
).mark_line(color='yellow').encode(
    x='time',
    y='big_interior:Q',
    detail='big_interior:N'
).properties(
    width=base.properties()['width']  # Set width to 1/6th of the base chart
)

# Combine charts and add legend
chart = (base + points + interior_line).configure_legend(
    orient='bottom'
).interactive()

chart

In [None]:
import altair as alt

# Create a selection for the cursor
cursor = alt.selection_single(on='mouseover', nearest=True, empty='none')

# Base chart for metric values
base = alt.Chart(result_copy1).mark_line(color='green').encode(
    x='time',
    y='metric_values'
).properties(
    width=400  # Set an explicit width for the base chart
)

# Points for anomalies
points = base.mark_point().encode(
    color=alt.condition(
        alt.datum['iso_anomaly_encoded'] > 0,
        'iso_anomaly_encoded:N',
        alt.value('lightgray')
    ),
    tooltip=['time', 'metric_values', 'iso_anomaly_encoded']
).add_selection(
    cursor
)

# Line for big_interior, starting from the cursor position
interior_line = alt.Chart(result_copy1).transform_filter(
    cursor
).transform_calculate(
    # Calculate time values starting from the cursor position
    time_from_cursor='datum.time - datum.time[0]'
).mark_line(color='yellow').encode(
    x='time_from_cursor:T',  # Use the calculated time values
    y='big_interior:Q',
    detail='big_interior:N'
).properties(
    width=base.properties()['width'] / 6  # Set width to 1/6th of the base chart
)

# Combine charts and add legend
chart = (base + points + interior_line).configure_legend(
    orient='bottom'
).interactive()

chart

In [None]:
import altair as alt

# Create a selection for the cursor
cursor = alt.selection_single(on='mouseover', nearest=True, empty='none')

# Base chart for metric values
base = alt.Chart(result_copy1).mark_line(color='green').encode(
    x='time',
    y='metric_values'
).properties(
    width=400  # Set an explicit width for the base chart
)

# Points for anomalies
points = base.mark_point().encode(
    color=alt.condition(
        alt.datum['iso_anomaly_encoded'] > 0,
        'iso_anomaly_encoded:N',
        alt.value('lightgray')
    ),
    tooltip=['time', 'metric_values', 'iso_anomaly_encoded']
).add_selection(
    cursor
)

# Base chart for metric values
interior_line = alt.Chart(df4_consecutive_36_1).mark_line(color='yellow').encode(
    x='time',
    y='metric_values'
).properties(
    width=400  # Set an explicit width for the base chart
)

# Line for big_interior
#interior_line = alt.Chart(result_copy1).transform_filter(
#    cursor
#).mark_line(color='blue').encode(
#    x='time',
#    y='big_interior:Q',
#    detail='big_interior:N'
#).properties(
#    width=600  # Set width to 1/6th of the base chart
#)

# Combine charts and add legend
chart = (base + points + interior_line).configure_legend(
    orient='bottom'
).interactive()

chart

In [None]:
import altair as alt

# Create a selection for the cursor
cursor = alt.selection_single(on='mouseover', nearest=True, empty='none')

# Base chart for metric values
base = alt.Chart(result_copy1).mark_line(color='green').encode(
    x='time',
    y='metric_values'
).properties(
    width=400  # Set an explicit width for the base chart
)

# Points for anomalies
points = base.mark_point().encode(
    color=alt.condition(
        alt.datum['iso_anomaly_encoded'] > 0,
        'iso_anomaly_encoded:N',
        alt.value('lightgray')
    ),
    tooltip=['time', 'metric_values', 'iso_anomaly_encoded']
).add_selection(
    cursor
)

# Base chart for metric values
interior_line = alt.Chart(df4_consecutive_36_2).mark_line(color='yellow').encode(
    x='time',
    y='metric_values'
).properties(
    width=400  # Set an explicit width for the base chart
)

# Line for big_interior
#interior_line = alt.Chart(result_copy1).transform_filter(
#    cursor
#).mark_line(color='blue').encode(
#    x='time',
#    y='big_interior:Q',
#    detail='big_interior:N'
#).properties(
#    width=600  # Set width to 1/6th of the base chart
#)

# Combine charts and add legend
chart = (base + points + interior_line).configure_legend(
    orient='bottom'
).interactive()

chart

In [None]:
import altair as alt

# Create a selection for the cursor
cursor = alt.selection_single(on='mouseover', nearest=True, empty='none')

# Base chart for metric values
base = alt.Chart(result_copy1).mark_line(color='green').encode(
    x='time',
    y='metric_values'
).properties(
    width=400  # Set an explicit width for the base chart
)

# Points for anomalies
points = base.mark_point().encode(
    color=alt.condition(
        alt.datum['iso_anomaly_encoded'] > 0,
        'iso_anomaly_encoded:N',
        alt.value('lightgray')
    ),
    tooltip=['time', 'metric_values', 'iso_anomaly_encoded']
).add_selection(
    cursor
)

# Base chart for metric values
interior_line = alt.Chart(df4_consecutive_36_3).mark_line(color='yellow').encode(
    x='time',
    y='metric_values'
).properties(
    width=400  # Set an explicit width for the base chart
)

# Line for big_interior
#interior_line = alt.Chart(result_copy1).transform_filter(
#    cursor
#).mark_line(color='blue').encode(
#    x='time',
#    y='big_interior:Q',
#    detail='big_interior:N'
#).properties(
#    width=600  # Set width to 1/6th of the base chart
#)

# Combine charts and add legend
chart = (base + points + interior_line).configure_legend(
    orient='bottom'
).interactive()

chart

In [None]:
result_copy1['iso_anomaly_encoded'].unique() # Counts the number of unique values in the 'iso_anomaly_encoded' column

array([0, 1])

In [None]:
num =result_copy1['iso_anomaly_encoded'].nunique() # Counts the number of unique values in the 'iso_anomaly_encoded' column
num

2

In [None]:
!pip install plotly
import plotly.graph_objects as go

# Create the initial scatter plot
fig = go.Figure()
fig.add_trace(go.Scatter(x=result_copy['time'], y=result_copy['metric_values'], mode='lines', line=dict(color='green'), name='Metric Values'))

# Mark points with iso_anomaly_encoded > 1 in red
anomaly_indices = result_copy[result_copy['iso_anomaly_encoded'] > 1].index
fig.add_trace(go.Scatter(x=result_copy.loc[anomaly_indices, 'time'], y=result_copy.loc[anomaly_indices, 'metric_values'],
                         mode='markers', marker=dict(color='red'), name='Anomalies'))

# Add hover functionality with improved description
fig.update_traces(
    hoverinfo="text",
    hovertext=[f"Time: {time}<br>Interiors: {', '.join(map(str, interiors))}"
               for time, interiors in zip(result_copy['time'], result_copy['interiors'])]
)

# Define the hover event handler
def update_plot(trace, points, selector):
    # Get the index of the hovered point
    ind = points.point_inds[0]
    # Extract the corresponding interiors
    interiors = result_copy['interiors'][ind]

    # Add a trace for the interiors
    fig.add_trace(go.Scatter(y=interiors, mode='lines', line=dict(color='yellow'), name='Interiors'))

# Attach the hover event handler
for trace in fig.data:
    trace.on_hover(update_plot)

fig.show()



In [None]:
!pip install plotly
import plotly.graph_objects as go

# Create the initial scatter plot
fig = go.Figure()
fig.add_trace(go.Scatter(x=result['time'], y=result['metric_values'], mode='lines', line=dict(color='green'), name='Metric Values'))

# Mark points with iso_anomaly_encoded > 1 in red
anomaly_indices = result[result['iso_anomaly_encoded'] > 1].index
fig.add_trace(go.Scatter(x=result.loc[anomaly_indices, 'time'], y=result.loc[anomaly_indices, 'metric_values'],
                         mode='markers', marker=dict(color='red'), name='Anomalies'))

# Add hover functionality with improved description
fig.update_traces(
    hoverinfo="text",
    hovertext=[f"Time: {time}<br>Interiors: {', '.join(map(str, interiors))}"
               for time, interiors in zip(result['time'], result_copy['interiors'])]
)

# Define the hover event handler
def update_plot(trace, points, selector):
    # Get the index of the hovered point
    ind = points.point_inds[0]
    # Extract the corresponding interiors
    interiors = result_copy['interiors'][ind]

    # Add a trace for the interiors
    fig.add_trace(go.Scatter(y=interiors, mode='lines', line=dict(color='yellow'), name='Interiors'))

# Attach the hover event handler
for trace in fig.data:
    trace.on_hover(update_plot)

fig.show()



In [None]:
# Install necessary libraries
!pip install plotly pandas

# Import libraries
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.display import display
import ipywidgets as widgets



In [None]:

fin_met_dat['time'] = pd.to_datetime(fin_met_dat['time'])
 # Extract the relevant data for the current slider value
current_data = fin_met_dat
time = current_data['time']
metric_values = current_data['metric_values']
iso_anomaly = current_data['iso_anomaly_encoded']
big_interior = current_data['big_interior']
big_iso_interiors = current_data['big_iso_interiors']

hover_time = current_data['time']
    # Create traces for the plot
trace1 = go.Scatter(
        x=fin_met_dat['time'],
        y=fin_met_dat['metric_values'],
        mode='lines',
        name='Metric Values',
        line=dict(color='green')
    )
trace2 = go.Scatter(
        x=fin_met_dat[fin_met_dat['iso_anomaly_encoded'] > 0]['time'],
        y=fin_met_dat[fin_met_dat['iso_anomaly_encoded'] > 0]['metric_values'],
        mode='markers',
        name='Anomalies',
        marker=dict(color='red')
    )

    # Highlight the hover data big_interior values and corresponding big_iso_interiors
hover_data_index = fin_met_dat[fin_met_dat['time'] == hover_time].index[0]
hover_big_interior = fin_met_dat.iloc[hover_data_index]['big_interior']
hover_big_iso_interiors = fin_met_dat.iloc[hover_data_index]['big_iso_interiors']

trace3 = go.Scatter(
        x=[hover_time + pd.Timedelta(minutes=10 * i) for i in range(len(hover_big_interior))],
        y=hover_big_interior,
        mode='lines',
        name='Hover Big Interior',
        line=dict(color='yellow')
    )
trace4 = go.Scatter(
        x=[hover_time + pd.Timedelta(minutes=10 * i) for i in range(len(hover_big_iso_interiors)) if hover_big_iso_interiors[i] > 0],
        y=[hover_big_interior[i] for i in range(len(hover_big_iso_interiors)) if hover_big_iso_interiors[i] > 0],
        mode='markers',
        name='Hover Big Iso Anomalies',
        marker=dict(color='purple')
    )

    # Define the layout for the plot
layout = go.Layout(
        title='Metric Values and Anomalies Over Time',
        xaxis=dict(title='Time'),
        yaxis=dict(title='Metric Values'),
        hovermode='closest'
    )

go.Figure(data=[trace1, trace2, trace3, trace4], layout=layout)



In [None]:
# Install necessary libraries
!pip install plotly pandas

# Import libraries
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.display import display
import ipywidgets as widgets

fin_met_dat['time'] = pd.to_datetime(fin_met_dat['time'])

# Create the initial plot
def create_figure(slider_value):
    # Extract the relevant data for the current slider value
    current_data = fin_met_dat.iloc[slider_value]
    time = current_data['time']
    metric_values = current_data['metric_values']
    iso_anomaly = current_data['iso_anomaly_encoded']
    big_interior = current_data['big_interior']
    big_iso_interiors = current_data['big_iso_interiors']

    # Extract hover time (simulate hover by using slider value)
    hover_time = time

    # Create traces for the plot
    trace1 = go.Scatter(
        x=fin_met_dat['time'],
        y=fin_met_dat['metric_values'],
        mode='lines',
        name='Metric Values',
        line=dict(color='green')
    )
    trace2 = go.Scatter(
        x=fin_met_dat[fin_met_dat['iso_anomaly_encoded'] > 0]['time'],
        y=fin_met_dat[fin_met_dat['iso_anomaly_encoded'] > 0]['metric_values'],
        mode='markers',
        name='Anomalies',
        marker=dict(color='red')
    )

    # Highlight the hover data big_interior values and corresponding big_iso_interiors
    hover_data_index = fin_met_dat[fin_met_dat['time'] == hover_time].index[0]
    hover_big_interior = fin_met_dat.iloc[hover_data_index]['big_interior']
    hover_big_iso_interiors = fin_met_dat.iloc[hover_data_index]['big_iso_interiors']

    trace3 = go.Scatter(
        x=[hover_time + pd.Timedelta(minutes=10 * i) for i in range(len(hover_big_interior))],
        y=hover_big_interior,
        mode='lines',
        name='Hover Big Interior',
        line=dict(color='yellow')
    )
    trace4 = go.Scatter(
        x=[hover_time + pd.Timedelta(minutes=10 * i) for i in range(len(hover_big_iso_interiors)) if hover_big_iso_interiors[i] > 0],
        y=[hover_big_interior[i] for i in range(len(hover_big_iso_interiors)) if hover_big_iso_interiors[i] > 0],
        mode='markers',
        name='Hover Big Iso Anomalies',
        marker=dict(color='purple')
    )

    # Define the layout for the plot
    layout = go.Layout(
        title='Metric Values and Anomalies Over Time',
        xaxis=dict(title='Time'),
        yaxis=dict(title='Metric Values'),
        hovermode='closest'
    )

    return go.Figure(data=[trace1, trace2, trace3, trace4], layout=layout)

# Define a function to update the plot based on the slider value
def update_plot(slider_value):
    fig = create_figure(slider_value)
    fig.show()

# Create and display the slider widget
slider = widgets.IntSlider(
    value=0,
    min=0,
    max=len(fin_met_dat) - 1,
    step=1,
    description='Time Index',
    continuous_update=False
)
widgets.interactive(update_plot, slider_value=slider)



interactive(children=(IntSlider(value=0, continuous_update=False, description='Time Index', max=125), Output()…

In [None]:
column_options = [col for col in new_df.columns if 'time' not in col.lower()]

In [None]:
column_options

['a_res',
 'ax',
 'ay',
 'az',
 'g_res',
 'gx',
 'gy',
 'gz',
 'm_res',
 'mx',
 'my',
 'mz',
 'a_res_diff',
 'ax_diff',
 'ay_diff',
 'az_diff',
 'g_res_diff',
 'gx_diff',
 'gy_diff',
 'gz_diff',
 'm_res_diff',
 'mx_diff',
 'my_diff',
 'mz_diff',
 'a_res_fft',
 'ax_fft',
 'ay_fft',
 'az_fft',
 'g_res_fft',
 'gx_fft',
 'gy_fft',
 'gz_fft',
 'm_res_fft',
 'mx_fft',
 'my_fft',
 'mz_fft',
 'a_res_diff_fft',
 'ax_diff_fft',
 'ay_diff_fft',
 'az_diff_fft',
 'g_res_diff_fft',
 'gx_diff_fft',
 'gy_diff_fft',
 'gz_diff_fft',
 'm_res_diff_fft',
 'mx_diff_fft',
 'my_diff_fft',
 'mz_diff_fft']